diff --git a/android-demo/.gitignore b/android-demo/.gitignore
new file mode 100644
index 0000000..8299e9a
--- /dev/null
+++ b/android-demo/.gitignore
@@ -0,0 +1,11 @@
+*.iml
+.gradle
+/local.properties
+/.idea
+.DS_Store
+app/.cxx
+app/release
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android-demo/app/.gitignore b/android-demo/app/.gitignore
new file mode 100644
index 0000000..c591fde
--- /dev/null
+++ b/android-demo/app/.gitignore
@@ -0,0 +1,2 @@
+/build
+.cxx
\ No newline at end of file
diff --git a/android-demo/app/build.gradle b/android-demo/app/build.gradle
new file mode 100644
index 0000000..68e833e
--- /dev/null
+++ b/android-demo/app/build.gradle
@@ -0,0 +1,47 @@
+plugins {
+    alias(libs.plugins.android.application)
+}
+
+android {
+    namespace 'com.gyq.yolov8'
+    compileSdk 34
+
+    defaultConfig {
+        applicationId "com.gyq.yolov8"
+        minSdk 24
+        targetSdk 34
+        versionCode 1
+        versionName "1.0"
+
+        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+    }
+
+    buildTypes {
+        release {
+            minifyEnabled false
+            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+        }
+    }
+    compileOptions {
+        sourceCompatibility JavaVersion.VERSION_11
+        targetCompatibility JavaVersion.VERSION_11
+    }
+    externalNativeBuild {
+        cmake {
+            version "3.31.1" // make sure this matches the CMake version installed in your SDK
+            path file('src/main/jni/CMakeLists.txt')
+        }
+    }
+    ndkVersion "25.2.9519653"
+}
+
+dependencies {
+
+    implementation libs.appcompat
+    implementation libs.material
+    implementation libs.activity
+    implementation libs.constraintlayout
+    testImplementation libs.junit
+    androidTestImplementation libs.ext.junit
+    androidTestImplementation libs.espresso.core
+}
\ No newline at end of file
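Note that this build.gradle resolves its plugin and library coordinates through a Gradle version catalog (gradle/libs.versions.toml) that is not included in this diff. A minimal sketch of the catalog entries the aliases above assume — the group/artifact coordinates follow the usual AndroidX names, but every version number here is illustrative, not taken from the PR:

```toml
# gradle/libs.versions.toml -- hypothetical catalog backing the aliases above
[versions]
agp = "8.5.0"

[libraries]
appcompat = { group = "androidx.appcompat", name = "appcompat", version = "1.7.0" }
material = { group = "com.google.android.material", name = "material", version = "1.12.0" }
activity = { group = "androidx.activity", name = "activity", version = "1.9.0" }
constraintlayout = { group = "androidx.constraintlayout", name = "constraintlayout", version = "2.1.4" }
junit = { group = "junit", name = "junit", version = "4.13.2" }
ext-junit = { group = "androidx.test.ext", name = "junit", version = "1.2.1" }
espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version = "3.6.1" }

[plugins]
android-application = { id = "com.android.application", version.ref = "agp" }
```

Gradle turns each dash-separated key into a dotted accessor, so `ext-junit` becomes `libs.ext.junit` and `android-application` becomes `libs.plugins.android.application`.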
diff --git a/android-demo/app/proguard-rules.pro b/android-demo/app/proguard-rules.pro
new file mode 100644
index 0000000..481bb43
--- /dev/null
+++ b/android-demo/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+#   http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+#   public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android-demo/app/src/main/AndroidManifest.xml b/android-demo/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000..87ef5fb
--- /dev/null
+++ b/android-demo/app/src/main/AndroidManifest.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+    <uses-permission android:name="android.permission.CAMERA" />
+
+    <application
+        android:allowBackup="true"
+        android:icon="@mipmap/ic_launcher"
+        android:label="@string/app_name"
+        android:supportsRtl="true">
+        <activity
+            android:name=".MainActivity"
+            android:screenOrientation="portrait"
+            android:exported="true">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+</manifest>
\ No newline at end of file
diff --git a/android-demo/app/src/main/assets/yolov8n.bin b/android-demo/app/src/main/assets/yolov8n.bin
new file mode 100644
index 0000000..0295cc6
Binary files /dev/null and b/android-demo/app/src/main/assets/yolov8n.bin differ
diff --git a/android-demo/app/src/main/assets/yolov8n.param b/android-demo/app/src/main/assets/yolov8n.param
new file mode 100644
index 0000000..d5c8a60
--- /dev/null
+++ b/android-demo/app/src/main/assets/yolov8n.param
@@ -0,0 +1,199 @@
+7767517
+197 233
+Input images 0 1 images
+Convolution Conv_0 1 1 images 128 0=16 1=3 3=2 4=1 5=1 6=432
+Swish Mul_2 1 1 128 130
+Convolution Conv_3 1 1 130 131 0=32 1=3 3=2 4=1 5=1 6=4608
+Swish Mul_5 1 1 131 133
+Convolution Conv_6 1 1 133 134 0=32 1=1 5=1 6=1024
+Swish Mul_8 1 1 134 136
+Split splitncnn_0 1 2 136 136_splitncnn_0 136_splitncnn_1
+Crop Slice_13 1 1 136_splitncnn_1 141 -23309=1,0 -23310=1,16 -23311=1,0
+Crop Slice_18 1 1 136_splitncnn_0 146 -23309=1,16 -23310=1,32 -23311=1,0
+Split splitncnn_1 1 3 146 146_splitncnn_0 146_splitncnn_1 146_splitncnn_2
+Convolution Conv_19 1 1 146_splitncnn_2 147 0=16 1=3 4=1 5=1 6=2304
+Swish Mul_21 1 1 147 149
+Convolution Conv_22 1 1 149 150 0=16 1=3 4=1 5=1 6=2304
+Swish Mul_24 1 1 150 152
+BinaryOp Add_25 2 1 146_splitncnn_1 152 153
+Concat Concat_26 3 1 141 146_splitncnn_0 153 154
+Convolution Conv_27 1 1 154 155 0=32 1=1 5=1 6=1536
+Swish Mul_29 1 1 155 157
+Convolution Conv_30 1 1 157 158 0=64 1=3 3=2 4=1 5=1 6=18432
+Swish Mul_32 1 1 158 160
+Convolution Conv_33 1 1 160 161 0=64 1=1 5=1 6=4096
+Swish Mul_35 1 1 161 163
+Split splitncnn_2 1 2 163 163_splitncnn_0 163_splitncnn_1
+Crop Slice_40 1 1 163_splitncnn_1 168 -23309=1,0 -23310=1,32 -23311=1,0
+Crop Slice_45 1 1 163_splitncnn_0 173 -23309=1,32 -23310=1,64 -23311=1,0
+Split splitncnn_3 1 3 173 173_splitncnn_0 173_splitncnn_1 173_splitncnn_2
+Convolution Conv_46 1 1 173_splitncnn_2 174 0=32 1=3 4=1 5=1 6=9216
+Swish Mul_48 1 1 174 176
+Convolution Conv_49 1 1 176 177 0=32 1=3 4=1 5=1 6=9216
+Swish Mul_51 1 1 177 179
+BinaryOp Add_52 2 1 173_splitncnn_1 179 180
+Split splitncnn_4 1 3 180 180_splitncnn_0 180_splitncnn_1 180_splitncnn_2
+Convolution Conv_53 1 1 180_splitncnn_2 181 0=32 1=3 4=1 5=1 6=9216
+Swish Mul_55 1 1 181 183
+Convolution Conv_56 1 1 183 184 0=32 1=3 4=1 5=1 6=9216
+Swish Mul_58 1 1 184 186
+BinaryOp Add_59 2 1 180_splitncnn_1 186 187
+Concat Concat_60 4 1 168 173_splitncnn_0 180_splitncnn_0 187 188
+Convolution Conv_61 1 1 188 189 0=64 1=1 5=1 6=8192
+Swish Mul_63 1 1 189 191
+Split splitncnn_5 1 2 191 191_splitncnn_0 191_splitncnn_1
+Convolution Conv_64 1 1 191_splitncnn_1 192 0=128 1=3 3=2 4=1 5=1 6=73728
+Swish Mul_66 1 1 192 194
+Convolution Conv_67 1 1 194 195 0=128 1=1 5=1 6=16384
+Swish Mul_69 1 1 195 197
+Split splitncnn_6 1 2 197 197_splitncnn_0 197_splitncnn_1
+Crop Slice_74 1 1 197_splitncnn_1 202 -23309=1,0 -23310=1,64 -23311=1,0
+Crop Slice_79 1 1 197_splitncnn_0 207 -23309=1,64 -23310=1,128 -23311=1,0
+Split splitncnn_7 1 3 207 207_splitncnn_0 207_splitncnn_1 207_splitncnn_2
+Convolution Conv_80 1 1 207_splitncnn_2 208 0=64 1=3 4=1 5=1 6=36864
+Swish Mul_82 1 1 208 210
+Convolution Conv_83 1 1 210 211 0=64 1=3 4=1 5=1 6=36864
+Swish Mul_85 1 1 211 213 +BinaryOp Add_86 2 1 207_splitncnn_1 213 214 +Split splitncnn_8 1 3 214 214_splitncnn_0 214_splitncnn_1 214_splitncnn_2 +Convolution Conv_87 1 1 214_splitncnn_2 215 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_89 1 1 215 217 +Convolution Conv_90 1 1 217 218 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_92 1 1 218 220 +BinaryOp Add_93 2 1 214_splitncnn_1 220 221 +Concat Concat_94 4 1 202 207_splitncnn_0 214_splitncnn_0 221 222 +Convolution Conv_95 1 1 222 223 0=128 1=1 5=1 6=32768 +Swish Mul_97 1 1 223 225 +Split splitncnn_9 1 2 225 225_splitncnn_0 225_splitncnn_1 +Convolution Conv_98 1 1 225_splitncnn_1 226 0=256 1=3 3=2 4=1 5=1 6=294912 +Swish Mul_100 1 1 226 228 +Convolution Conv_101 1 1 228 229 0=256 1=1 5=1 6=65536 +Swish Mul_103 1 1 229 231 +Split splitncnn_10 1 2 231 231_splitncnn_0 231_splitncnn_1 +Crop Slice_108 1 1 231_splitncnn_1 236 -23309=1,0 -23310=1,128 -23311=1,0 +Crop Slice_113 1 1 231_splitncnn_0 241 -23309=1,128 -23310=1,256 -23311=1,0 +Split splitncnn_11 1 3 241 241_splitncnn_0 241_splitncnn_1 241_splitncnn_2 +Convolution Conv_114 1 1 241_splitncnn_2 242 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_116 1 1 242 244 +Convolution Conv_117 1 1 244 245 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_119 1 1 245 247 +BinaryOp Add_120 2 1 241_splitncnn_1 247 248 +Concat Concat_121 3 1 236 241_splitncnn_0 248 249 +Convolution Conv_122 1 1 249 250 0=256 1=1 5=1 6=98304 +Swish Mul_124 1 1 250 252 +Convolution Conv_125 1 1 252 253 0=128 1=1 5=1 6=32768 +Swish Mul_127 1 1 253 255 +Split splitncnn_12 1 2 255 255_splitncnn_0 255_splitncnn_1 +Pooling MaxPool_128 1 1 255_splitncnn_1 256 1=5 3=2 5=1 +Split splitncnn_13 1 2 256 256_splitncnn_0 256_splitncnn_1 +Pooling MaxPool_129 1 1 256_splitncnn_1 257 1=5 3=2 5=1 +Split splitncnn_14 1 2 257 257_splitncnn_0 257_splitncnn_1 +Pooling MaxPool_130 1 1 257_splitncnn_1 258 1=5 3=2 5=1 +Concat Concat_131 4 1 255_splitncnn_0 256_splitncnn_0 257_splitncnn_0 258 259 +Convolution Conv_132 1 1 259 260 0=256 1=1 5=1 6=131072 +Swish Mul_134 1 1 260 262 +Split splitncnn_15 1 2 262 262_splitncnn_0 262_splitncnn_1 +Interp Resize_136 1 1 262_splitncnn_1 267 0=1 1=2.000000e+00 2=2.000000e+00 +Concat Concat_137 2 1 267 225_splitncnn_0 268 +Convolution Conv_138 1 1 268 269 0=128 1=1 5=1 6=49152 +Swish Mul_140 1 1 269 271 +Split splitncnn_16 1 2 271 271_splitncnn_0 271_splitncnn_1 +Crop Slice_145 1 1 271_splitncnn_1 276 -23309=1,0 -23310=1,64 -23311=1,0 +Crop Slice_150 1 1 271_splitncnn_0 281 -23309=1,64 -23310=1,128 -23311=1,0 +Split splitncnn_17 1 2 281 281_splitncnn_0 281_splitncnn_1 +Convolution Conv_151 1 1 281_splitncnn_1 282 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_153 1 1 282 284 +Convolution Conv_154 1 1 284 285 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_156 1 1 285 287 +Concat Concat_157 3 1 276 281_splitncnn_0 287 288 +Convolution Conv_158 1 1 288 289 0=128 1=1 5=1 6=24576 +Swish Mul_160 1 1 289 291 +Split splitncnn_18 1 2 291 291_splitncnn_0 291_splitncnn_1 +Interp Resize_162 1 1 291_splitncnn_1 296 0=1 1=2.000000e+00 2=2.000000e+00 +Concat Concat_163 2 1 296 191_splitncnn_0 297 +Convolution Conv_164 1 1 297 298 0=64 1=1 5=1 6=12288 +Swish Mul_166 1 1 298 300 +Split splitncnn_19 1 2 300 300_splitncnn_0 300_splitncnn_1 +Crop Slice_171 1 1 300_splitncnn_1 305 -23309=1,0 -23310=1,32 -23311=1,0 +Crop Slice_176 1 1 300_splitncnn_0 310 -23309=1,32 -23310=1,64 -23311=1,0 +Split splitncnn_20 1 2 310 310_splitncnn_0 310_splitncnn_1 +Convolution Conv_177 1 1 310_splitncnn_1 311 0=32 1=3 4=1 5=1 6=9216 +Swish Mul_179 1 1 311 313 +Convolution Conv_180 1 1 313 314 0=32 1=3 4=1 5=1 
6=9216 +Swish Mul_182 1 1 314 316 +Concat Concat_183 3 1 305 310_splitncnn_0 316 317 +Convolution Conv_184 1 1 317 318 0=64 1=1 5=1 6=6144 +Swish Mul_186 1 1 318 320 +Split splitncnn_21 1 3 320 320_splitncnn_0 320_splitncnn_1 320_splitncnn_2 +Convolution Conv_187 1 1 320_splitncnn_2 321 0=64 1=3 3=2 4=1 5=1 6=36864 +Swish Mul_189 1 1 321 323 +Concat Concat_190 2 1 323 291_splitncnn_0 324 +Convolution Conv_191 1 1 324 325 0=128 1=1 5=1 6=24576 +Swish Mul_193 1 1 325 327 +Split splitncnn_22 1 2 327 327_splitncnn_0 327_splitncnn_1 +Crop Slice_198 1 1 327_splitncnn_1 332 -23309=1,0 -23310=1,64 -23311=1,0 +Crop Slice_203 1 1 327_splitncnn_0 337 -23309=1,64 -23310=1,128 -23311=1,0 +Split splitncnn_23 1 2 337 337_splitncnn_0 337_splitncnn_1 +Convolution Conv_204 1 1 337_splitncnn_1 338 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_206 1 1 338 340 +Convolution Conv_207 1 1 340 341 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_209 1 1 341 343 +Concat Concat_210 3 1 332 337_splitncnn_0 343 344 +Convolution Conv_211 1 1 344 345 0=128 1=1 5=1 6=24576 +Swish Mul_213 1 1 345 347 +Split splitncnn_24 1 3 347 347_splitncnn_0 347_splitncnn_1 347_splitncnn_2 +Convolution Conv_214 1 1 347_splitncnn_2 348 0=128 1=3 3=2 4=1 5=1 6=147456 +Swish Mul_216 1 1 348 350 +Concat Concat_217 2 1 350 262_splitncnn_0 351 +Convolution Conv_218 1 1 351 352 0=256 1=1 5=1 6=98304 +Swish Mul_220 1 1 352 354 +Split splitncnn_25 1 2 354 354_splitncnn_0 354_splitncnn_1 +Crop Slice_225 1 1 354_splitncnn_1 359 -23309=1,0 -23310=1,128 -23311=1,0 +Crop Slice_230 1 1 354_splitncnn_0 364 -23309=1,128 -23310=1,256 -23311=1,0 +Split splitncnn_26 1 2 364 364_splitncnn_0 364_splitncnn_1 +Convolution Conv_231 1 1 364_splitncnn_1 365 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_233 1 1 365 367 +Convolution Conv_234 1 1 367 368 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_236 1 1 368 370 +Concat Concat_237 3 1 359 364_splitncnn_0 370 371 +Convolution Conv_238 1 1 371 372 0=256 1=1 5=1 6=98304 +Swish Mul_240 1 1 372 374 +Split splitncnn_27 1 2 374 374_splitncnn_0 374_splitncnn_1 +Convolution Conv_241 1 1 320_splitncnn_1 376 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_243 1 1 376 378 +Convolution Conv_244 1 1 378 379 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_246 1 1 379 381 +Convolution Conv_247 1 1 381 382 0=64 1=1 5=1 6=4096 +Convolution Conv_248 1 1 320_splitncnn_0 383 0=80 1=3 4=1 5=1 6=46080 +Swish Mul_250 1 1 383 385 +Convolution Conv_251 1 1 385 386 0=80 1=3 4=1 5=1 6=57600 +Swish Mul_253 1 1 386 388 +Convolution Conv_254 1 1 388 389 0=80 1=1 5=1 6=6400 +Concat Concat_255 2 1 382 389 390 +Convolution Conv_256 1 1 347_splitncnn_1 391 0=64 1=3 4=1 5=1 6=73728 +Swish Mul_258 1 1 391 393 +Convolution Conv_259 1 1 393 394 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_261 1 1 394 396 +Convolution Conv_262 1 1 396 397 0=64 1=1 5=1 6=4096 +Convolution Conv_263 1 1 347_splitncnn_0 398 0=80 1=3 4=1 5=1 6=92160 +Swish Mul_265 1 1 398 400 +Convolution Conv_266 1 1 400 401 0=80 1=3 4=1 5=1 6=57600 +Swish Mul_268 1 1 401 403 +Convolution Conv_269 1 1 403 404 0=80 1=1 5=1 6=6400 +Concat Concat_270 2 1 397 404 405 +Convolution Conv_271 1 1 374_splitncnn_1 406 0=64 1=3 4=1 5=1 6=147456 +Swish Mul_273 1 1 406 408 +Convolution Conv_274 1 1 408 409 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_276 1 1 409 411 +Convolution Conv_277 1 1 411 412 0=64 1=1 5=1 6=4096 +Convolution Conv_278 1 1 374_splitncnn_0 413 0=80 1=3 4=1 5=1 6=184320 +Swish Mul_280 1 1 413 415 +Convolution Conv_281 1 1 415 416 0=80 1=3 4=1 5=1 6=57600 +Swish Mul_283 1 1 416 418 +Convolution Conv_284 1 1 418 419 0=80 1=1 5=1 6=6400 +Concat Concat_285 2 1 412 
419 420 +Reshape Reshape_286 1 1 390 427 0=-1 1=144 +Reshape Reshape_287 1 1 405 434 0=-1 1=144 +Reshape Reshape_288 1 1 420 441 0=-1 1=144 +Concat Concat_289 3 1 427 434 441 442 0=1 +Permute Transpose_526 1 1 442 output 0=1 diff --git a/android-demo/app/src/main/assets/yolov8s.bin b/android-demo/app/src/main/assets/yolov8s.bin new file mode 100644 index 0000000..f4382f2 Binary files /dev/null and b/android-demo/app/src/main/assets/yolov8s.bin differ diff --git a/android-demo/app/src/main/assets/yolov8s.param b/android-demo/app/src/main/assets/yolov8s.param new file mode 100644 index 0000000..8474461 --- /dev/null +++ b/android-demo/app/src/main/assets/yolov8s.param @@ -0,0 +1,199 @@ +7767517 +197 233 +Input images 0 1 images +Convolution Conv_0 1 1 images 128 0=32 1=3 3=2 4=1 5=1 6=864 +Swish Mul_2 1 1 128 130 +Convolution Conv_3 1 1 130 131 0=64 1=3 3=2 4=1 5=1 6=18432 +Swish Mul_5 1 1 131 133 +Convolution Conv_6 1 1 133 134 0=64 1=1 5=1 6=4096 +Swish Mul_8 1 1 134 136 +Split splitncnn_0 1 2 136 136_splitncnn_0 136_splitncnn_1 +Crop Slice_13 1 1 136_splitncnn_1 141 -23309=1,0 -23310=1,32 -23311=1,0 +Crop Slice_18 1 1 136_splitncnn_0 146 -23309=1,32 -23310=1,64 -23311=1,0 +Split splitncnn_1 1 3 146 146_splitncnn_0 146_splitncnn_1 146_splitncnn_2 +Convolution Conv_19 1 1 146_splitncnn_2 147 0=32 1=3 4=1 5=1 6=9216 +Swish Mul_21 1 1 147 149 +Convolution Conv_22 1 1 149 150 0=32 1=3 4=1 5=1 6=9216 +Swish Mul_24 1 1 150 152 +BinaryOp Add_25 2 1 146_splitncnn_1 152 153 +Concat Concat_26 3 1 141 146_splitncnn_0 153 154 +Convolution Conv_27 1 1 154 155 0=64 1=1 5=1 6=6144 +Swish Mul_29 1 1 155 157 +Convolution Conv_30 1 1 157 158 0=128 1=3 3=2 4=1 5=1 6=73728 +Swish Mul_32 1 1 158 160 +Convolution Conv_33 1 1 160 161 0=128 1=1 5=1 6=16384 +Swish Mul_35 1 1 161 163 +Split splitncnn_2 1 2 163 163_splitncnn_0 163_splitncnn_1 +Crop Slice_40 1 1 163_splitncnn_1 168 -23309=1,0 -23310=1,64 -23311=1,0 +Crop Slice_45 1 1 163_splitncnn_0 173 -23309=1,64 -23310=1,128 -23311=1,0 +Split splitncnn_3 1 3 173 173_splitncnn_0 173_splitncnn_1 173_splitncnn_2 +Convolution Conv_46 1 1 173_splitncnn_2 174 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_48 1 1 174 176 +Convolution Conv_49 1 1 176 177 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_51 1 1 177 179 +BinaryOp Add_52 2 1 173_splitncnn_1 179 180 +Split splitncnn_4 1 3 180 180_splitncnn_0 180_splitncnn_1 180_splitncnn_2 +Convolution Conv_53 1 1 180_splitncnn_2 181 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_55 1 1 181 183 +Convolution Conv_56 1 1 183 184 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_58 1 1 184 186 +BinaryOp Add_59 2 1 180_splitncnn_1 186 187 +Concat Concat_60 4 1 168 173_splitncnn_0 180_splitncnn_0 187 188 +Convolution Conv_61 1 1 188 189 0=128 1=1 5=1 6=32768 +Swish Mul_63 1 1 189 191 +Split splitncnn_5 1 2 191 191_splitncnn_0 191_splitncnn_1 +Convolution Conv_64 1 1 191_splitncnn_1 192 0=256 1=3 3=2 4=1 5=1 6=294912 +Swish Mul_66 1 1 192 194 +Convolution Conv_67 1 1 194 195 0=256 1=1 5=1 6=65536 +Swish Mul_69 1 1 195 197 +Split splitncnn_6 1 2 197 197_splitncnn_0 197_splitncnn_1 +Crop Slice_74 1 1 197_splitncnn_1 202 -23309=1,0 -23310=1,128 -23311=1,0 +Crop Slice_79 1 1 197_splitncnn_0 207 -23309=1,128 -23310=1,256 -23311=1,0 +Split splitncnn_7 1 3 207 207_splitncnn_0 207_splitncnn_1 207_splitncnn_2 +Convolution Conv_80 1 1 207_splitncnn_2 208 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_82 1 1 208 210 +Convolution Conv_83 1 1 210 211 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_85 1 1 211 213 +BinaryOp Add_86 2 1 207_splitncnn_1 213 214 +Split splitncnn_8 1 3 214 214_splitncnn_0 
214_splitncnn_1 214_splitncnn_2 +Convolution Conv_87 1 1 214_splitncnn_2 215 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_89 1 1 215 217 +Convolution Conv_90 1 1 217 218 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_92 1 1 218 220 +BinaryOp Add_93 2 1 214_splitncnn_1 220 221 +Concat Concat_94 4 1 202 207_splitncnn_0 214_splitncnn_0 221 222 +Convolution Conv_95 1 1 222 223 0=256 1=1 5=1 6=131072 +Swish Mul_97 1 1 223 225 +Split splitncnn_9 1 2 225 225_splitncnn_0 225_splitncnn_1 +Convolution Conv_98 1 1 225_splitncnn_1 226 0=512 1=3 3=2 4=1 5=1 6=1179648 +Swish Mul_100 1 1 226 228 +Convolution Conv_101 1 1 228 229 0=512 1=1 5=1 6=262144 +Swish Mul_103 1 1 229 231 +Split splitncnn_10 1 2 231 231_splitncnn_0 231_splitncnn_1 +Crop Slice_108 1 1 231_splitncnn_1 236 -23309=1,0 -23310=1,256 -23311=1,0 +Crop Slice_113 1 1 231_splitncnn_0 241 -23309=1,256 -23310=1,512 -23311=1,0 +Split splitncnn_11 1 3 241 241_splitncnn_0 241_splitncnn_1 241_splitncnn_2 +Convolution Conv_114 1 1 241_splitncnn_2 242 0=256 1=3 4=1 5=1 6=589824 +Swish Mul_116 1 1 242 244 +Convolution Conv_117 1 1 244 245 0=256 1=3 4=1 5=1 6=589824 +Swish Mul_119 1 1 245 247 +BinaryOp Add_120 2 1 241_splitncnn_1 247 248 +Concat Concat_121 3 1 236 241_splitncnn_0 248 249 +Convolution Conv_122 1 1 249 250 0=512 1=1 5=1 6=393216 +Swish Mul_124 1 1 250 252 +Convolution Conv_125 1 1 252 253 0=256 1=1 5=1 6=131072 +Swish Mul_127 1 1 253 255 +Split splitncnn_12 1 2 255 255_splitncnn_0 255_splitncnn_1 +Pooling MaxPool_128 1 1 255_splitncnn_1 256 1=5 3=2 5=1 +Split splitncnn_13 1 2 256 256_splitncnn_0 256_splitncnn_1 +Pooling MaxPool_129 1 1 256_splitncnn_1 257 1=5 3=2 5=1 +Split splitncnn_14 1 2 257 257_splitncnn_0 257_splitncnn_1 +Pooling MaxPool_130 1 1 257_splitncnn_1 258 1=5 3=2 5=1 +Concat Concat_131 4 1 255_splitncnn_0 256_splitncnn_0 257_splitncnn_0 258 259 +Convolution Conv_132 1 1 259 260 0=512 1=1 5=1 6=524288 +Swish Mul_134 1 1 260 262 +Split splitncnn_15 1 2 262 262_splitncnn_0 262_splitncnn_1 +Interp Resize_136 1 1 262_splitncnn_1 267 0=1 1=2.000000e+00 2=2.000000e+00 +Concat Concat_137 2 1 267 225_splitncnn_0 268 +Convolution Conv_138 1 1 268 269 0=256 1=1 5=1 6=196608 +Swish Mul_140 1 1 269 271 +Split splitncnn_16 1 2 271 271_splitncnn_0 271_splitncnn_1 +Crop Slice_145 1 1 271_splitncnn_1 276 -23309=1,0 -23310=1,128 -23311=1,0 +Crop Slice_150 1 1 271_splitncnn_0 281 -23309=1,128 -23310=1,256 -23311=1,0 +Split splitncnn_17 1 2 281 281_splitncnn_0 281_splitncnn_1 +Convolution Conv_151 1 1 281_splitncnn_1 282 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_153 1 1 282 284 +Convolution Conv_154 1 1 284 285 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_156 1 1 285 287 +Concat Concat_157 3 1 276 281_splitncnn_0 287 288 +Convolution Conv_158 1 1 288 289 0=256 1=1 5=1 6=98304 +Swish Mul_160 1 1 289 291 +Split splitncnn_18 1 2 291 291_splitncnn_0 291_splitncnn_1 +Interp Resize_162 1 1 291_splitncnn_1 296 0=1 1=2.000000e+00 2=2.000000e+00 +Concat Concat_163 2 1 296 191_splitncnn_0 297 +Convolution Conv_164 1 1 297 298 0=128 1=1 5=1 6=49152 +Swish Mul_166 1 1 298 300 +Split splitncnn_19 1 2 300 300_splitncnn_0 300_splitncnn_1 +Crop Slice_171 1 1 300_splitncnn_1 305 -23309=1,0 -23310=1,64 -23311=1,0 +Crop Slice_176 1 1 300_splitncnn_0 310 -23309=1,64 -23310=1,128 -23311=1,0 +Split splitncnn_20 1 2 310 310_splitncnn_0 310_splitncnn_1 +Convolution Conv_177 1 1 310_splitncnn_1 311 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_179 1 1 311 313 +Convolution Conv_180 1 1 313 314 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_182 1 1 314 316 +Concat Concat_183 3 1 305 310_splitncnn_0 316 317 
+Convolution Conv_184 1 1 317 318 0=128 1=1 5=1 6=24576 +Swish Mul_186 1 1 318 320 +Split splitncnn_21 1 3 320 320_splitncnn_0 320_splitncnn_1 320_splitncnn_2 +Convolution Conv_187 1 1 320_splitncnn_2 321 0=128 1=3 3=2 4=1 5=1 6=147456 +Swish Mul_189 1 1 321 323 +Concat Concat_190 2 1 323 291_splitncnn_0 324 +Convolution Conv_191 1 1 324 325 0=256 1=1 5=1 6=98304 +Swish Mul_193 1 1 325 327 +Split splitncnn_22 1 2 327 327_splitncnn_0 327_splitncnn_1 +Crop Slice_198 1 1 327_splitncnn_1 332 -23309=1,0 -23310=1,128 -23311=1,0 +Crop Slice_203 1 1 327_splitncnn_0 337 -23309=1,128 -23310=1,256 -23311=1,0 +Split splitncnn_23 1 2 337 337_splitncnn_0 337_splitncnn_1 +Convolution Conv_204 1 1 337_splitncnn_1 338 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_206 1 1 338 340 +Convolution Conv_207 1 1 340 341 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_209 1 1 341 343 +Concat Concat_210 3 1 332 337_splitncnn_0 343 344 +Convolution Conv_211 1 1 344 345 0=256 1=1 5=1 6=98304 +Swish Mul_213 1 1 345 347 +Split splitncnn_24 1 3 347 347_splitncnn_0 347_splitncnn_1 347_splitncnn_2 +Convolution Conv_214 1 1 347_splitncnn_2 348 0=256 1=3 3=2 4=1 5=1 6=589824 +Swish Mul_216 1 1 348 350 +Concat Concat_217 2 1 350 262_splitncnn_0 351 +Convolution Conv_218 1 1 351 352 0=512 1=1 5=1 6=393216 +Swish Mul_220 1 1 352 354 +Split splitncnn_25 1 2 354 354_splitncnn_0 354_splitncnn_1 +Crop Slice_225 1 1 354_splitncnn_1 359 -23309=1,0 -23310=1,256 -23311=1,0 +Crop Slice_230 1 1 354_splitncnn_0 364 -23309=1,256 -23310=1,512 -23311=1,0 +Split splitncnn_26 1 2 364 364_splitncnn_0 364_splitncnn_1 +Convolution Conv_231 1 1 364_splitncnn_1 365 0=256 1=3 4=1 5=1 6=589824 +Swish Mul_233 1 1 365 367 +Convolution Conv_234 1 1 367 368 0=256 1=3 4=1 5=1 6=589824 +Swish Mul_236 1 1 368 370 +Concat Concat_237 3 1 359 364_splitncnn_0 370 371 +Convolution Conv_238 1 1 371 372 0=512 1=1 5=1 6=393216 +Swish Mul_240 1 1 372 374 +Split splitncnn_27 1 2 374 374_splitncnn_0 374_splitncnn_1 +Convolution Conv_241 1 1 320_splitncnn_1 376 0=64 1=3 4=1 5=1 6=73728 +Swish Mul_243 1 1 376 378 +Convolution Conv_244 1 1 378 379 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_246 1 1 379 381 +Convolution Conv_247 1 1 381 382 0=64 1=1 5=1 6=4096 +Convolution Conv_248 1 1 320_splitncnn_0 383 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_250 1 1 383 385 +Convolution Conv_251 1 1 385 386 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_253 1 1 386 388 +Convolution Conv_254 1 1 388 389 0=80 1=1 5=1 6=10240 +Concat Concat_255 2 1 382 389 390 +Convolution Conv_256 1 1 347_splitncnn_1 391 0=64 1=3 4=1 5=1 6=147456 +Swish Mul_258 1 1 391 393 +Convolution Conv_259 1 1 393 394 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_261 1 1 394 396 +Convolution Conv_262 1 1 396 397 0=64 1=1 5=1 6=4096 +Convolution Conv_263 1 1 347_splitncnn_0 398 0=128 1=3 4=1 5=1 6=294912 +Swish Mul_265 1 1 398 400 +Convolution Conv_266 1 1 400 401 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_268 1 1 401 403 +Convolution Conv_269 1 1 403 404 0=80 1=1 5=1 6=10240 +Concat Concat_270 2 1 397 404 405 +Convolution Conv_271 1 1 374_splitncnn_1 406 0=64 1=3 4=1 5=1 6=294912 +Swish Mul_273 1 1 406 408 +Convolution Conv_274 1 1 408 409 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_276 1 1 409 411 +Convolution Conv_277 1 1 411 412 0=64 1=1 5=1 6=4096 +Convolution Conv_278 1 1 374_splitncnn_0 413 0=128 1=3 4=1 5=1 6=589824 +Swish Mul_280 1 1 413 415 +Convolution Conv_281 1 1 415 416 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_283 1 1 416 418 +Convolution Conv_284 1 1 418 419 0=80 1=1 5=1 6=10240 +Concat Concat_285 2 1 412 419 420 +Reshape Reshape_286 1 1 390 427 0=-1 1=144 
+Reshape Reshape_287 1 1 405 434 0=-1 1=144
+Reshape Reshape_288 1 1 420 441 0=-1 1=144
+Concat Concat_289 3 1 427 434 441 442 0=1
+Permute Transpose_526 1 1 442 output 0=1
diff --git a/android-demo/app/src/main/java/com/gyq/yolov8/MainActivity.java b/android-demo/app/src/main/java/com/gyq/yolov8/MainActivity.java
new file mode 100644
index 0000000..84a6da1
--- /dev/null
+++ b/android-demo/app/src/main/java/com/gyq/yolov8/MainActivity.java
@@ -0,0 +1,144 @@
+package com.gyq.yolov8;
+
+import android.Manifest;
+import android.app.Activity;
+import android.content.pm.PackageManager;
+import android.graphics.PixelFormat;
+import android.os.Bundle;
+import android.util.Log;
+import android.view.SurfaceHolder;
+import android.view.SurfaceView;
+import android.view.View;
+import android.view.WindowManager;
+import android.widget.AdapterView;
+import android.widget.Button;
+import android.widget.Spinner;
+
+import androidx.core.app.ActivityCompat;
+import androidx.core.content.ContextCompat;
+
+
+public class MainActivity extends Activity implements SurfaceHolder.Callback {
+    public static final int REQUEST_CAMERA = 100; // note: permission request code (constant)
+
+
+    private Yolov8Ncnn yolov8ncnn = new Yolov8Ncnn(); // Yolov8Ncnn instance that loads the model and runs inference
+    private int facing = 0; // which camera is in use: back or front
+
+    private Spinner spinnerModel; // Spinner on the main screen for switching models
+    private int current_model = 0;
+    private Spinner spinnerCPUGPU; // Spinner for switching between CPU and GPU
+    private int current_cpugpu = 0;
+
+    private SurfaceView cameraView; // SurfaceView that displays the camera preview
+
+    /**
+     * Called when the activity is first created.
+     */
+    @Override
+    public void onCreate(Bundle savedInstanceState) {
+        super.onCreate(savedInstanceState);
+        // Inflate the UI from XML (R is the generated resource class that indexes all resources)
+        setContentView(R.layout.main);
+        // Keep the screen on
+        getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
+
+        // Get the camera preview widget (SurfaceView) from the layout
+        cameraView = (SurfaceView) findViewById(R.id.cameraview);
+        // Ask for camera frames in RGBA_8888 (may vary by device, but this is the usual format)
+        cameraView.getHolder().setFormat(PixelFormat.RGBA_8888);
+        // Register this activity as the SurfaceHolder callback
+        cameraView.getHolder().addCallback(this);
+
+        // Get the button that switches between the front and back cameras
+        Button buttonSwitchCamera = (Button) findViewById(R.id.buttonSwitchCamera);
+        // Attach a click handler (event) to the button that performs the camera switch
+        buttonSwitchCamera.setOnClickListener(new View.OnClickListener() {
+            @Override
+            public void onClick(View arg0) {
+                // 0->1, 1->0
+                int new_facing = 1 - facing;
+                yolov8ncnn.closeCamera();
+                yolov8ncnn.openCamera(new_facing);
+                facing = new_facing;
+            }
+        });
+
+        // Get the model-selection Spinner and set its listener
+        spinnerModel = (Spinner) findViewById(R.id.spinnerModel);
+        spinnerModel.setOnItemSelectedListener(new AdapterView.OnItemSelectedListener() {
+            @Override
+            public void onItemSelected(AdapterView<?> arg0, View arg1, int position, long id) {
+                if (position != current_model) {
+                    current_model = position;
+                    // Reload with the newly selected model
+                    reload();
+                }
+            }
+
+            @Override
+            public void onNothingSelected(AdapterView<?> arg0) {
+            }
+        });
+
+        // Get the CPU/GPU Spinner and set its listener
+        spinnerCPUGPU = (Spinner) findViewById(R.id.spinnerCPUGPU);
+        spinnerCPUGPU.setOnItemSelectedListener(new AdapterView.OnItemSelectedListener() {
+            @Override
+            public void onItemSelected(AdapterView<?> arg0, View arg1, int position, long id) {
+                if (position != current_cpugpu) {
+                    current_cpugpu = position;
+                    reload(); // likewise, switching the device requires reloading the model
+                }
+            }
+
+            @Override
+            public void onNothingSelected(AdapterView<?> arg0) {
+            }
+        });
+        // First launch: load the model with the default settings
+        reload();
+    }
+
+    private void reload() {
+        // Load the model
+        boolean ret_init = yolov8ncnn.loadModel(getAssets(), current_model, current_cpugpu);
+        if (!ret_init) {
+            Log.e("MainActivity", "yolov8ncnn loadModel failed");
+        }
+    }
+
+    @Override
+    public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
+        // Called when the SurfaceView size changes; hand the Surface to the native side as the preview output
+        yolov8ncnn.setOutputWindow(holder.getSurface());
+    }
+
+    @Override
+    public void surfaceCreated(SurfaceHolder holder) {
+    }
+
+    @Override
+    public void surfaceDestroyed(SurfaceHolder holder) {
+    }
+
+    // note: runs when the app returns to the foreground
+    @Override
+    public void onResume() {
+        super.onResume();
+
+        if (ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.CAMERA) == PackageManager.PERMISSION_DENIED) {
+            ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.CAMERA}, REQUEST_CAMERA);
+        }
+
+        yolov8ncnn.openCamera(facing);
+    }
+
+    // note: close the camera when the app goes to the background
+    @Override
+    public void onPause() {
+        super.onPause();
+
+        yolov8ncnn.closeCamera();
+    }
+}
diff --git a/android-demo/app/src/main/java/com/gyq/yolov8/Yolov8Ncnn.java b/android-demo/app/src/main/java/com/gyq/yolov8/Yolov8Ncnn.java
new file mode 100644
index 0000000..34d8941
--- /dev/null
+++ b/android-demo/app/src/main/java/com/gyq/yolov8/Yolov8Ncnn.java
@@ -0,0 +1,23 @@
+/*
+ * note: this class only declares the interface used to talk to the C++ layer through JNI (Java Native Interface)
+ */
+package com.gyq.yolov8;
+
+import android.content.res.AssetManager;
+import android.view.Surface;
+
+public class Yolov8Ncnn {
+    // mgr is the AssetManager for reading app assets, modelid is the model ID, cpugpu selects CPU or GPU
+    public native boolean loadModel(AssetManager mgr, int modelid, int cpugpu);
+    // facing selects which camera to open
+    public native boolean openCamera(int facing);
+    // close the camera
+    public native boolean closeCamera();
+    // note: surface is the display target for output frames, backed by the SurfaceView
+    public native boolean setOutputWindow(Surface surface);
+
+    // through JNI, the methods declared public native here are implemented in C++
+    static {
+        System.loadLibrary("yolov8ncnn");
+    }
+}
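The four native methods above must be matched by exported C++ symbols in yolov8ncnn.cpp whose names follow the JNI Java_<package>_<Class>_<method> convention. A minimal sketch of what those entry points look like — the symbol names and parameter types are fixed by the Java declarations, but the bodies here are illustrative stubs, not the PR's actual implementation:

```cpp
// Sketch of the JNI entry points backing Yolov8Ncnn.java (stub bodies).
#include <jni.h>
#include <android/asset_manager_jni.h>

extern "C" {

JNIEXPORT jboolean JNICALL Java_com_gyq_yolov8_Yolov8Ncnn_loadModel(
        JNIEnv* env, jobject /*thiz*/, jobject assetManager, jint modelid, jint cpugpu)
{
    // Bridge the Java AssetManager so the native side can read
    // yolov8n/yolov8s .param and .bin from assets; cpugpu would
    // select ncnn's CPU or Vulkan GPU path.
    AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
    return (mgr != nullptr && modelid >= 0) ? JNI_TRUE : JNI_FALSE;
}

JNIEXPORT jboolean JNICALL Java_com_gyq_yolov8_Yolov8Ncnn_openCamera(
        JNIEnv* /*env*/, jobject /*thiz*/, jint facing)
{
    // Open the back (0) or front (1) camera, e.g. via ndkcamera.cpp.
    return (facing == 0 || facing == 1) ? JNI_TRUE : JNI_FALSE;
}

JNIEXPORT jboolean JNICALL Java_com_gyq_yolov8_Yolov8Ncnn_closeCamera(
        JNIEnv* /*env*/, jobject /*thiz*/)
{
    return JNI_TRUE;
}

JNIEXPORT jboolean JNICALL Java_com_gyq_yolov8_Yolov8Ncnn_setOutputWindow(
        JNIEnv* /*env*/, jobject /*thiz*/, jobject surface)
{
    // The Java Surface would typically be wrapped in an ANativeWindow
    // (ANativeWindow_fromSurface) for rendering the annotated frames.
    return surface != nullptr ? JNI_TRUE : JNI_FALSE;
}

} // extern "C"
```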
diff --git a/android-demo/app/src/main/jni/CMakeLists.txt b/android-demo/app/src/main/jni/CMakeLists.txt
new file mode 100644
index 0000000..963560f
--- /dev/null
+++ b/android-demo/app/src/main/jni/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.10)
+project(yolov8ncnn)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Werror=format-security")
+
+include_directories(ocsort/include)
+include_directories(eigen)
+set(OpenCV_DIR ${CMAKE_SOURCE_DIR}/opencv-mobile-4.6.0-android/sdk/native/jni)
+find_package(OpenCV REQUIRED core imgproc)
+
+set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20230223-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
+find_package(ncnn REQUIRED)
+
+
+add_library(yolov8ncnn SHARED yolov8ncnn.cpp yolo.cpp ndkcamera.cpp
+    ${CMAKE_SOURCE_DIR}/ocsort/src/lapjv.cpp
+    ${CMAKE_SOURCE_DIR}/ocsort/src/KalmanFilter.cpp
+    ${CMAKE_SOURCE_DIR}/ocsort/src/KalmanBoxTracker.cpp
+    ${CMAKE_SOURCE_DIR}/ocsort/src/association.cpp
+    ${CMAKE_SOURCE_DIR}/ocsort/src/OCsort.cpp
+    ${CMAKE_SOURCE_DIR}/ocsort/src/Utilities.cpp
+)
+target_link_libraries(yolov8ncnn ncnn ${OpenCV_LIBS} camera2ndk mediandk)
diff --git a/android-demo/app/src/main/jni/Object.h b/android-demo/app/src/main/jni/Object.h
new file mode 100644
index 0000000..de4b8eb
--- /dev/null
+++ b/android-demo/app/src/main/jni/Object.h
@@ -0,0 +1,9 @@
+#pragma once
+#include <opencv2/core/core.hpp>
+struct Object
+{
+    // Generally it is the coordinates of the top left corner + width and height
+    cv::Rect_<float> rect;
+    int label;
+    float prob;
+};
\ No newline at end of file
diff --git
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/BaseTypes.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/BaseTypes.h new file mode 100644 index 0000000..156d98b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/BaseTypes.h @@ -0,0 +1,609 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _BASICTYPES_INCLUDED_ +#define _BASICTYPES_INCLUDED_ + +namespace glslang { + +// +// Basic type. Arrays, vectors, sampler details, etc., are orthogonal to this. +// +enum TBasicType { + EbtVoid, + EbtFloat, + EbtDouble, + EbtFloat16, + EbtInt8, + EbtUint8, + EbtInt16, + EbtUint16, + EbtInt, + EbtUint, + EbtInt64, + EbtUint64, + EbtBool, + EbtAtomicUint, + EbtSampler, + EbtStruct, + EbtBlock, + EbtAccStruct, + EbtReference, + EbtRayQuery, +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type + EbtSpirvType, +#endif + + // HLSL types that live only temporarily. + EbtString, + + EbtNumTypes +}; + +// +// Storage qualifiers. Should align with different kinds of storage or +// resource or GLSL storage qualifier. Expansion is deprecated. +// +// N.B.: You probably DON'T want to add anything here, but rather just add it +// to the built-in variables. See the comment above TBuiltInVariable. +// +// A new built-in variable will normally be an existing qualifier, like 'in', 'out', etc. +// DO NOT follow the design pattern of, say EvqInstanceId, etc. 
+// +enum TStorageQualifier { + EvqTemporary, // For temporaries (within a function), read/write + EvqGlobal, // For globals read/write + EvqConst, // User-defined constant values, will be semantically constant and constant folded + EvqVaryingIn, // pipeline input, read only, also supercategory for all built-ins not included in this enum (see TBuiltInVariable) + EvqVaryingOut, // pipeline output, read/write, also supercategory for all built-ins not included in this enum (see TBuiltInVariable) + EvqUniform, // read only, shared with app + EvqBuffer, // read/write, shared with app + EvqShared, // compute shader's read/write 'shared' qualifier +#ifndef GLSLANG_WEB + EvqSpirvStorageClass, // spirv_storage_class +#endif + + EvqPayload, + EvqPayloadIn, + EvqHitAttr, + EvqCallableData, + EvqCallableDataIn, + + EvqtaskPayloadSharedEXT, + + // parameters + EvqIn, // also, for 'in' in the grammar before we know if it's a pipeline input or an 'in' parameter + EvqOut, // also, for 'out' in the grammar before we know if it's a pipeline output or an 'out' parameter + EvqInOut, + EvqConstReadOnly, // input; also other read-only types having neither a constant value nor constant-value semantics + + // built-ins read by vertex shader + EvqVertexId, + EvqInstanceId, + + // built-ins written by vertex shader + EvqPosition, + EvqPointSize, + EvqClipVertex, + + // built-ins read by fragment shader + EvqFace, + EvqFragCoord, + EvqPointCoord, + + // built-ins written by fragment shader + EvqFragColor, + EvqFragDepth, + EvqFragStencil, + + // end of list + EvqLast +}; + +// +// Subcategories of the TStorageQualifier, simply to give a direct mapping +// between built-in variable names and an numerical value (the enum). +// +// For backward compatibility, there is some redundancy between the +// TStorageQualifier and these. Existing members should both be maintained accurately. +// However, any new built-in variable (and any existing non-redundant one) +// must follow the pattern that the specific built-in is here, and only its +// general qualifier is in TStorageQualifier. +// +// Something like gl_Position, which is sometimes 'in' and sometimes 'out' +// shows up as two different built-in variables in a single stage, but +// only has a single enum in TBuiltInVariable, so both the +// TStorageQualifier and the TBuitinVariable are needed to distinguish +// between them. 
+// +enum TBuiltInVariable { + EbvNone, + EbvNumWorkGroups, + EbvWorkGroupSize, + EbvWorkGroupId, + EbvLocalInvocationId, + EbvGlobalInvocationId, + EbvLocalInvocationIndex, + EbvNumSubgroups, + EbvSubgroupID, + EbvSubGroupSize, + EbvSubGroupInvocation, + EbvSubGroupEqMask, + EbvSubGroupGeMask, + EbvSubGroupGtMask, + EbvSubGroupLeMask, + EbvSubGroupLtMask, + EbvSubgroupSize2, + EbvSubgroupInvocation2, + EbvSubgroupEqMask2, + EbvSubgroupGeMask2, + EbvSubgroupGtMask2, + EbvSubgroupLeMask2, + EbvSubgroupLtMask2, + EbvVertexId, + EbvInstanceId, + EbvVertexIndex, + EbvInstanceIndex, + EbvBaseVertex, + EbvBaseInstance, + EbvDrawId, + EbvPosition, + EbvPointSize, + EbvClipVertex, + EbvClipDistance, + EbvCullDistance, + EbvNormal, + EbvVertex, + EbvMultiTexCoord0, + EbvMultiTexCoord1, + EbvMultiTexCoord2, + EbvMultiTexCoord3, + EbvMultiTexCoord4, + EbvMultiTexCoord5, + EbvMultiTexCoord6, + EbvMultiTexCoord7, + EbvFrontColor, + EbvBackColor, + EbvFrontSecondaryColor, + EbvBackSecondaryColor, + EbvTexCoord, + EbvFogFragCoord, + EbvInvocationId, + EbvPrimitiveId, + EbvLayer, + EbvViewportIndex, + EbvPatchVertices, + EbvTessLevelOuter, + EbvTessLevelInner, + EbvBoundingBox, + EbvTessCoord, + EbvColor, + EbvSecondaryColor, + EbvFace, + EbvFragCoord, + EbvPointCoord, + EbvFragColor, + EbvFragData, + EbvFragDepth, + EbvFragStencilRef, + EbvSampleId, + EbvSamplePosition, + EbvSampleMask, + EbvHelperInvocation, + + EbvBaryCoordNoPersp, + EbvBaryCoordNoPerspCentroid, + EbvBaryCoordNoPerspSample, + EbvBaryCoordSmooth, + EbvBaryCoordSmoothCentroid, + EbvBaryCoordSmoothSample, + EbvBaryCoordPullModel, + + EbvViewIndex, + EbvDeviceIndex, + + EbvShadingRateKHR, + EbvPrimitiveShadingRateKHR, + + EbvFragSizeEXT, + EbvFragInvocationCountEXT, + + EbvSecondaryFragDataEXT, + EbvSecondaryFragColorEXT, + + EbvViewportMaskNV, + EbvSecondaryPositionNV, + EbvSecondaryViewportMaskNV, + EbvPositionPerViewNV, + EbvViewportMaskPerViewNV, + EbvFragFullyCoveredNV, + EbvFragmentSizeNV, + EbvInvocationsPerPixelNV, + // ray tracing + EbvLaunchId, + EbvLaunchSize, + EbvInstanceCustomIndex, + EbvGeometryIndex, + EbvWorldRayOrigin, + EbvWorldRayDirection, + EbvObjectRayOrigin, + EbvObjectRayDirection, + EbvRayTmin, + EbvRayTmax, + EbvCullMask, + EbvHitT, + EbvHitKind, + EbvObjectToWorld, + EbvObjectToWorld3x4, + EbvWorldToObject, + EbvWorldToObject3x4, + EbvIncomingRayFlags, + EbvCurrentRayTimeNV, + // barycentrics + EbvBaryCoordNV, + EbvBaryCoordNoPerspNV, + EbvBaryCoordEXT, + EbvBaryCoordNoPerspEXT, + // mesh shaders + EbvTaskCountNV, + EbvPrimitiveCountNV, + EbvPrimitiveIndicesNV, + EbvClipDistancePerViewNV, + EbvCullDistancePerViewNV, + EbvLayerPerViewNV, + EbvMeshViewCountNV, + EbvMeshViewIndicesNV, + //GL_EXT_mesh_shader + EbvPrimitivePointIndicesEXT, + EbvPrimitiveLineIndicesEXT, + EbvPrimitiveTriangleIndicesEXT, + EbvCullPrimitiveEXT, + + // sm builtins + EbvWarpsPerSM, + EbvSMCount, + EbvWarpID, + EbvSMID, + + // HLSL built-ins that live only temporarily, until they get remapped + // to one of the above. + EbvFragDepthGreater, + EbvFragDepthLesser, + EbvGsOutputStream, + EbvOutputPatch, + EbvInputPatch, + + // structbuffer types + EbvAppendConsume, // no need to differentiate append and consume + EbvRWStructuredBuffer, + EbvStructuredBuffer, + EbvByteAddressBuffer, + EbvRWByteAddressBuffer, + + EbvLast +}; + +// In this enum, order matters; users can assume higher precision is a bigger value +// and EpqNone is 0. 
+enum TPrecisionQualifier { + EpqNone = 0, + EpqLow, + EpqMedium, + EpqHigh +}; + +#ifdef GLSLANG_WEB +__inline const char* GetStorageQualifierString(TStorageQualifier q) { return ""; } +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) { return ""; } +#else +// These will show up in error messages +__inline const char* GetStorageQualifierString(TStorageQualifier q) +{ + switch (q) { + case EvqTemporary: return "temp"; break; + case EvqGlobal: return "global"; break; + case EvqConst: return "const"; break; + case EvqConstReadOnly: return "const (read only)"; break; +#ifndef GLSLANG_WEB + case EvqSpirvStorageClass: return "spirv_storage_class"; break; +#endif + case EvqVaryingIn: return "in"; break; + case EvqVaryingOut: return "out"; break; + case EvqUniform: return "uniform"; break; + case EvqBuffer: return "buffer"; break; + case EvqShared: return "shared"; break; + case EvqIn: return "in"; break; + case EvqOut: return "out"; break; + case EvqInOut: return "inout"; break; + case EvqVertexId: return "gl_VertexId"; break; + case EvqInstanceId: return "gl_InstanceId"; break; + case EvqPosition: return "gl_Position"; break; + case EvqPointSize: return "gl_PointSize"; break; + case EvqClipVertex: return "gl_ClipVertex"; break; + case EvqFace: return "gl_FrontFacing"; break; + case EvqFragCoord: return "gl_FragCoord"; break; + case EvqPointCoord: return "gl_PointCoord"; break; + case EvqFragColor: return "fragColor"; break; + case EvqFragDepth: return "gl_FragDepth"; break; + case EvqFragStencil: return "gl_FragStencilRefARB"; break; + case EvqPayload: return "rayPayloadNV"; break; + case EvqPayloadIn: return "rayPayloadInNV"; break; + case EvqHitAttr: return "hitAttributeNV"; break; + case EvqCallableData: return "callableDataNV"; break; + case EvqCallableDataIn: return "callableDataInNV"; break; + case EvqtaskPayloadSharedEXT: return "taskPayloadSharedEXT"; break; + default: return "unknown qualifier"; + } +} + +__inline const char* GetBuiltInVariableString(TBuiltInVariable v) +{ + switch (v) { + case EbvNone: return ""; + case EbvNumWorkGroups: return "NumWorkGroups"; + case EbvWorkGroupSize: return "WorkGroupSize"; + case EbvWorkGroupId: return "WorkGroupID"; + case EbvLocalInvocationId: return "LocalInvocationID"; + case EbvGlobalInvocationId: return "GlobalInvocationID"; + case EbvLocalInvocationIndex: return "LocalInvocationIndex"; + case EbvNumSubgroups: return "NumSubgroups"; + case EbvSubgroupID: return "SubgroupID"; + case EbvSubGroupSize: return "SubGroupSize"; + case EbvSubGroupInvocation: return "SubGroupInvocation"; + case EbvSubGroupEqMask: return "SubGroupEqMask"; + case EbvSubGroupGeMask: return "SubGroupGeMask"; + case EbvSubGroupGtMask: return "SubGroupGtMask"; + case EbvSubGroupLeMask: return "SubGroupLeMask"; + case EbvSubGroupLtMask: return "SubGroupLtMask"; + case EbvSubgroupSize2: return "SubgroupSize"; + case EbvSubgroupInvocation2: return "SubgroupInvocationID"; + case EbvSubgroupEqMask2: return "SubgroupEqMask"; + case EbvSubgroupGeMask2: return "SubgroupGeMask"; + case EbvSubgroupGtMask2: return "SubgroupGtMask"; + case EbvSubgroupLeMask2: return "SubgroupLeMask"; + case EbvSubgroupLtMask2: return "SubgroupLtMask"; + case EbvVertexId: return "VertexId"; + case EbvInstanceId: return "InstanceId"; + case EbvVertexIndex: return "VertexIndex"; + case EbvInstanceIndex: return "InstanceIndex"; + case EbvBaseVertex: return "BaseVertex"; + case EbvBaseInstance: return "BaseInstance"; + case EbvDrawId: return "DrawId"; + case EbvPosition: return 
"Position"; + case EbvPointSize: return "PointSize"; + case EbvClipVertex: return "ClipVertex"; + case EbvClipDistance: return "ClipDistance"; + case EbvCullDistance: return "CullDistance"; + case EbvNormal: return "Normal"; + case EbvVertex: return "Vertex"; + case EbvMultiTexCoord0: return "MultiTexCoord0"; + case EbvMultiTexCoord1: return "MultiTexCoord1"; + case EbvMultiTexCoord2: return "MultiTexCoord2"; + case EbvMultiTexCoord3: return "MultiTexCoord3"; + case EbvMultiTexCoord4: return "MultiTexCoord4"; + case EbvMultiTexCoord5: return "MultiTexCoord5"; + case EbvMultiTexCoord6: return "MultiTexCoord6"; + case EbvMultiTexCoord7: return "MultiTexCoord7"; + case EbvFrontColor: return "FrontColor"; + case EbvBackColor: return "BackColor"; + case EbvFrontSecondaryColor: return "FrontSecondaryColor"; + case EbvBackSecondaryColor: return "BackSecondaryColor"; + case EbvTexCoord: return "TexCoord"; + case EbvFogFragCoord: return "FogFragCoord"; + case EbvInvocationId: return "InvocationID"; + case EbvPrimitiveId: return "PrimitiveID"; + case EbvLayer: return "Layer"; + case EbvViewportIndex: return "ViewportIndex"; + case EbvPatchVertices: return "PatchVertices"; + case EbvTessLevelOuter: return "TessLevelOuter"; + case EbvTessLevelInner: return "TessLevelInner"; + case EbvBoundingBox: return "BoundingBox"; + case EbvTessCoord: return "TessCoord"; + case EbvColor: return "Color"; + case EbvSecondaryColor: return "SecondaryColor"; + case EbvFace: return "Face"; + case EbvFragCoord: return "FragCoord"; + case EbvPointCoord: return "PointCoord"; + case EbvFragColor: return "FragColor"; + case EbvFragData: return "FragData"; + case EbvFragDepth: return "FragDepth"; + case EbvFragStencilRef: return "FragStencilRef"; + case EbvSampleId: return "SampleId"; + case EbvSamplePosition: return "SamplePosition"; + case EbvSampleMask: return "SampleMaskIn"; + case EbvHelperInvocation: return "HelperInvocation"; + + case EbvBaryCoordNoPersp: return "BaryCoordNoPersp"; + case EbvBaryCoordNoPerspCentroid: return "BaryCoordNoPerspCentroid"; + case EbvBaryCoordNoPerspSample: return "BaryCoordNoPerspSample"; + case EbvBaryCoordSmooth: return "BaryCoordSmooth"; + case EbvBaryCoordSmoothCentroid: return "BaryCoordSmoothCentroid"; + case EbvBaryCoordSmoothSample: return "BaryCoordSmoothSample"; + case EbvBaryCoordPullModel: return "BaryCoordPullModel"; + + case EbvViewIndex: return "ViewIndex"; + case EbvDeviceIndex: return "DeviceIndex"; + + case EbvFragSizeEXT: return "FragSizeEXT"; + case EbvFragInvocationCountEXT: return "FragInvocationCountEXT"; + + case EbvSecondaryFragDataEXT: return "SecondaryFragDataEXT"; + case EbvSecondaryFragColorEXT: return "SecondaryFragColorEXT"; + + case EbvViewportMaskNV: return "ViewportMaskNV"; + case EbvSecondaryPositionNV: return "SecondaryPositionNV"; + case EbvSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; + case EbvPositionPerViewNV: return "PositionPerViewNV"; + case EbvViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; + case EbvFragFullyCoveredNV: return "FragFullyCoveredNV"; + case EbvFragmentSizeNV: return "FragmentSizeNV"; + case EbvInvocationsPerPixelNV: return "InvocationsPerPixelNV"; + case EbvLaunchId: return "LaunchIdNV"; + case EbvLaunchSize: return "LaunchSizeNV"; + case EbvInstanceCustomIndex: return "InstanceCustomIndexNV"; + case EbvGeometryIndex: return "GeometryIndexEXT"; + case EbvWorldRayOrigin: return "WorldRayOriginNV"; + case EbvWorldRayDirection: return "WorldRayDirectionNV"; + case EbvObjectRayOrigin: return "ObjectRayOriginNV"; + 
case EbvObjectRayDirection: return "ObjectRayDirectionNV"; + case EbvRayTmin: return "ObjectRayTminNV"; + case EbvRayTmax: return "ObjectRayTmaxNV"; + case EbvHitT: return "HitTNV"; + case EbvHitKind: return "HitKindNV"; + case EbvIncomingRayFlags: return "IncomingRayFlagsNV"; + case EbvObjectToWorld: return "ObjectToWorldNV"; + case EbvWorldToObject: return "WorldToObjectNV"; + case EbvCurrentRayTimeNV: return "CurrentRayTimeNV"; + + case EbvBaryCoordEXT: + case EbvBaryCoordNV: return "BaryCoordKHR"; + case EbvBaryCoordNoPerspEXT: + case EbvBaryCoordNoPerspNV: return "BaryCoordNoPerspKHR"; + + case EbvTaskCountNV: return "TaskCountNV"; + case EbvPrimitiveCountNV: return "PrimitiveCountNV"; + case EbvPrimitiveIndicesNV: return "PrimitiveIndicesNV"; + case EbvClipDistancePerViewNV: return "ClipDistancePerViewNV"; + case EbvCullDistancePerViewNV: return "CullDistancePerViewNV"; + case EbvLayerPerViewNV: return "LayerPerViewNV"; + case EbvMeshViewCountNV: return "MeshViewCountNV"; + case EbvMeshViewIndicesNV: return "MeshViewIndicesNV"; + // GL_EXT_mesh_shader + case EbvPrimitivePointIndicesEXT: return "PrimitivePointIndicesEXT"; + case EbvPrimitiveLineIndicesEXT: return "PrimitiveLineIndicesEXT"; + case EbvPrimitiveTriangleIndicesEXT: return "PrimitiveTriangleIndicesEXT"; + case EbvCullPrimitiveEXT: return "CullPrimitiveEXT"; + + case EbvWarpsPerSM: return "WarpsPerSMNV"; + case EbvSMCount: return "SMCountNV"; + case EbvWarpID: return "WarpIDNV"; + case EbvSMID: return "SMIDNV"; + + case EbvShadingRateKHR: return "ShadingRateKHR"; + case EbvPrimitiveShadingRateKHR: return "PrimitiveShadingRateKHR"; + + default: return "unknown built-in variable"; + } +} + +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) +{ + switch (p) { + case EpqNone: return ""; break; + case EpqLow: return "lowp"; break; + case EpqMedium: return "mediump"; break; + case EpqHigh: return "highp"; break; + default: return "unknown precision qualifier"; + } +} +#endif + +__inline bool isTypeSignedInt(TBasicType type) +{ + switch (type) { + case EbtInt8: + case EbtInt16: + case EbtInt: + case EbtInt64: + return true; + default: + return false; + } +} + +__inline bool isTypeUnsignedInt(TBasicType type) +{ + switch (type) { + case EbtUint8: + case EbtUint16: + case EbtUint: + case EbtUint64: + return true; + default: + return false; + } +} + +__inline bool isTypeInt(TBasicType type) +{ + return isTypeSignedInt(type) || isTypeUnsignedInt(type); +} + +__inline bool isTypeFloat(TBasicType type) +{ + switch (type) { + case EbtFloat: + case EbtDouble: + case EbtFloat16: + return true; + default: + return false; + } +} + +__inline int getTypeRank(TBasicType type) +{ + int res = -1; + switch(type) { + case EbtInt8: + case EbtUint8: + res = 0; + break; + case EbtInt16: + case EbtUint16: + res = 1; + break; + case EbtInt: + case EbtUint: + res = 2; + break; + case EbtInt64: + case EbtUint64: + res = 3; + break; + default: + assert(false); + break; + } + return res; +} + +} // end namespace glslang + +#endif // _BASICTYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/Common.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/Common.h new file mode 100644 index 0000000..a5b41cb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/Common.h @@ -0,0 +1,340 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. 
+// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _COMMON_INCLUDED_ +#define _COMMON_INCLUDED_ + +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__ANDROID__) || (defined(_MSC_VER) && _MSC_VER < 1700) +#include +namespace std { +template +std::string to_string(const T& val) { + std::ostringstream os; + os << val; + return os.str(); +} +} +#endif + +#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) + #include + #ifndef snprintf + #define snprintf sprintf_s + #endif + #define safe_vsprintf(buf,max,format,args) vsnprintf_s((buf), (max), (max), (format), (args)) +#elif defined (solaris) + #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args)) + #include + #define UINT_PTR uintptr_t +#else + #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args)) + #include + #define UINT_PTR uintptr_t +#endif + +#if defined(_MSC_VER) && _MSC_VER < 1800 + #include + inline long long int strtoll (const char* str, char** endptr, int base) + { + return _strtoi64(str, endptr, base); + } + inline unsigned long long int strtoull (const char* str, char** endptr, int base) + { + return _strtoui64(str, endptr, base); + } + inline long long int atoll (const char* str) + { + return strtoll(str, NULL, 10); + } +#endif + +#if defined(_MSC_VER) +#define strdup _strdup +#endif + +/* windows only pragma */ +#ifdef _MSC_VER + #pragma warning(disable : 4786) // Don't warn about too long identifiers + #pragma warning(disable : 4514) // unused inline method + #pragma warning(disable : 4201) // nameless union +#endif + +#include "PoolAlloc.h" + +// +// Put POOL_ALLOCATOR_NEW_DELETE in base classes to make them use this scheme. 
+// +#define POOL_ALLOCATOR_NEW_DELETE(A) \ + void* operator new(size_t s) { return (A).allocate(s); } \ + void* operator new(size_t, void *_Where) { return (_Where); } \ + void operator delete(void*) { } \ + void operator delete(void *, void *) { } \ + void* operator new[](size_t s) { return (A).allocate(s); } \ + void* operator new[](size_t, void *_Where) { return (_Where); } \ + void operator delete[](void*) { } \ + void operator delete[](void *, void *) { } + +namespace glslang { + + // + // Pool version of string. + // + typedef pool_allocator TStringAllocator; + typedef std::basic_string , TStringAllocator> TString; + +} // end namespace glslang + +// Repackage the std::hash for use by unordered map/set with a TString key. +namespace std { + + template<> struct hash { + std::size_t operator()(const glslang::TString& s) const + { + const unsigned _FNV_offset_basis = 2166136261U; + const unsigned _FNV_prime = 16777619U; + unsigned _Val = _FNV_offset_basis; + size_t _Count = s.size(); + const char* _First = s.c_str(); + for (size_t _Next = 0; _Next < _Count; ++_Next) + { + _Val ^= (unsigned)_First[_Next]; + _Val *= _FNV_prime; + } + + return _Val; + } + }; +} + +namespace glslang { + +inline TString* NewPoolTString(const char* s) +{ + void* memory = GetThreadPoolAllocator().allocate(sizeof(TString)); + return new(memory) TString(s); +} + +template inline T* NewPoolObject(T*) +{ + return new(GetThreadPoolAllocator().allocate(sizeof(T))) T; +} + +template inline T* NewPoolObject(T, int instances) +{ + return new(GetThreadPoolAllocator().allocate(instances * sizeof(T))) T[instances]; +} + +// +// Pool allocator versions of vectors, lists, and maps +// +template class TVector : public std::vector > { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + typedef typename std::vector >::size_type size_type; + TVector() : std::vector >() {} + TVector(const pool_allocator& a) : std::vector >(a) {} + TVector(size_type i) : std::vector >(i) {} + TVector(size_type i, const T& val) : std::vector >(i, val) {} +}; + +template class TList : public std::list > { +}; + +template > +class TMap : public std::map > > { +}; + +template , class PRED = std::equal_to > +class TUnorderedMap : public std::unordered_map > > { +}; + +template > +class TSet : public std::set > { +}; + +// +// Persistent string memory. Should only be used for strings that survive +// across compiles/links. +// +typedef std::basic_string TPersistString; + +// +// templatized min and max functions. +// +template T Min(const T a, const T b) { return a < b ? a : b; } +template T Max(const T a, const T b) { return a > b ? a : b; } + +// +// Create a TString object from an integer. +// +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) +inline const TString String(const int i, const int base = 10) +{ + char text[16]; // 32 bit ints are at most 10 digits in base 10 + _itoa_s(i, text, sizeof(text), base); + return text; +} +#else +inline const TString String(const int i, const int /*base*/ = 10) +{ + char text[16]; // 32 bit ints are at most 10 digits in base 10 + + // we assume base 10 for all cases + snprintf(text, sizeof(text), "%d", i); + + return text; +} +#endif + +struct TSourceLoc { + void init() + { + name = nullptr; string = 0; line = 0; column = 0; + } + void init(int stringNum) { init(); string = stringNum; } + // Returns the name if it exists. Otherwise, returns the string number. 
+
+struct TSourceLoc {
+    void init()
+    {
+        name = nullptr; string = 0; line = 0; column = 0;
+    }
+    void init(int stringNum) { init(); string = stringNum; }
+    // Returns the name if it exists. Otherwise, returns the string number.
+    std::string getStringNameOrNum(bool quoteStringName = true) const
+    {
+        if (name != nullptr) {
+            TString qstr = quoteStringName ? ("\"" + *name + "\"") : *name;
+            std::string ret_str(qstr.c_str());
+            return ret_str;
+        }
+        return std::to_string((long long)string);
+    }
+    const char* getFilename() const
+    {
+        if (name == nullptr)
+            return nullptr;
+        return name->c_str();
+    }
+    const char* getFilenameStr() const { return name == nullptr ? "" : name->c_str(); }
+    TString* name; // descriptive name for this string, when a textual name is available, otherwise nullptr
+    int string;
+    int line;
+    int column;
+};
+
+class TPragmaTable : public TMap<TString, TString> {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+};
+
+const int MaxTokenLength = 1024;
+
+template <class T> bool IsPow2(T powerOf2)
+{
+    if (powerOf2 <= 0)
+        return false;
+
+    return (powerOf2 & (powerOf2 - 1)) == 0;
+}
+
+// Round number up to a multiple of the given powerOf2, which is not
+// a power, just a number that must be a power of 2.
+template <class T> void RoundToPow2(T& number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    number = (number + powerOf2 - 1) & ~(powerOf2 - 1);
+}
+
+template <class T> bool IsMultipleOfPow2(T number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    return ! (number & (powerOf2 - 1));
+}
+
+// Returns log2 of an integer power of 2.
+// T should be integral.
+template <class T> int IntLog2(T n)
+{
+    assert(IsPow2(n));
+    int result = 0;
+    while ((T(1) << result) != n) {
+        result++;
+    }
+    return result;
+}
+
+inline bool IsInfinity(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_NINF:
+    case _FPCLASS_PINF:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isinf(x);
+#endif
+}
+
+inline bool IsNan(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_SNAN:
+    case _FPCLASS_QNAN:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isnan(x);
+#endif
+}
+
+} // end namespace glslang
+
+#endif // _COMMON_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ConstantUnion.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ConstantUnion.h
new file mode 100644
index 0000000..c4ffb85
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ConstantUnion.h
@@ -0,0 +1,974 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _CONSTANT_UNION_INCLUDED_ +#define _CONSTANT_UNION_INCLUDED_ + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" + +namespace glslang { + +class TConstUnion { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TConstUnion() : iConst(0), type(EbtInt) { } + + void setI8Const(signed char i) + { + i8Const = i; + type = EbtInt8; + } + + void setU8Const(unsigned char u) + { + u8Const = u; + type = EbtUint8; + } + + void setI16Const(signed short i) + { + i16Const = i; + type = EbtInt16; + } + + void setU16Const(unsigned short u) + { + u16Const = u; + type = EbtUint16; + } + + void setIConst(int i) + { + iConst = i; + type = EbtInt; + } + + void setUConst(unsigned int u) + { + uConst = u; + type = EbtUint; + } + + void setI64Const(long long i64) + { + i64Const = i64; + type = EbtInt64; + } + + void setU64Const(unsigned long long u64) + { + u64Const = u64; + type = EbtUint64; + } + + void setDConst(double d) + { + dConst = d; + type = EbtDouble; + } + + void setBConst(bool b) + { + bConst = b; + type = EbtBool; + } + + void setSConst(const TString* s) + { + sConst = s; + type = EbtString; + } + + signed char getI8Const() const { return i8Const; } + unsigned char getU8Const() const { return u8Const; } + signed short getI16Const() const { return i16Const; } + unsigned short getU16Const() const { return u16Const; } + int getIConst() const { return iConst; } + unsigned int getUConst() const { return uConst; } + long long getI64Const() const { return i64Const; } + unsigned long long getU64Const() const { return u64Const; } + double getDConst() const { return dConst; } + bool getBConst() const { return bConst; } + const TString* getSConst() const { return sConst; } + + bool operator==(const signed char i) const + { + if (i == i8Const) + return true; + + return false; + } + + bool operator==(const unsigned char u) const + { + if (u == u8Const) + return true; + + return false; + } + + bool operator==(const signed short i) const + { + if (i == i16Const) + return true; + + return false; + } + + bool operator==(const unsigned short u) const + { + if (u == u16Const) + return true; + + return false; + } + + bool operator==(const int i) const + { + if (i == iConst) + return true; + + return false; + } + + bool operator==(const unsigned int u) const + { + if (u == uConst) + return true; + + return false; + } + + bool operator==(const long long i64) const + { + if (i64 == i64Const) + return true; + + return false; + } + + bool operator==(const unsigned long long u64) const + { + if (u64 == u64Const) + return true; + + return false; + } + + bool operator==(const double d) const + { + if (d == dConst) + return true; + + return false; + } + + bool operator==(const bool b) const + { + if (b == bConst) + return true; + + return false; + } + + bool operator==(const TConstUnion& constant) const + { + if (constant.type != type) + return false; + + switch (type) { + case EbtInt: + if (constant.iConst == iConst) + return true; + + break; + 
case EbtUint: + if (constant.uConst == uConst) + return true; + + break; + case EbtBool: + if (constant.bConst == bConst) + return true; + + break; + case EbtDouble: + if (constant.dConst == dConst) + return true; + + break; + +#ifndef GLSLANG_WEB + case EbtInt16: + if (constant.i16Const == i16Const) + return true; + + break; + case EbtUint16: + if (constant.u16Const == u16Const) + return true; + + break; + case EbtInt8: + if (constant.i8Const == i8Const) + return true; + + break; + case EbtUint8: + if (constant.u8Const == u8Const) + return true; + + break; + case EbtInt64: + if (constant.i64Const == i64Const) + return true; + + break; + case EbtUint64: + if (constant.u64Const == u64Const) + return true; + + break; +#endif + default: + assert(false && "Default missing"); + } + + return false; + } + + bool operator!=(const signed char i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned char u) const + { + return !operator==(u); + } + + bool operator!=(const signed short i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned short u) const + { + return !operator==(u); + } + + bool operator!=(const int i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned int u) const + { + return !operator==(u); + } + + bool operator!=(const long long i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned long long u) const + { + return !operator==(u); + } + + bool operator!=(const float f) const + { + return !operator==(f); + } + + bool operator!=(const bool b) const + { + return !operator==(b); + } + + bool operator!=(const TConstUnion& constant) const + { + return !operator==(constant); + } + + bool operator>(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { + case EbtInt: + if (iConst > constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst > constant.uConst) + return true; + + return false; + case EbtDouble: + if (dConst > constant.dConst) + return true; + + return false; +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const > constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const > constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const > constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const > constant.u16Const) + return true; + + return false; + case EbtInt64: + if (i64Const > constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const > constant.u64Const) + return true; + + return false; +#endif + default: + assert(false && "Default missing"); + return false; + } + } + + bool operator<(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const < constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const < constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const < constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const < constant.u16Const) + return true; + return false; + case EbtInt64: + if (i64Const < constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const < constant.u64Const) + return true; + + return false; +#endif + case EbtDouble: + if (dConst < constant.dConst) + return true; + + return false; + case EbtInt: + if (iConst < constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst < constant.uConst) + return true; + + return false; 
+        default:
+            assert(false && "Default missing");
+            return false;
+        }
+    }
+
+    TConstUnion operator+(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst + constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst + constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst + constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const + constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const + constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const + constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const + constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const + constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const + constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator-(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst - constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst - constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst - constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const - constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const - constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const - constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const - constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const - constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const - constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator*(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst * constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst * constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst * constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const * constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const * constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const * constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const * constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const * constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const * constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator%(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst % constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst % constant.uConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const % constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const % constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const % constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const % constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const %
constant.u16Const); break; + case EbtUint64: returnValue.setU64Const(u64Const % constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator>>(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const >> constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const >> constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const >> constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const >> constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const >> constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const >> constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const >> constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const >> constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const >> constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const >> constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const >> constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const >> constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst >> constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst >> constant.i8Const); break; + case EbtUint8: 
returnValue.setIConst(iConst >> constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst >> constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst >> constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst >> constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst >> constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst >> constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst >> constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst >> constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst >> constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst >> constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; +#ifndef GLSLANG_WEB + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const >> constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const >> constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const >> constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const >> constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const >> constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const >> constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator<<(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const << constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const << constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const << constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const << constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const << constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const << constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const << constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const << constant.u64Const); break; + default: 
assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const << constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const << constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const << constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const << constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const << constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const << constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const << constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const << constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const << constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const << constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const << constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const << constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const << constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const << constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const << constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const << constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const << constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const << constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const << constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const << constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const << constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const << constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const << constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const << constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const << constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const << constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const << constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const << constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const << constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const << constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const << constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const << constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const << constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const << constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const << 
constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst << constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst << constant.i8Const); break; + case EbtUint8: returnValue.setIConst(iConst << constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst << constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst << constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst << constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst << constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst << constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst << constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst << constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst << constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst << constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst & constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst & constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const & constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const & constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const & constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const & constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const & constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const & constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator|(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst | constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst | constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const | constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const | constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const | constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const | constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const | constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const | constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator^(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == 
constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst ^ constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst ^ constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const ^ constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const ^ constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const ^ constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const ^ constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const ^ constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const ^ constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator~() const + { + TConstUnion returnValue; + switch (type) { + case EbtInt: returnValue.setIConst(~iConst); break; + case EbtUint: returnValue.setUConst(~uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(~i8Const); break; + case EbtUint8: returnValue.setU8Const(~u8Const); break; + case EbtInt16: returnValue.setI16Const(~i16Const); break; + case EbtUint16: returnValue.setU16Const(~u16Const); break; + case EbtInt64: returnValue.setI64Const(~i64Const); break; + case EbtUint64: returnValue.setU64Const(~u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtBool: returnValue.setBConst(bConst && constant.bConst); break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator||(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtBool: returnValue.setBConst(bConst || constant.bConst); break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TBasicType getType() const { return type; } + +private: + union { + signed char i8Const; // used for i8vec, scalar int8s + unsigned char u8Const; // used for u8vec, scalar uint8s + signed short i16Const; // used for i16vec, scalar int16s + unsigned short u16Const; // used for u16vec, scalar uint16s + int iConst; // used for ivec, scalar ints + unsigned int uConst; // used for uvec, scalar uints + long long i64Const; // used for i64vec, scalar int64s + unsigned long long u64Const; // used for u64vec, scalar uint64s + bool bConst; // used for bvec, scalar bools + double dConst; // used for vec, dvec, mat, dmat, scalar floats and doubles + const TString* sConst; // string constant + }; + + TBasicType type; +}; + +// Encapsulate having a pointer to an array of TConstUnion, +// which only needs to be allocated if its size is going to be +// bigger than 0. +// +// One convenience is being able to use [] to go inside the array, instead +// of C++ assuming it as an array of pointers to vectors. +// +// General usage is that the size is known up front, and it is +// created once with the proper size. 
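+//
+// NOTE (editorial): a minimal folding sketch for the element type above,
+// not part of the original header; the variable names are illustrative:
+//
+//   TConstUnion a, b;
+//   a.setIConst(6); b.setIConst(7);
+//   TConstUnion c = a * b;            // c.getIConst() == 42, type EbtInt
+//   bool same = (c == 42);            // typed comparison via operator==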
+//
+class TConstUnionArray {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TConstUnionArray() : unionArray(nullptr) { }
+    virtual ~TConstUnionArray() { }
+
+    explicit TConstUnionArray(int size)
+    {
+        if (size == 0)
+            unionArray = nullptr;
+        else
+            unionArray = new TConstUnionVector(size);
+    }
+    TConstUnionArray(const TConstUnionArray& a) = default;
+    TConstUnionArray(const TConstUnionArray& a, int start, int size)
+    {
+        unionArray = new TConstUnionVector(size);
+        for (int i = 0; i < size; ++i)
+            (*unionArray)[i] = a[start + i];
+    }
+
+    // Use this constructor for a smear operation
+    TConstUnionArray(int size, const TConstUnion& val)
+    {
+        unionArray = new TConstUnionVector(size, val);
+    }
+
+    int size() const { return unionArray ? (int)unionArray->size() : 0; }
+    TConstUnion& operator[](size_t index) { return (*unionArray)[index]; }
+    const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; }
+    bool operator==(const TConstUnionArray& rhs) const
+    {
+        // this includes the case that both are unallocated
+        if (unionArray == rhs.unionArray)
+            return true;
+
+        if (! unionArray || ! rhs.unionArray)
+            return false;
+
+        return *unionArray == *rhs.unionArray;
+    }
+    bool operator!=(const TConstUnionArray& rhs) const { return ! operator==(rhs); }
+
+    double dot(const TConstUnionArray& rhs)
+    {
+        assert(rhs.unionArray->size() == unionArray->size());
+        double sum = 0.0;
+
+        for (size_t comp = 0; comp < unionArray->size(); ++comp)
+            sum += (*this)[comp].getDConst() * rhs[comp].getDConst();
+
+        return sum;
+    }
+
+    bool empty() const { return unionArray == nullptr; }
+
+protected:
+    typedef TVector<TConstUnion> TConstUnionVector;
+    TConstUnionVector* unionArray;
+};
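+
+// NOTE (editorial): a minimal sketch of the smear constructor and dot(),
+// not part of the original header; dot() assumes double-typed contents:
+//
+//   TConstUnion half;  half.setDConst(0.5);
+//   TConstUnionArray v(4, half);      // "smear": four doubles, all 0.5
+//   double len2 = v.dot(v);           // 4 * (0.5 * 0.5) == 1.0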
+
+} // end namespace glslang
+
+#endif // _CONSTANT_UNION_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/InfoSink.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/InfoSink.h
new file mode 100644
index 0000000..dceb603
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/InfoSink.h
@@ -0,0 +1,144 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _INFOSINK_INCLUDED_
+#define _INFOSINK_INCLUDED_
+
+#include "../Include/Common.h"
+#include <cmath>
+
+namespace glslang {
+
+//
+// TPrefixType is used to centralize how info log messages start.
+// See below.
+//
+enum TPrefixType {
+    EPrefixNone,
+    EPrefixWarning,
+    EPrefixError,
+    EPrefixInternalError,
+    EPrefixUnimplemented,
+    EPrefixNote
+};
+
+enum TOutputStream {
+    ENull = 0,
+    EDebugger = 0x01,
+    EStdOut = 0x02,
+    EString = 0x04,
+};
+//
+// Encapsulate info logs for all objects that have them.
+//
+// The methods are a general set of tools for getting a variety of
+// messages and types inserted into the log.
+//
+class TInfoSinkBase {
+public:
+    TInfoSinkBase() : outputStream(4) {}
+    void erase() { sink.erase(); }
+    TInfoSinkBase& operator<<(const TPersistString& t) { append(t); return *this; }
+    TInfoSinkBase& operator<<(char c)                  { append(1, c); return *this; }
+    TInfoSinkBase& operator<<(const char* s)           { append(s); return *this; }
+    TInfoSinkBase& operator<<(int n)                   { append(String(n)); return *this; }
+    TInfoSinkBase& operator<<(unsigned int n)          { append(String(n)); return *this; }
+    TInfoSinkBase& operator<<(float n)                 { const int size = 40; char buf[size];
+                                                         snprintf(buf, size, (fabs(n) > 1e-8 && fabs(n) < 1e8) || n == 0.0f ?
"%f" : "%g", n); + append(buf); + return *this; } + TInfoSinkBase& operator+(const TPersistString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator<<(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const char* s) { append(s); return *this; } + const char* c_str() const { return sink.c_str(); } + void prefix(TPrefixType message) { + switch(message) { + case EPrefixNone: break; + case EPrefixWarning: append("WARNING: "); break; + case EPrefixError: append("ERROR: "); break; + case EPrefixInternalError: append("INTERNAL ERROR: "); break; + case EPrefixUnimplemented: append("UNIMPLEMENTED: "); break; + case EPrefixNote: append("NOTE: "); break; + default: append("UNKNOWN ERROR: "); break; + } + } + void location(const TSourceLoc& loc) { + const int maxSize = 24; + char locText[maxSize]; + snprintf(locText, maxSize, ":%d", loc.line); + append(loc.getStringNameOrNum(false).c_str()); + append(locText); + append(": "); + } + void message(TPrefixType message, const char* s) { + prefix(message); + append(s); + append("\n"); + } + void message(TPrefixType message, const char* s, const TSourceLoc& loc) { + prefix(message); + location(loc); + append(s); + append("\n"); + } + + void setOutputStream(int output = 4) + { + outputStream = output; + } + +protected: + void append(const char* s); + + void append(int count, char c); + void append(const TPersistString& t); + void append(const TString& t); + + void checkMem(size_t growth) { if (sink.capacity() < sink.size() + growth + 2) + sink.reserve(sink.capacity() + sink.capacity() / 2); } + void appendToStream(const char* s); + TPersistString sink; + int outputStream; +}; + +} // end namespace glslang + +class TInfoSink { +public: + glslang::TInfoSinkBase info; + glslang::TInfoSinkBase debug; +}; + +#endif // _INFOSINK_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/InitializeGlobals.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/InitializeGlobals.h new file mode 100644 index 0000000..95d0a40 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/InitializeGlobals.h @@ -0,0 +1,44 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef __INITIALIZE_GLOBALS_INCLUDED_ +#define __INITIALIZE_GLOBALS_INCLUDED_ + +namespace glslang { + +bool InitializePoolIndex(); + +} // end namespace glslang + +#endif // __INITIALIZE_GLOBALS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/PoolAlloc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/PoolAlloc.h new file mode 100644 index 0000000..1f5cac7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/PoolAlloc.h @@ -0,0 +1,318 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _POOLALLOC_INCLUDED_ +#define _POOLALLOC_INCLUDED_ + +#ifdef _DEBUG +# define GUARD_BLOCKS // define to enable guard block sanity checking +#endif + +// +// This header defines an allocator that can be used to efficiently +// allocate a large number of small requests for heap memory, with the +// intention that they are not individually deallocated, but rather +// collectively deallocated at one time. +// +// This simultaneously +// +// * Makes each individual allocation much more efficient; the +// typical allocation is trivial. +// * Completely avoids the cost of doing individual deallocation. +// * Saves the trouble of tracking down and plugging a large class of leaks. 
+//
+// Individual classes can use this allocator by supplying their own
+// new and delete methods.
+//
+// STL containers can use this allocator by using the pool_allocator
+// class as the allocator (second) template argument.
+//
+
+#include <cstddef>
+#include <cstring>
+#include <vector>
+
+namespace glslang {
+
+// If we are using guard blocks, we must track each individual
+// allocation.  If we aren't using guard blocks, these
+// never get instantiated, so won't have any impact.
+//
+
+class TAllocation {
+public:
+    TAllocation(size_t size, unsigned char* mem, TAllocation* prev = 0) :
+        size(size), mem(mem), prevAlloc(prev) {
+        // Allocations are bracketed:
+        //    [allocationHeader][initialGuardBlock][userData][finalGuardBlock]
+        // This would be cleaner with if (guardBlockSize)..., but that
+        // makes the compiler print warnings about 0 length memsets,
+        // even with the if() protecting them.
+#       ifdef GUARD_BLOCKS
+            memset(preGuard(),  guardBlockBeginVal, guardBlockSize);
+            memset(data(),      userDataFill,       size);
+            memset(postGuard(), guardBlockEndVal,   guardBlockSize);
+#       endif
+    }
+
+    void check() const {
+        checkGuardBlock(preGuard(),  guardBlockBeginVal, "before");
+        checkGuardBlock(postGuard(), guardBlockEndVal,   "after");
+    }
+
+    void checkAllocList() const;
+
+    // Return total size needed to accommodate user buffer of 'size',
+    // plus our tracking data.
+    inline static size_t allocationSize(size_t size) {
+        return size + 2 * guardBlockSize + headerSize();
+    }
+
+    // Offset from surrounding buffer to get to user data buffer.
+    inline static unsigned char* offsetAllocation(unsigned char* m) {
+        return m + guardBlockSize + headerSize();
+    }
+
+private:
+    void checkGuardBlock(unsigned char* blockMem, unsigned char val, const char* locText) const;
+
+    // Find offsets to pre and post guard blocks, and user data buffer
+    unsigned char* preGuard()  const { return mem + headerSize(); }
+    unsigned char* data()      const { return preGuard() + guardBlockSize; }
+    unsigned char* postGuard() const { return data() + size; }
+
+    size_t size;                  // size of the user data area
+    unsigned char* mem;           // beginning of our allocation (pts to header)
+    TAllocation* prevAlloc;       // prior allocation in the chain
+
+    const static unsigned char guardBlockBeginVal;
+    const static unsigned char guardBlockEndVal;
+    const static unsigned char userDataFill;
+
+    const static size_t guardBlockSize;
+#   ifdef GUARD_BLOCKS
+    inline static size_t headerSize() { return sizeof(TAllocation); }
+#   else
+    inline static size_t headerSize() { return 0; }
+#   endif
+};
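+
+// NOTE (editorial): a worked example of the bracketing above, assuming
+// GUARD_BLOCKS is defined so headerSize() == sizeof(TAllocation):
+//
+//   allocationSize(100) == 100 + 2 * guardBlockSize + sizeof(TAllocation)
+//
+// and, for a raw pointer 'm' into a page, offsetAllocation(m) skips the
+// header and the initial guard block to land on the user data:
+//
+//   [TAllocation][guard][user data (100 bytes)][guard]
+//    ^m                  ^offsetAllocation(m)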
+
+//
+// There are several stacks.  One is to track the pushing and popping
+// of the user, and not yet implemented.  The others are simply a
+// repositories of free pages or used pages.
+//
+// Page stacks are linked together with a simple header at the beginning
+// of each allocation obtained from the underlying OS.  Multi-page allocations
+// are returned to the OS.  Individual page allocations are kept for future
+// re-use.
+//
+// The "page size" used is not, nor must it match, the underlying OS
+// page size.  But, having it be about that size or equal to a set of
+// pages is likely most optimal.
+//
+class TPoolAllocator {
+public:
+    TPoolAllocator(int growthIncrement = 8*1024, int allocationAlignment = 16);
+
+    //
+    // Don't call the destructor just to free up the memory, call pop()
+    //
+    ~TPoolAllocator();
+
+    //
+    // Call push() to establish a new place to pop memory to.  Does not
+    // have to be called to get things started.
+    //
+    void push();
+
+    //
+    // Call pop() to free all memory allocated since the last call to push(),
+    // or if no last call to push, frees all memory since first allocation.
+    //
+    void pop();
+
+    //
+    // Call popAll() to free all memory allocated.
+    //
+    void popAll();
+
+    //
+    // Call allocate() to actually acquire memory.  Returns 0 if no memory
+    // available, otherwise a properly aligned pointer to 'numBytes' of memory.
+    //
+    void* allocate(size_t numBytes);
+
+    //
+    // There is no deallocate.  The point of this class is that
+    // deallocation can be skipped by the user of it, as the model
+    // of use is to simultaneously deallocate everything at once
+    // by calling pop(), and to not have to solve memory leak problems.
+    //
+
+protected:
+    friend struct tHeader;
+
+    struct tHeader {
+        tHeader(tHeader* nextPage, size_t pageCount) :
+#ifdef GUARD_BLOCKS
+            lastAllocation(0),
+#endif
+            nextPage(nextPage), pageCount(pageCount) { }
+
+        ~tHeader() {
+#ifdef GUARD_BLOCKS
+            if (lastAllocation)
+                lastAllocation->checkAllocList();
+#endif
+        }
+
+#ifdef GUARD_BLOCKS
+        TAllocation* lastAllocation;
+#endif
+        tHeader* nextPage;
+        size_t pageCount;
+    };
+
+    struct tAllocState {
+        size_t offset;
+        tHeader* page;
+    };
+    typedef std::vector<tAllocState> tAllocStack;
+
+    // Track allocations if and only if we're using guard blocks
+#ifndef GUARD_BLOCKS
+    void* initializeAllocation(tHeader*, unsigned char* memory, size_t) {
+#else
+    void* initializeAllocation(tHeader* block, unsigned char* memory, size_t numBytes) {
+        new(memory) TAllocation(numBytes, memory, block->lastAllocation);
+        block->lastAllocation = reinterpret_cast<TAllocation*>(memory);
+#endif
+
+        // This is optimized entirely away if GUARD_BLOCKS is not defined.
+        return TAllocation::offsetAllocation(memory);
+    }
+
+    size_t pageSize;           // granularity of allocation from the OS
+    size_t alignment;          // all returned allocations will be aligned at
+                               //     this granularity, which will be a power of 2
+    size_t alignmentMask;
+    size_t headerSkip;         // amount of memory to skip to make room for the
+                               //     header (basically, size of header, rounded
+                               //     up to make it aligned
+    size_t currentPageOffset;  // next offset in top of inUseList to allocate from
+    tHeader* freeList;         // list of popped memory
+    tHeader* inUseList;        // list of all memory currently being used
+    tAllocStack stack;         // stack of where to allocate from, to partition pool
+
+    int numCalls;              // just an interesting statistic
+    size_t totalBytes;         // just an interesting statistic
+private:
+    TPoolAllocator& operator=(const TPoolAllocator&);  // don't allow assignment operator
+    TPoolAllocator(const TPoolAllocator&);             // don't allow default copy constructor
+};
+
+//
+// There could potentially be many pools with pops happening at
+// different times.  But a simple use is to have a global pop
+// with everyone using the same global allocator.
+//
+extern TPoolAllocator& GetThreadPoolAllocator();
+void SetThreadPoolAllocator(TPoolAllocator* poolAllocator);
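+
+// NOTE (editorial): a minimal sketch of the intended lifecycle, not part of
+// the original header; 'compilePool' and 'scratch' are illustrative names:
+//
+//   TPoolAllocator compilePool;
+//   SetThreadPoolAllocator(&compilePool);
+//   GetThreadPoolAllocator().push();         // mark a boundary
+//   void* scratch = GetThreadPoolAllocator().allocate(256);
+//   // ... allocate freely during one compile ...
+//   GetThreadPoolAllocator().pop();          // everything since push() is gone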
+
+//
+// This STL compatible allocator is intended to be used as the allocator
+// parameter to templatized STL containers, like vector and map.
+//
+// It will use the pools for allocation, and not
+// do any deallocation, but will still do destruction.
+//
+template<class T>
+class pool_allocator {
+public:
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    typedef T *pointer;
+    typedef const T *const_pointer;
+    typedef T& reference;
+    typedef const T& const_reference;
+    typedef T value_type;
+    template<class Other>
+        struct rebind {
+            typedef pool_allocator<Other> other;
+        };
+    pointer address(reference x) const { return &x; }
+    const_pointer address(const_reference x) const { return &x; }
+
+    pool_allocator() : allocator(GetThreadPoolAllocator()) { }
+    pool_allocator(TPoolAllocator& a) : allocator(a) { }
+    pool_allocator(const pool_allocator<T>& p) : allocator(p.allocator) { }
+
+    template<class Other>
+        pool_allocator(const pool_allocator<Other>& p) : allocator(p.getAllocator()) { }
+
+    pointer allocate(size_type n) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); }
+    pointer allocate(size_type n, const void*) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); }
+
+    void deallocate(void*, size_type) { }
+    void deallocate(pointer, size_type) { }
+
+    pointer _Charalloc(size_t n) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n)); }
+
+    void construct(pointer p, const T& val) { new ((void *)p) T(val); }
+    void destroy(pointer p) { p->T::~T(); }
+
+    bool operator==(const pool_allocator& rhs) const { return &getAllocator() == &rhs.getAllocator(); }
+    bool operator!=(const pool_allocator& rhs) const { return &getAllocator() != &rhs.getAllocator(); }
+
+    size_type max_size() const { return static_cast<size_type>(-1) / sizeof(T); }
+    size_type max_size(int size) const { return static_cast<size_type>(-1) / size; }
+
+    TPoolAllocator& getAllocator() const { return allocator; }
+
+    pool_allocator select_on_container_copy_construction() const { return pool_allocator{}; }
+
+protected:
+    pool_allocator& operator=(const pool_allocator&) { return *this; }
+    TPoolAllocator& allocator;
+};
+
+} // end namespace glslang
+
+#endif // _POOLALLOC_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ResourceLimits.h
new file mode 100644
index 0000000..b36c8d6
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ResourceLimits.h
@@ -0,0 +1,159 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _RESOURCE_LIMITS_INCLUDED_ +#define _RESOURCE_LIMITS_INCLUDED_ + +struct TLimits { + bool nonInductiveForLoops; + bool whileLoops; + bool doWhileLoops; + bool generalUniformIndexing; + bool generalAttributeMatrixVectorIndexing; + bool generalVaryingIndexing; + bool generalSamplerIndexing; + bool generalVariableIndexing; + bool generalConstantMatrixVectorIndexing; +}; + +struct TBuiltInResource { + int maxLights; + int maxClipPlanes; + int maxTextureUnits; + int maxTextureCoords; + int maxVertexAttribs; + int maxVertexUniformComponents; + int maxVaryingFloats; + int maxVertexTextureImageUnits; + int maxCombinedTextureImageUnits; + int maxTextureImageUnits; + int maxFragmentUniformComponents; + int maxDrawBuffers; + int maxVertexUniformVectors; + int maxVaryingVectors; + int maxFragmentUniformVectors; + int maxVertexOutputVectors; + int maxFragmentInputVectors; + int minProgramTexelOffset; + int maxProgramTexelOffset; + int maxClipDistances; + int maxComputeWorkGroupCountX; + int maxComputeWorkGroupCountY; + int maxComputeWorkGroupCountZ; + int maxComputeWorkGroupSizeX; + int maxComputeWorkGroupSizeY; + int maxComputeWorkGroupSizeZ; + int maxComputeUniformComponents; + int maxComputeTextureImageUnits; + int maxComputeImageUniforms; + int maxComputeAtomicCounters; + int maxComputeAtomicCounterBuffers; + int maxVaryingComponents; + int maxVertexOutputComponents; + int maxGeometryInputComponents; + int maxGeometryOutputComponents; + int maxFragmentInputComponents; + int maxImageUnits; + int maxCombinedImageUnitsAndFragmentOutputs; + int maxCombinedShaderOutputResources; + int maxImageSamples; + int maxVertexImageUniforms; + int maxTessControlImageUniforms; + int maxTessEvaluationImageUniforms; + int maxGeometryImageUniforms; + int maxFragmentImageUniforms; + int maxCombinedImageUniforms; + int maxGeometryTextureImageUnits; + int maxGeometryOutputVertices; + int maxGeometryTotalOutputComponents; + int maxGeometryUniformComponents; + int maxGeometryVaryingComponents; + int maxTessControlInputComponents; + int maxTessControlOutputComponents; + int maxTessControlTextureImageUnits; + int maxTessControlUniformComponents; + int maxTessControlTotalOutputComponents; + int maxTessEvaluationInputComponents; + int maxTessEvaluationOutputComponents; + int maxTessEvaluationTextureImageUnits; + int maxTessEvaluationUniformComponents; + int maxTessPatchComponents; + int maxPatchVertices; + int maxTessGenLevel; + int maxViewports; + int maxVertexAtomicCounters; + int maxTessControlAtomicCounters; + int maxTessEvaluationAtomicCounters; + int maxGeometryAtomicCounters; + int maxFragmentAtomicCounters; + int maxCombinedAtomicCounters; + int maxAtomicCounterBindings; + int maxVertexAtomicCounterBuffers; + int maxTessControlAtomicCounterBuffers; + int maxTessEvaluationAtomicCounterBuffers; + int maxGeometryAtomicCounterBuffers; + int maxFragmentAtomicCounterBuffers; + int maxCombinedAtomicCounterBuffers; + int maxAtomicCounterBufferSize; + int 
maxTransformFeedbackBuffers; + int maxTransformFeedbackInterleavedComponents; + int maxCullDistances; + int maxCombinedClipAndCullDistances; + int maxSamples; + int maxMeshOutputVerticesNV; + int maxMeshOutputPrimitivesNV; + int maxMeshWorkGroupSizeX_NV; + int maxMeshWorkGroupSizeY_NV; + int maxMeshWorkGroupSizeZ_NV; + int maxTaskWorkGroupSizeX_NV; + int maxTaskWorkGroupSizeY_NV; + int maxTaskWorkGroupSizeZ_NV; + int maxMeshViewCountNV; + int maxMeshOutputVerticesEXT; + int maxMeshOutputPrimitivesEXT; + int maxMeshWorkGroupSizeX_EXT; + int maxMeshWorkGroupSizeY_EXT; + int maxMeshWorkGroupSizeZ_EXT; + int maxTaskWorkGroupSizeX_EXT; + int maxTaskWorkGroupSizeY_EXT; + int maxTaskWorkGroupSizeZ_EXT; + int maxMeshViewCountEXT; + int maxDualSourceDrawBuffersEXT; + + TLimits limits; +}; + +#endif // _RESOURCE_LIMITS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ShHandle.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ShHandle.h new file mode 100644 index 0000000..df07bd8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/ShHandle.h @@ -0,0 +1,176 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SHHANDLE_INCLUDED_ +#define _SHHANDLE_INCLUDED_ + +// +// Machine independent part of the compiler private objects +// sent as ShHandle to the driver. +// +// This should not be included by driver code. +// + +#define SH_EXPORTING +#include "../Public/ShaderLang.h" +#include "../MachineIndependent/Versions.h" +#include "InfoSink.h" + +class TCompiler; +class TLinker; +class TUniformMap; + +// +// The base class used to back handles returned to the driver. 
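+// A handle owns the pool that backs its compile-time allocations: the constructor below +// news a TPoolAllocator and the destructor deletes it, so (as a hypothetical driver-side +// sketch) deleting the handle releases every pool-allocated object at once.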
+// +class TShHandleBase { +public: + TShHandleBase() { pool = new glslang::TPoolAllocator; } + virtual ~TShHandleBase() { delete pool; } + virtual TCompiler* getAsCompiler() { return 0; } + virtual TLinker* getAsLinker() { return 0; } + virtual TUniformMap* getAsUniformMap() { return 0; } + virtual glslang::TPoolAllocator* getPool() const { return pool; } +private: + glslang::TPoolAllocator* pool; +}; + +// +// The base class for the machine dependent linker to derive from +// for managing where uniforms live. +// +class TUniformMap : public TShHandleBase { +public: + TUniformMap() { } + virtual ~TUniformMap() { } + virtual TUniformMap* getAsUniformMap() { return this; } + virtual int getLocation(const char* name) = 0; + virtual TInfoSink& getInfoSink() { return infoSink; } + TInfoSink infoSink; +}; + +class TIntermNode; + +// +// The base class for the machine dependent compiler to derive from +// for managing object code from the compile. +// +class TCompiler : public TShHandleBase { +public: + TCompiler(EShLanguage l, TInfoSink& sink) : infoSink(sink) , language(l), haveValidObjectCode(false) { } + virtual ~TCompiler() { } + EShLanguage getLanguage() { return language; } + virtual TInfoSink& getInfoSink() { return infoSink; } + + virtual bool compile(TIntermNode* root, int version = 0, EProfile profile = ENoProfile) = 0; + + virtual TCompiler* getAsCompiler() { return this; } + virtual bool linkable() { return haveValidObjectCode; } + + TInfoSink& infoSink; +protected: + TCompiler& operator=(TCompiler&); + + EShLanguage language; + bool haveValidObjectCode; +}; + +// +// Link operations are based on a list of compile results... +// +typedef glslang::TVector<TCompiler*> TCompilerList; +typedef glslang::TVector<TShHandleBase*> THandleList; + +// +// The base class for the machine dependent linker to derive from +// to manage the resulting executable. +// + +class TLinker : public TShHandleBase { +public: + TLinker(EShExecutable e, TInfoSink& iSink) : + infoSink(iSink), + executable(e), + haveReturnableObjectCode(false), + appAttributeBindings(0), + fixedAttributeBindings(0), + excludedAttributes(0), + excludedCount(0), + uniformBindings(0) { } + virtual TLinker* getAsLinker() { return this; } + virtual ~TLinker() { } + virtual bool link(TCompilerList&, TUniformMap*) = 0; + virtual bool link(THandleList&) { return false; } + virtual void setAppAttributeBindings(const ShBindingTable* t) { appAttributeBindings = t; } + virtual void setFixedAttributeBindings(const ShBindingTable* t) { fixedAttributeBindings = t; } + virtual void getAttributeBindings(ShBindingTable const **t) const = 0; + virtual void setExcludedAttributes(const int* attributes, int count) { excludedAttributes = attributes; excludedCount = count; } + virtual ShBindingTable* getUniformBindings() const { return uniformBindings; } + virtual const void* getObjectCode() const { return 0; } // a real compiler would be returning object code here + virtual TInfoSink& getInfoSink() { return infoSink; } + TInfoSink& infoSink; +protected: + TLinker& operator=(TLinker&); + EShExecutable executable; + bool haveReturnableObjectCode; // true when objectCode is acceptable to send to driver + + const ShBindingTable* appAttributeBindings; + const ShBindingTable* fixedAttributeBindings; + const int* excludedAttributes; + int excludedCount; + ShBindingTable* uniformBindings; // created by the linker +}; + +// +// This is the interface between the machine independent code +// and the machine dependent code. 
+// +// The machine dependent code should derive from the classes +// above. Then Construct*() and Delete*() will create and +// destroy the machine dependent objects, which contain the +// above machine independent information. +// +TCompiler* ConstructCompiler(EShLanguage, int); + +TShHandleBase* ConstructLinker(EShExecutable, int); +TShHandleBase* ConstructBindings(); +void DeleteLinker(TShHandleBase*); +void DeleteBindingList(TShHandleBase* bindingList); + +TUniformMap* ConstructUniformMap(); +void DeleteCompiler(TCompiler*); + +void DeleteUniformMap(TUniformMap*); + +#endif // _SHHANDLE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/SpirvIntrinsics.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/SpirvIntrinsics.h new file mode 100644 index 0000000..3c7d72c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/SpirvIntrinsics.h @@ -0,0 +1,128 @@ +// +// Copyright(C) 2021 Advanced Micro Devices, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#pragma once + +#ifndef GLSLANG_WEB + +// +// GL_EXT_spirv_intrinsics +// +#include "Common.h" + +namespace glslang { + +class TIntermTyped; +class TIntermConstantUnion; +class TType; + +// SPIR-V requirements +struct TSpirvRequirement { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // extension = [..] + TSet<TString> extensions; + // capability = [..] 
+ TSet<int> capabilities; +}; + +// SPIR-V execution modes +struct TSpirvExecutionMode { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // spirv_execution_mode + TMap<int, TVector<const TIntermConstantUnion*>> modes; + // spirv_execution_mode_id + TMap<int, TVector<const TIntermTyped*> > modeIds; +}; + +// SPIR-V decorations +struct TSpirvDecorate { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // spirv_decorate + TMap<int, TVector<const TIntermConstantUnion*> > decorates; + // spirv_decorate_id + TMap<int, TVector<const TIntermTyped*>> decorateIds; + // spirv_decorate_string + TMap<int, TVector<const TIntermConstantUnion*> > decorateStrings; +}; + +// SPIR-V instruction +struct TSpirvInstruction { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSpirvInstruction() { set = ""; id = -1; } + + bool operator==(const TSpirvInstruction& rhs) const { return set == rhs.set && id == rhs.id; } + bool operator!=(const TSpirvInstruction& rhs) const { return !operator==(rhs); } + + // spirv_instruction + TString set; + int id; +}; + +// SPIR-V type parameter +struct TSpirvTypeParameter { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSpirvTypeParameter(const TIntermConstantUnion* arg) { constant = arg; } + + bool operator==(const TSpirvTypeParameter& rhs) const { return constant == rhs.constant; } + bool operator!=(const TSpirvTypeParameter& rhs) const { return !operator==(rhs); } + + const TIntermConstantUnion* constant; +}; + +typedef TVector<TSpirvTypeParameter> TSpirvTypeParameters; + +// SPIR-V type +struct TSpirvType { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + bool operator==(const TSpirvType& rhs) const + { + return spirvInst == rhs.spirvInst && typeParams == rhs.typeParams; + } + bool operator!=(const TSpirvType& rhs) const { return !operator==(rhs); } + + // spirv_type + TSpirvInstruction spirvInst; + TSpirvTypeParameters typeParams; +}; + +} // end namespace glslang + +#endif // GLSLANG_WEB diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/Types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/Types.h new file mode 100644 index 0000000..a6f47e8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/Types.h @@ -0,0 +1,2901 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2016 LunarG, Inc. +// Copyright (C) 2015-2016 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _TYPES_INCLUDED +#define _TYPES_INCLUDED + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" +#include "../Public/ShaderLang.h" +#include "arrays.h" +#include "SpirvIntrinsics.h" + +#include <algorithm> + +namespace glslang { + +class TIntermAggregate; + +const int GlslangMaxTypeLength = 200; // TODO: need to print block/struct one member per line, so this can stay bounded + +const char* const AnonymousPrefix = "anon@"; // for something like a block whose members can be directly accessed +inline bool IsAnonymous(const TString& name) +{ + return name.compare(0, 5, AnonymousPrefix) == 0; +} + +// +// Details within a sampler type +// +enum TSamplerDim { + EsdNone, + Esd1D, + Esd2D, + Esd3D, + EsdCube, + EsdRect, + EsdBuffer, + EsdSubpass, // goes only with non-sampled image (image is true) + EsdNumDims +}; + +struct TSampler { // misnomer now; includes images, textures without sampler, and textures with sampler + TBasicType type : 8; // type returned by sampler + TSamplerDim dim : 8; + bool arrayed : 1; + bool shadow : 1; + bool ms : 1; + bool image : 1; // image, combined should be false + bool combined : 1; // true means texture is combined with a sampler, false means texture with no sampler + bool sampler : 1; // true means a pure sampler, other fields should be clear() + +#ifdef GLSLANG_WEB + bool is1D() const { return false; } + bool isBuffer() const { return false; } + bool isRect() const { return false; } + bool isSubpass() const { return false; } + bool isCombined() const { return true; } + bool isImage() const { return false; } + bool isImageClass() const { return false; } + bool isMultiSample() const { return false; } + bool isExternal() const { return false; } + void setExternal(bool e) { } + bool isYuv() const { return false; } +#else + unsigned int vectorSize : 3; // vector return type size. + // Some languages support structures as sample results. Storing the whole structure in the + // TSampler is too large, so there is an index to a separate table. + static const unsigned structReturnIndexBits = 4; // number of index bits to use. + static const unsigned structReturnSlots = (1<<structReturnIndexBits)-1; // number of valid values +#endif +}; + +typedef TVector<TTypeLoc> TTypeList; + +typedef TVector<TString*> TIdentifierList; + +// +// Following are a series of helper enums for managing layouts and qualifiers, +// used for TPublicType, TType, others. +// + +enum TLayoutPacking { + ElpNone, + ElpShared, // default, but different than saying nothing + ElpStd140, + ElpStd430, + ElpPacked, + ElpScalar, + ElpCount // If expanding, see bitfield width below +}; + +enum TLayoutMatrix { + ElmNone, + ElmRowMajor, + ElmColumnMajor, // default, but different than saying nothing + ElmCount // If expanding, see bitfield width below +}; + +// Union of geometry shader and tessellation shader geometry types. +// They don't go into TType, but rather have current state per shader or +// active parser type (TPublicType). 
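+// For example, a tessellation evaluation shader's 'layout(triangles) in;' is recorded as +// shader-wide state (geometry = ElgTriangles in TShaderQualifiers further below) rather +// than as a qualifier on any single type.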
+enum TLayoutGeometry { + ElgNone, + ElgPoints, + ElgLines, + ElgLinesAdjacency, + ElgLineStrip, + ElgTriangles, + ElgTrianglesAdjacency, + ElgTriangleStrip, + ElgQuads, + ElgIsolines, +}; + +enum TVertexSpacing { + EvsNone, + EvsEqual, + EvsFractionalEven, + EvsFractionalOdd +}; + +enum TVertexOrder { + EvoNone, + EvoCw, + EvoCcw +}; + +// Note: order matters, as type of format is done by comparison. +enum TLayoutFormat { + ElfNone, + + // Float image + ElfRgba32f, + ElfRgba16f, + ElfR32f, + ElfRgba8, + ElfRgba8Snorm, + + ElfEsFloatGuard, // to help with comparisons + + ElfRg32f, + ElfRg16f, + ElfR11fG11fB10f, + ElfR16f, + ElfRgba16, + ElfRgb10A2, + ElfRg16, + ElfRg8, + ElfR16, + ElfR8, + ElfRgba16Snorm, + ElfRg16Snorm, + ElfRg8Snorm, + ElfR16Snorm, + ElfR8Snorm, + + ElfFloatGuard, // to help with comparisons + + // Int image + ElfRgba32i, + ElfRgba16i, + ElfRgba8i, + ElfR32i, + + ElfEsIntGuard, // to help with comparisons + + ElfRg32i, + ElfRg16i, + ElfRg8i, + ElfR16i, + ElfR8i, + ElfR64i, + + ElfIntGuard, // to help with comparisons + + // Uint image + ElfRgba32ui, + ElfRgba16ui, + ElfRgba8ui, + ElfR32ui, + + ElfEsUintGuard, // to help with comparisons + + ElfRg32ui, + ElfRg16ui, + ElfRgb10a2ui, + ElfRg8ui, + ElfR16ui, + ElfR8ui, + ElfR64ui, + + ElfCount +}; + +enum TLayoutDepth { + EldNone, + EldAny, + EldGreater, + EldLess, + EldUnchanged, + + EldCount +}; + +enum TLayoutStencil { + ElsNone, + ElsRefUnchangedFrontAMD, + ElsRefGreaterFrontAMD, + ElsRefLessFrontAMD, + ElsRefUnchangedBackAMD, + ElsRefGreaterBackAMD, + ElsRefLessBackAMD, + + ElsCount +}; + +enum TBlendEquationShift { + // No 'EBlendNone': + // These are used as bit-shift amounts. A mask of such shifts will have type 'int', + // and in that space, 0 means no bits set, or none. In this enum, 0 means (1 << 0), a bit is set. 
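+ // For example, a mask advertising multiply and screen support would be + // (1 << EBlendMultiply) | (1 << EBlendScreen), i.e. 0x3.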
+ EBlendMultiply, + EBlendScreen, + EBlendOverlay, + EBlendDarken, + EBlendLighten, + EBlendColordodge, + EBlendColorburn, + EBlendHardlight, + EBlendSoftlight, + EBlendDifference, + EBlendExclusion, + EBlendHslHue, + EBlendHslSaturation, + EBlendHslColor, + EBlendHslLuminosity, + EBlendAllEquations, + + EBlendCount +}; + +enum TInterlockOrdering { + EioNone, + EioPixelInterlockOrdered, + EioPixelInterlockUnordered, + EioSampleInterlockOrdered, + EioSampleInterlockUnordered, + EioShadingRateInterlockOrdered, + EioShadingRateInterlockUnordered, + + EioCount, +}; + +enum TShaderInterface +{ + // Includes both uniform blocks and buffer blocks + EsiUniform = 0, + EsiInput, + EsiOutput, + EsiNone, + + EsiCount +}; + +class TQualifier { +public: + static const int layoutNotSet = -1; + + void clear() + { + precision = EpqNone; + invariant = false; + makeTemporary(); + declaredBuiltIn = EbvNone; +#ifndef GLSLANG_WEB + noContraction = false; + nullInit = false; + spirvByReference = false; + spirvLiteral = false; +#endif + defaultBlock = false; + } + + // drop qualifiers that don't belong in a temporary variable + void makeTemporary() + { + semanticName = nullptr; + storage = EvqTemporary; + builtIn = EbvNone; + clearInterstage(); + clearMemory(); + specConstant = false; + nonUniform = false; + nullInit = false; + defaultBlock = false; + clearLayout(); +#ifndef GLSLANG_WEB + spirvStorageClass = -1; + spirvDecorate = nullptr; + spirvByReference = false; + spirvLiteral = false; +#endif + } + + void clearInterstage() + { + clearInterpolation(); +#ifndef GLSLANG_WEB + patch = false; + sample = false; +#endif + } + + void clearInterpolation() + { + centroid = false; + smooth = false; + flat = false; +#ifndef GLSLANG_WEB + nopersp = false; + explicitInterp = false; + pervertexNV = false; + perPrimitiveNV = false; + perViewNV = false; + perTaskNV = false; +#endif + pervertexEXT = false; + } + + void clearMemory() + { +#ifndef GLSLANG_WEB + coherent = false; + devicecoherent = false; + queuefamilycoherent = false; + workgroupcoherent = false; + subgroupcoherent = false; + shadercallcoherent = false; + nonprivate = false; + volatil = false; + restrict = false; + readonly = false; + writeonly = false; +#endif + } + + const char* semanticName; + TStorageQualifier storage : 6; + TBuiltInVariable builtIn : 9; + TBuiltInVariable declaredBuiltIn : 9; + static_assert(EbvLast < 256, "need to increase size of TBuiltInVariable bitfields!"); + TPrecisionQualifier precision : 3; + bool invariant : 1; // require canonical treatment for cross-shader invariance + bool centroid : 1; + bool smooth : 1; + bool flat : 1; + // having a constant_id is not sufficient: expressions have no id, but are still specConstant + bool specConstant : 1; + bool nonUniform : 1; + bool explicitOffset : 1; + bool defaultBlock : 1; // default blocks with matching names have structures merged when linking + +#ifdef GLSLANG_WEB + bool isWriteOnly() const { return false; } + bool isReadOnly() const { return false; } + bool isRestrict() const { return false; } + bool isCoherent() const { return false; } + bool isVolatile() const { return false; } + bool isSample() const { return false; } + bool isMemory() const { return false; } + bool isMemoryQualifierImageAndSSBOOnly() const { return false; } + bool bufferReferenceNeedsVulkanMemoryModel() const { return false; } + bool isInterpolation() const { return flat || smooth; } + bool isExplicitInterpolation() const { return false; } + bool isAuxiliary() const { return centroid; } + bool isPatch() const { 
return false; } + bool isNoContraction() const { return false; } + void setNoContraction() { } + bool isPervertexNV() const { return false; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() {} + bool isNullInit() const { return false; } + void setSpirvByReference() { } + bool isSpirvByReference() { return false; } + void setSpirvLiteral() { } + bool isSpirvLiteral() { return false; } +#else + bool noContraction: 1; // prevent contraction and reassociation, e.g., for 'precise' keyword, and expressions it affects + bool nopersp : 1; + bool explicitInterp : 1; + bool pervertexNV : 1; + bool pervertexEXT : 1; + bool perPrimitiveNV : 1; + bool perViewNV : 1; + bool perTaskNV : 1; + bool patch : 1; + bool sample : 1; + bool restrict : 1; + bool readonly : 1; + bool writeonly : 1; + bool coherent : 1; + bool volatil : 1; + bool devicecoherent : 1; + bool queuefamilycoherent : 1; + bool workgroupcoherent : 1; + bool subgroupcoherent : 1; + bool shadercallcoherent : 1; + bool nonprivate : 1; + bool nullInit : 1; + bool spirvByReference : 1; + bool spirvLiteral : 1; + bool isWriteOnly() const { return writeonly; } + bool isReadOnly() const { return readonly; } + bool isRestrict() const { return restrict; } + bool isCoherent() const { return coherent; } + bool isVolatile() const { return volatil; } + bool isSample() const { return sample; } + bool isMemory() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly || nonprivate; + } + bool isMemoryQualifierImageAndSSBOOnly() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly; + } + bool bufferReferenceNeedsVulkanMemoryModel() const + { + // include qualifiers that map to load/store availability/visibility/nonprivate memory access operands + return subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || nonprivate; + } + bool isInterpolation() const + { + return flat || smooth || nopersp || explicitInterp; + } + bool isExplicitInterpolation() const + { + return explicitInterp; + } + bool isAuxiliary() const + { + return centroid || patch || sample || pervertexNV || pervertexEXT; + } + bool isPatch() const { return patch; } + bool isNoContraction() const { return noContraction; } + void setNoContraction() { noContraction = true; } + bool isPervertexNV() const { return pervertexNV; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() { nullInit = true; } + bool isNullInit() const { return nullInit; } + void setSpirvByReference() { spirvByReference = true; } + bool isSpirvByReference() const { return spirvByReference; } + void setSpirvLiteral() { spirvLiteral = true; } + bool isSpirvLiteral() const { return spirvLiteral; } +#endif + + bool isPipeInput() const + { + switch (storage) { + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + return true; + default: + return false; + } + } + + bool isPipeOutput() const + { + switch (storage) { + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + bool isParamInput() const + { + switch (storage) { + case EvqIn: + case EvqInOut: + case EvqConstReadOnly: + 
return true; + default: + return false; + } + } + + bool isParamOutput() const + { + switch (storage) { + case EvqOut: + case EvqInOut: + return true; + default: + return false; + } + } + + bool isUniformOrBuffer() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + return true; + default: + return false; + } + } + + bool isUniform() const + { + switch (storage) { + case EvqUniform: + return true; + default: + return false; + } + } + + bool isIo() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + // non-built-in symbols that might link between compilation units + bool isLinkable() const + { + switch (storage) { + case EvqGlobal: + case EvqVaryingIn: + case EvqVaryingOut: + case EvqUniform: + case EvqBuffer: + case EvqShared: + return true; + default: + return false; + } + } + + TBlockStorageClass getBlockStorage() const { + if (storage == EvqUniform && !isPushConstant()) { + return EbsUniform; + } + else if (storage == EvqUniform) { + return EbsPushConstant; + } + else if (storage == EvqBuffer) { + return EbsStorageBuffer; + } + return EbsNone; + } + + void setBlockStorage(TBlockStorageClass newBacking) { +#ifndef GLSLANG_WEB + layoutPushConstant = (newBacking == EbsPushConstant); +#endif + switch (newBacking) { + case EbsUniform : + if (layoutPacking == ElpStd430) { + // std430 would not be valid + layoutPacking = ElpStd140; + } + storage = EvqUniform; + break; + case EbsStorageBuffer : + storage = EvqBuffer; + break; +#ifndef GLSLANG_WEB + case EbsPushConstant : + storage = EvqUniform; + layoutSet = TQualifier::layoutSetEnd; + layoutBinding = TQualifier::layoutBindingEnd; + break; +#endif + default: + break; + } + } + +#ifdef GLSLANG_WEB + bool isPerView() const { return false; } + bool isTaskMemory() const { return false; } + bool isArrayedIo(EShLanguage language) const { return false; } +#else + bool isPerPrimitive() const { return perPrimitiveNV; } + bool isPerView() const { return perViewNV; } + bool isTaskMemory() const { return perTaskNV; } + bool isTaskPayload() const { return storage == EvqtaskPayloadSharedEXT; } + bool isAnyPayload() const { + return storage == EvqPayload || storage == EvqPayloadIn; + } + bool isAnyCallable() const { + return storage == EvqCallableData || storage == EvqCallableDataIn; + } + + // True if this type of IO is supposed to be arrayed with extra level for per-vertex data + bool isArrayedIo(EShLanguage language) const + { + switch (language) { + case EShLangGeometry: + return isPipeInput(); + case EShLangTessControl: + return ! patch && (isPipeInput() || isPipeOutput()); + case EShLangTessEvaluation: + return ! patch && isPipeInput(); + case EShLangFragment: + return (pervertexNV || pervertexEXT) && isPipeInput(); + case EShLangMesh: + return ! 
perTaskNV && isPipeOutput(); + + default: + return false; + } + } +#endif + + // Implementing an embedded layout-qualifier class here, since C++ can't have a real class bitfield + void clearLayout() // all layout + { + clearUniformLayout(); + +#ifndef GLSLANG_WEB + layoutPushConstant = false; + layoutBufferReference = false; + layoutPassthrough = false; + layoutViewportRelative = false; + // -2048 as the default value indicating layoutSecondaryViewportRelative is not set + layoutSecondaryViewportRelativeOffset = -2048; + layoutShaderRecord = false; + layoutBufferReferenceAlign = layoutBufferReferenceAlignEnd; + layoutFormat = ElfNone; +#endif + + clearInterstageLayout(); + + layoutSpecConstantId = layoutSpecConstantIdEnd; + } + void clearInterstageLayout() + { + layoutLocation = layoutLocationEnd; + layoutComponent = layoutComponentEnd; +#ifndef GLSLANG_WEB + layoutIndex = layoutIndexEnd; + clearStreamLayout(); + clearXfbLayout(); +#endif + } + +#ifndef GLSLANG_WEB + void clearStreamLayout() + { + layoutStream = layoutStreamEnd; + } + void clearXfbLayout() + { + layoutXfbBuffer = layoutXfbBufferEnd; + layoutXfbStride = layoutXfbStrideEnd; + layoutXfbOffset = layoutXfbOffsetEnd; + } +#endif + + bool hasNonXfbLayout() const + { + return hasUniformLayout() || + hasAnyLocation() || + hasStream() || + hasFormat() || + isShaderRecord() || + isPushConstant() || + hasBufferReference(); + } + bool hasLayout() const + { + return hasNonXfbLayout() || + hasXfb(); + } + + TLayoutMatrix layoutMatrix : 3; + TLayoutPacking layoutPacking : 4; + int layoutOffset; + int layoutAlign; + + unsigned int layoutLocation : 12; + static const unsigned int layoutLocationEnd = 0xFFF; + + unsigned int layoutComponent : 3; + static const unsigned int layoutComponentEnd = 4; + + unsigned int layoutSet : 7; + static const unsigned int layoutSetEnd = 0x3F; + + unsigned int layoutBinding : 16; + static const unsigned int layoutBindingEnd = 0xFFFF; + + unsigned int layoutIndex : 8; + static const unsigned int layoutIndexEnd = 0xFF; + + unsigned int layoutStream : 8; + static const unsigned int layoutStreamEnd = 0xFF; + + unsigned int layoutXfbBuffer : 4; + static const unsigned int layoutXfbBufferEnd = 0xF; + + unsigned int layoutXfbStride : 14; + static const unsigned int layoutXfbStrideEnd = 0x3FFF; + + unsigned int layoutXfbOffset : 13; + static const unsigned int layoutXfbOffsetEnd = 0x1FFF; + + unsigned int layoutAttachment : 8; // for input_attachment_index + static const unsigned int layoutAttachmentEnd = 0XFF; + + unsigned int layoutSpecConstantId : 11; + static const unsigned int layoutSpecConstantIdEnd = 0x7FF; + +#ifndef GLSLANG_WEB + // stored as log2 of the actual alignment value + unsigned int layoutBufferReferenceAlign : 6; + static const unsigned int layoutBufferReferenceAlignEnd = 0x3F; + + TLayoutFormat layoutFormat : 8; + + bool layoutPushConstant; + bool layoutBufferReference; + bool layoutPassthrough; + bool layoutViewportRelative; + int layoutSecondaryViewportRelativeOffset; + bool layoutShaderRecord; + + // GL_EXT_spirv_intrinsics + int spirvStorageClass; + TSpirvDecorate* spirvDecorate; +#endif + + bool hasUniformLayout() const + { + return hasMatrix() || + hasPacking() || + hasOffset() || + hasBinding() || + hasSet() || + hasAlign(); + } + void clearUniformLayout() // only uniform specific + { + layoutMatrix = ElmNone; + layoutPacking = ElpNone; + layoutOffset = layoutNotSet; + layoutAlign = layoutNotSet; + + layoutSet = layoutSetEnd; + layoutBinding = layoutBindingEnd; +#ifndef GLSLANG_WEB + 
layoutAttachment = layoutAttachmentEnd; +#endif + } + + bool hasMatrix() const + { + return layoutMatrix != ElmNone; + } + bool hasPacking() const + { + return layoutPacking != ElpNone; + } + bool hasAlign() const + { + return layoutAlign != layoutNotSet; + } + bool hasAnyLocation() const + { + return hasLocation() || + hasComponent() || + hasIndex(); + } + bool hasLocation() const + { + return layoutLocation != layoutLocationEnd; + } + bool hasSet() const + { + return layoutSet != layoutSetEnd; + } + bool hasBinding() const + { + return layoutBinding != layoutBindingEnd; + } +#ifdef GLSLANG_WEB + bool hasOffset() const { return false; } + bool isNonPerspective() const { return false; } + bool hasIndex() const { return false; } + unsigned getIndex() const { return 0; } + bool hasComponent() const { return false; } + bool hasStream() const { return false; } + bool hasFormat() const { return false; } + bool hasXfb() const { return false; } + bool hasXfbBuffer() const { return false; } + bool hasXfbStride() const { return false; } + bool hasXfbOffset() const { return false; } + bool hasAttachment() const { return false; } + TLayoutFormat getFormat() const { return ElfNone; } + bool isPushConstant() const { return false; } + bool isShaderRecord() const { return false; } + bool hasBufferReference() const { return false; } + bool hasBufferReferenceAlign() const { return false; } + bool isNonUniform() const { return false; } +#else + bool hasOffset() const + { + return layoutOffset != layoutNotSet; + } + bool isNonPerspective() const { return nopersp; } + bool hasIndex() const + { + return layoutIndex != layoutIndexEnd; + } + unsigned getIndex() const { return layoutIndex; } + bool hasComponent() const + { + return layoutComponent != layoutComponentEnd; + } + bool hasStream() const + { + return layoutStream != layoutStreamEnd; + } + bool hasFormat() const + { + return layoutFormat != ElfNone; + } + bool hasXfb() const + { + return hasXfbBuffer() || + hasXfbStride() || + hasXfbOffset(); + } + bool hasXfbBuffer() const + { + return layoutXfbBuffer != layoutXfbBufferEnd; + } + bool hasXfbStride() const + { + return layoutXfbStride != layoutXfbStrideEnd; + } + bool hasXfbOffset() const + { + return layoutXfbOffset != layoutXfbOffsetEnd; + } + bool hasAttachment() const + { + return layoutAttachment != layoutAttachmentEnd; + } + TLayoutFormat getFormat() const { return layoutFormat; } + bool isPushConstant() const { return layoutPushConstant; } + bool isShaderRecord() const { return layoutShaderRecord; } + bool hasBufferReference() const { return layoutBufferReference; } + bool hasBufferReferenceAlign() const + { + return layoutBufferReferenceAlign != layoutBufferReferenceAlignEnd; + } + bool isNonUniform() const + { + return nonUniform; + } + + // GL_EXT_spirv_intrinsics + bool hasSprivDecorate() const { return spirvDecorate != nullptr; } + void setSpirvDecorate(int decoration, const TIntermAggregate* args = nullptr); + void setSpirvDecorateId(int decoration, const TIntermAggregate* args); + void setSpirvDecorateString(int decoration, const TIntermAggregate* args); + const TSpirvDecorate& getSpirvDecorate() const { assert(spirvDecorate); return *spirvDecorate; } + TSpirvDecorate& getSpirvDecorate() { assert(spirvDecorate); return *spirvDecorate; } + TString getSpirvDecorateQualifierString() const; +#endif + bool hasSpecConstantId() const + { + // Not the same thing as being a specialization constant, this + // is just whether or not it was declared with an ID. 
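+ // E.g., 'layout(constant_id = 3) const int N = 8;' carries an ID here, while an expression + // such as 'N + 1' is still a specialization constant without an ID of its own.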
+ return layoutSpecConstantId != layoutSpecConstantIdEnd; + } + bool isSpecConstant() const + { + // True if type is a specialization constant, whether or not it + // had a specialization-constant ID, and false if it is not a + // true front-end constant. + return specConstant; + } + bool isFrontEndConstant() const + { + // True if the front-end knows the final constant value. + // This allows front-end constant folding. + return storage == EvqConst && ! specConstant; + } + bool isConstant() const + { + // True if is either kind of constant; specialization or regular. + return isFrontEndConstant() || isSpecConstant(); + } + void makeSpecConstant() + { + storage = EvqConst; + specConstant = true; + } + static const char* getLayoutPackingString(TLayoutPacking packing) + { + switch (packing) { + case ElpStd140: return "std140"; +#ifndef GLSLANG_WEB + case ElpPacked: return "packed"; + case ElpShared: return "shared"; + case ElpStd430: return "std430"; + case ElpScalar: return "scalar"; +#endif + default: return "none"; + } + } + static const char* getLayoutMatrixString(TLayoutMatrix m) + { + switch (m) { + case ElmColumnMajor: return "column_major"; + case ElmRowMajor: return "row_major"; + default: return "none"; + } + } +#ifdef GLSLANG_WEB + static const char* getLayoutFormatString(TLayoutFormat f) { return "none"; } +#else + static const char* getLayoutFormatString(TLayoutFormat f) + { + switch (f) { + case ElfRgba32f: return "rgba32f"; + case ElfRgba16f: return "rgba16f"; + case ElfRg32f: return "rg32f"; + case ElfRg16f: return "rg16f"; + case ElfR11fG11fB10f: return "r11f_g11f_b10f"; + case ElfR32f: return "r32f"; + case ElfR16f: return "r16f"; + case ElfRgba16: return "rgba16"; + case ElfRgb10A2: return "rgb10_a2"; + case ElfRgba8: return "rgba8"; + case ElfRg16: return "rg16"; + case ElfRg8: return "rg8"; + case ElfR16: return "r16"; + case ElfR8: return "r8"; + case ElfRgba16Snorm: return "rgba16_snorm"; + case ElfRgba8Snorm: return "rgba8_snorm"; + case ElfRg16Snorm: return "rg16_snorm"; + case ElfRg8Snorm: return "rg8_snorm"; + case ElfR16Snorm: return "r16_snorm"; + case ElfR8Snorm: return "r8_snorm"; + + case ElfRgba32i: return "rgba32i"; + case ElfRgba16i: return "rgba16i"; + case ElfRgba8i: return "rgba8i"; + case ElfRg32i: return "rg32i"; + case ElfRg16i: return "rg16i"; + case ElfRg8i: return "rg8i"; + case ElfR32i: return "r32i"; + case ElfR16i: return "r16i"; + case ElfR8i: return "r8i"; + + case ElfRgba32ui: return "rgba32ui"; + case ElfRgba16ui: return "rgba16ui"; + case ElfRgba8ui: return "rgba8ui"; + case ElfRg32ui: return "rg32ui"; + case ElfRg16ui: return "rg16ui"; + case ElfRgb10a2ui: return "rgb10_a2ui"; + case ElfRg8ui: return "rg8ui"; + case ElfR32ui: return "r32ui"; + case ElfR16ui: return "r16ui"; + case ElfR8ui: return "r8ui"; + case ElfR64ui: return "r64ui"; + case ElfR64i: return "r64i"; + default: return "none"; + } + } + static const char* getLayoutDepthString(TLayoutDepth d) + { + switch (d) { + case EldAny: return "depth_any"; + case EldGreater: return "depth_greater"; + case EldLess: return "depth_less"; + case EldUnchanged: return "depth_unchanged"; + default: return "none"; + } + } + static const char* getLayoutStencilString(TLayoutStencil s) + { + switch (s) { + case ElsRefUnchangedFrontAMD: return "stencil_ref_unchanged_front_amd"; + case ElsRefGreaterFrontAMD: return "stencil_ref_greater_front_amd"; + case ElsRefLessFrontAMD: return "stencil_ref_less_front_amd"; + case ElsRefUnchangedBackAMD: return "stencil_ref_unchanged_back_amd"; + case 
ElsRefGreaterBackAMD: return "stencil_ref_greater_back_amd"; + case ElsRefLessBackAMD: return "stencil_ref_less_back_amd"; + default: return "none"; + } + } + static const char* getBlendEquationString(TBlendEquationShift e) + { + switch (e) { + case EBlendMultiply: return "blend_support_multiply"; + case EBlendScreen: return "blend_support_screen"; + case EBlendOverlay: return "blend_support_overlay"; + case EBlendDarken: return "blend_support_darken"; + case EBlendLighten: return "blend_support_lighten"; + case EBlendColordodge: return "blend_support_colordodge"; + case EBlendColorburn: return "blend_support_colorburn"; + case EBlendHardlight: return "blend_support_hardlight"; + case EBlendSoftlight: return "blend_support_softlight"; + case EBlendDifference: return "blend_support_difference"; + case EBlendExclusion: return "blend_support_exclusion"; + case EBlendHslHue: return "blend_support_hsl_hue"; + case EBlendHslSaturation: return "blend_support_hsl_saturation"; + case EBlendHslColor: return "blend_support_hsl_color"; + case EBlendHslLuminosity: return "blend_support_hsl_luminosity"; + case EBlendAllEquations: return "blend_support_all_equations"; + default: return "unknown"; + } + } + static const char* getGeometryString(TLayoutGeometry geometry) + { + switch (geometry) { + case ElgPoints: return "points"; + case ElgLines: return "lines"; + case ElgLinesAdjacency: return "lines_adjacency"; + case ElgLineStrip: return "line_strip"; + case ElgTriangles: return "triangles"; + case ElgTrianglesAdjacency: return "triangles_adjacency"; + case ElgTriangleStrip: return "triangle_strip"; + case ElgQuads: return "quads"; + case ElgIsolines: return "isolines"; + default: return "none"; + } + } + static const char* getVertexSpacingString(TVertexSpacing spacing) + { + switch (spacing) { + case EvsEqual: return "equal_spacing"; + case EvsFractionalEven: return "fractional_even_spacing"; + case EvsFractionalOdd: return "fractional_odd_spacing"; + default: return "none"; + } + } + static const char* getVertexOrderString(TVertexOrder order) + { + switch (order) { + case EvoCw: return "cw"; + case EvoCcw: return "ccw"; + default: return "none"; + } + } + static int mapGeometryToSize(TLayoutGeometry geometry) + { + switch (geometry) { + case ElgPoints: return 1; + case ElgLines: return 2; + case ElgLinesAdjacency: return 4; + case ElgTriangles: return 3; + case ElgTrianglesAdjacency: return 6; + default: return 0; + } + } + static const char* getInterlockOrderingString(TInterlockOrdering order) + { + switch (order) { + case EioPixelInterlockOrdered: return "pixel_interlock_ordered"; + case EioPixelInterlockUnordered: return "pixel_interlock_unordered"; + case EioSampleInterlockOrdered: return "sample_interlock_ordered"; + case EioSampleInterlockUnordered: return "sample_interlock_unordered"; + case EioShadingRateInterlockOrdered: return "shading_rate_interlock_ordered"; + case EioShadingRateInterlockUnordered: return "shading_rate_interlock_unordered"; + default: return "none"; + } + } +#endif +}; + +// Qualifiers that don't need to be kept per object. They have shader scope, not object scope. +// So, they will not be part of TType, TQualifier, etc. 
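+// For example, a compute shader's 'layout(local_size_x = 64) in;' lands in localSize[0] +// below for the whole stage instead of qualifying any particular object.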
+struct TShaderQualifiers { + TLayoutGeometry geometry; // geometry/tessellation shader in/out primitives + bool pixelCenterInteger; // fragment shader + bool originUpperLeft; // fragment shader + int invocations; + int vertices; // for tessellation "vertices", geometry & mesh "max_vertices" + TVertexSpacing spacing; + TVertexOrder order; + bool pointMode; + int localSize[3]; // compute shader + bool localSizeNotDefault[3]; // compute shader + int localSizeSpecId[3]; // compute shader specialization id for gl_WorkGroupSize +#ifndef GLSLANG_WEB + bool earlyFragmentTests; // fragment input + bool postDepthCoverage; // fragment input + bool earlyAndLateFragmentTestsAMD; // fragment input + TLayoutDepth layoutDepth; + TLayoutStencil layoutStencil; + bool blendEquation; // true if any blend equation was specified + int numViews; // multiview extensions + TInterlockOrdering interlockOrdering; + bool layoutOverrideCoverage; // true if layout override_coverage set + bool layoutDerivativeGroupQuads; // true if layout derivative_group_quadsNV set + bool layoutDerivativeGroupLinear; // true if layout derivative_group_linearNV set + int primitives; // mesh shader "max_primitives" + bool layoutPrimitiveCulling; // true if layout primitive_culling set + TLayoutDepth getDepth() const { return layoutDepth; } + TLayoutStencil getStencil() const { return layoutStencil; } +#else + TLayoutDepth getDepth() const { return EldNone; } +#endif + + void init() + { + geometry = ElgNone; + originUpperLeft = false; + pixelCenterInteger = false; + invocations = TQualifier::layoutNotSet; + vertices = TQualifier::layoutNotSet; + spacing = EvsNone; + order = EvoNone; + pointMode = false; + localSize[0] = 1; + localSize[1] = 1; + localSize[2] = 1; + localSizeNotDefault[0] = false; + localSizeNotDefault[1] = false; + localSizeNotDefault[2] = false; + localSizeSpecId[0] = TQualifier::layoutNotSet; + localSizeSpecId[1] = TQualifier::layoutNotSet; + localSizeSpecId[2] = TQualifier::layoutNotSet; +#ifndef GLSLANG_WEB + earlyFragmentTests = false; + earlyAndLateFragmentTestsAMD = false; + postDepthCoverage = false; + layoutDepth = EldNone; + layoutStencil = ElsNone; + blendEquation = false; + numViews = TQualifier::layoutNotSet; + layoutOverrideCoverage = false; + layoutDerivativeGroupQuads = false; + layoutDerivativeGroupLinear = false; + layoutPrimitiveCulling = false; + primitives = TQualifier::layoutNotSet; + interlockOrdering = EioNone; +#endif + } + +#ifdef GLSLANG_WEB + bool hasBlendEquation() const { return false; } +#else + bool hasBlendEquation() const { return blendEquation; } +#endif + + // Merge in characteristics from the 'src' qualifier. They can override when + // set, but never erase when not set. 
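+ // E.g., merging a unit that declared 'layout(max_vertices = 81) out;' into one that + // left it unset yields vertices == 81; an unset 'src' field never clears a set one.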
+ void merge(const TShaderQualifiers& src) + { + if (src.geometry != ElgNone) + geometry = src.geometry; + if (src.pixelCenterInteger) + pixelCenterInteger = src.pixelCenterInteger; + if (src.originUpperLeft) + originUpperLeft = src.originUpperLeft; + if (src.invocations != TQualifier::layoutNotSet) + invocations = src.invocations; + if (src.vertices != TQualifier::layoutNotSet) + vertices = src.vertices; + if (src.spacing != EvsNone) + spacing = src.spacing; + if (src.order != EvoNone) + order = src.order; + if (src.pointMode) + pointMode = true; + for (int i = 0; i < 3; ++i) { + if (src.localSize[i] > 1) + localSize[i] = src.localSize[i]; + } + for (int i = 0; i < 3; ++i) { + localSizeNotDefault[i] = src.localSizeNotDefault[i] || localSizeNotDefault[i]; + } + for (int i = 0; i < 3; ++i) { + if (src.localSizeSpecId[i] != TQualifier::layoutNotSet) + localSizeSpecId[i] = src.localSizeSpecId[i]; + } +#ifndef GLSLANG_WEB + if (src.earlyFragmentTests) + earlyFragmentTests = true; + if (src.earlyAndLateFragmentTestsAMD) + earlyAndLateFragmentTestsAMD = true; + if (src.postDepthCoverage) + postDepthCoverage = true; + if (src.layoutDepth) + layoutDepth = src.layoutDepth; + if (src.layoutStencil) + layoutStencil = src.layoutStencil; + if (src.blendEquation) + blendEquation = src.blendEquation; + if (src.numViews != TQualifier::layoutNotSet) + numViews = src.numViews; + if (src.layoutOverrideCoverage) + layoutOverrideCoverage = src.layoutOverrideCoverage; + if (src.layoutDerivativeGroupQuads) + layoutDerivativeGroupQuads = src.layoutDerivativeGroupQuads; + if (src.layoutDerivativeGroupLinear) + layoutDerivativeGroupLinear = src.layoutDerivativeGroupLinear; + if (src.primitives != TQualifier::layoutNotSet) + primitives = src.primitives; + if (src.interlockOrdering != EioNone) + interlockOrdering = src.interlockOrdering; + if (src.layoutPrimitiveCulling) + layoutPrimitiveCulling = src.layoutPrimitiveCulling; +#endif + } +}; + +// +// TPublicType is just temporarily used while parsing and not quite the same +// information kept per node in TType. Due to the bison stack, it can't have +// types that it thinks have non-trivial constructors. It should +// just be used while recognizing the grammar, not anything else. +// Once enough is known about the situation, the proper information is +// moved into a TType, or the parse context, etc. 
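+// For example, while parsing 'mat3x4 m;' the grammar holds matrixCols = 3 and matrixRows = 4 +// here (via setMatrix(3, 4)), and TType's TPublicType constructor later freezes that into a type.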
+// +class TPublicType { +public: + TBasicType basicType; + TSampler sampler; + TQualifier qualifier; + TShaderQualifiers shaderQualifiers; + int vectorSize : 4; + int matrixCols : 4; + int matrixRows : 4; + bool coopmat : 1; + TArraySizes* arraySizes; + const TType* userDef; + TSourceLoc loc; + TArraySizes* typeParameters; +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type directive + TSpirvType* spirvType; +#endif + +#ifdef GLSLANG_WEB + bool isCoopmat() const { return false; } +#else + bool isCoopmat() const { return coopmat; } +#endif + + void initType(const TSourceLoc& l) + { + basicType = EbtVoid; + vectorSize = 1; + matrixRows = 0; + matrixCols = 0; + arraySizes = nullptr; + userDef = nullptr; + loc = l; + typeParameters = nullptr; + coopmat = false; +#ifndef GLSLANG_WEB + spirvType = nullptr; +#endif + } + + void initQualifiers(bool global = false) + { + qualifier.clear(); + if (global) + qualifier.storage = EvqGlobal; + } + + void init(const TSourceLoc& l, bool global = false) + { + initType(l); + sampler.clear(); + initQualifiers(global); + shaderQualifiers.init(); + } + + void setVector(int s) + { + matrixRows = 0; + matrixCols = 0; + vectorSize = s; + } + + void setMatrix(int c, int r) + { + matrixRows = r; + matrixCols = c; + vectorSize = 0; + } + + bool isScalar() const + { + return matrixCols == 0 && vectorSize == 1 && arraySizes == nullptr && userDef == nullptr; + } + +#ifndef GLSLANG_WEB + // GL_EXT_spirv_intrinsics + void setSpirvType(const TSpirvInstruction& spirvInst, const TSpirvTypeParameters* typeParams = nullptr); +#endif + + // "Image" is a superset of "Subpass" + bool isImage() const { return basicType == EbtSampler && sampler.isImage(); } + bool isSubpass() const { return basicType == EbtSampler && sampler.isSubpass(); } +}; + +// +// Base class for things that have a type. 
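+// TType itself is pool-allocated (POOL_ALLOCATOR_NEW_DELETE just below), so instances +// are reclaimed wholesale when their pool pops rather than deleted one by one.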
+// +class TType { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // for "empty" type (no args) or simple scalar/vector/matrix + explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for explicit precision qualifier + TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + qualifier.precision = p; + assert(p >= EpqNone && p <= EpqHigh); + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for turning a TPublicType into a TType, using a shallow copy + explicit TType(const TPublicType& p) : + basicType(p.basicType), + vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmat(p.coopmat), + arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters) +#ifndef GLSLANG_WEB + , spirvType(p.spirvType) +#endif + { + if (basicType == EbtSampler) + sampler = p.sampler; + else + sampler.clear(); + qualifier = p.qualifier; + if (p.userDef) { + if (p.userDef->basicType == EbtReference) { + basicType = EbtReference; + referentType = p.userDef->referentType; + } else { + structure = p.userDef->getWritableStruct(); // public type is short-lived; there are no sharing issues + } + typeName = NewPoolTString(p.userDef->getTypeName().c_str()); + } + if (p.isCoopmat() && p.typeParameters && p.typeParameters->getNumDims() > 0) { + int numBits = p.typeParameters->getDimSize(0); + if (p.basicType == EbtFloat && numBits == 16) { + basicType = EbtFloat16; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtUint && numBits == 8) { + basicType = EbtUint8; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtInt && numBits == 8) { + basicType = EbtInt8; + qualifier.precision = EpqNone; + } + } + } + // for construction of sampler types + TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : + basicType(EbtSampler), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), + sampler(sampler), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + qualifier.clear(); + qualifier.storage = q; + } + // to efficiently make a dereferenced type + // without ever duplicating the outer structure that will be thrown away + // and using only shallow copy + TType(const TType& type, int derefIndex, bool rowMajor = false) + { + if (type.isArray()) { + shallowCopy(type); + if (type.getArraySizes()->getNumDims() == 1) { + arraySizes = nullptr; + } else { + // want our own copy of the array, 
so we can edit it + arraySizes = new TArraySizes; + arraySizes->copyDereferenced(*type.arraySizes); + } + } else if (type.basicType == EbtStruct || type.basicType == EbtBlock) { + // do a structure dereference + const TTypeList& memberList = *type.getStruct(); + shallowCopy(*memberList[derefIndex].type); + return; + } else { + // do a vector/matrix dereference + shallowCopy(type); + if (matrixCols > 0) { + // dereference from matrix to vector + if (rowMajor) + vectorSize = matrixCols; + else + vectorSize = matrixRows; + matrixCols = 0; + matrixRows = 0; + if (vectorSize == 1) + vector1 = true; + } else if (isVector()) { + // dereference from vector to scalar + vectorSize = 1; + vector1 = false; + } else if (isCoopMat()) { + coopmat = false; + typeParameters = nullptr; + } + } + } + // for making structures, ... + TType(TTypeList* userDef, const TString& n) : + basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + typeName = NewPoolTString(n.c_str()); + } + // For interface blocks + TType(TTypeList* userDef, const TString& n, const TQualifier& q) : + basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + typeName = NewPoolTString(n.c_str()); + } + // for block reference (first parameter must be EbtReference) + explicit TType(TBasicType t, const TType &p, const TString& n) : + basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + assert(t == EbtReference); + typeName = NewPoolTString(n.c_str()); + qualifier.clear(); + qualifier.storage = p.qualifier.storage; + referentType = p.clone(); + } + virtual ~TType() {} + + // Not for use across pool pops; it will cause multiple instances of TType to point to the same information. + // This only works if that information (like a structure's list of types) does not change and + // the instances are sharing the same pool. + void shallowCopy(const TType& copyOf) + { + basicType = copyOf.basicType; + sampler = copyOf.sampler; + qualifier = copyOf.qualifier; + vectorSize = copyOf.vectorSize; + matrixCols = copyOf.matrixCols; + matrixRows = copyOf.matrixRows; + vector1 = copyOf.vector1; + arraySizes = copyOf.arraySizes; // copying the pointer only, not the contents + fieldName = copyOf.fieldName; + typeName = copyOf.typeName; + if (isStruct()) { + structure = copyOf.structure; + } else { + referentType = copyOf.referentType; + } + typeParameters = copyOf.typeParameters; +#ifndef GLSLANG_WEB + spirvType = copyOf.spirvType; +#endif + coopmat = copyOf.isCoopMat(); + } + + // Make complete copy of the whole type graph rooted at 'copyOf'. 
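+ // Unlike shallowCopy() above, which shares arraySizes and structure pointers, this clones + // them, so the copy is safe to edit independently of 'copyOf'.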
+ void deepCopy(const TType& copyOf) + { + TMap<TTypeList*,TTypeList*> copied; // to enable copying a type graph as a graph, not a tree + deepCopy(copyOf, copied); + } + + // Recursively make temporary + void makeTemporary() + { + getQualifier().makeTemporary(); + + if (isStruct()) + for (unsigned int i = 0; i < structure->size(); ++i) + (*structure)[i].type->makeTemporary(); + } + + TType* clone() const + { + TType *newType = new TType(); + newType->deepCopy(*this); + + return newType; + } + + void makeVector() { vector1 = true; } + + virtual void hideMember() { basicType = EbtVoid; vectorSize = 1; } + virtual bool hiddenMember() const { return basicType == EbtVoid; } + + virtual void setFieldName(const TString& n) { fieldName = NewPoolTString(n.c_str()); } + virtual const TString& getTypeName() const + { + assert(typeName); + return *typeName; + } + + virtual const TString& getFieldName() const + { + assert(fieldName); + return *fieldName; + } + TShaderInterface getShaderInterface() const + { + if (basicType != EbtBlock) + return EsiNone; + + switch (qualifier.storage) { + default: + return EsiNone; + case EvqVaryingIn: + return EsiInput; + case EvqVaryingOut: + return EsiOutput; + case EvqUniform: + case EvqBuffer: + return EsiUniform; + } + } + + virtual TBasicType getBasicType() const { return basicType; } + virtual const TSampler& getSampler() const { return sampler; } + virtual TSampler& getSampler() { return sampler; } + + virtual TQualifier& getQualifier() { return qualifier; } + virtual const TQualifier& getQualifier() const { return qualifier; } + + virtual int getVectorSize() const { return vectorSize; } // returns 1 for either scalar or vector of size 1, valid for both + virtual int getMatrixCols() const { return matrixCols; } + virtual int getMatrixRows() const { return matrixRows; } + virtual int getOuterArraySize() const { return arraySizes->getOuterSize(); } + virtual TIntermTyped* getOuterArrayNode() const { return arraySizes->getOuterNode(); } + virtual int getCumulativeArraySize() const { return arraySizes->getCumulativeSize(); } +#ifdef GLSLANG_WEB + bool isArrayOfArrays() const { return false; } +#else + bool isArrayOfArrays() const { return arraySizes != nullptr && arraySizes->getNumDims() > 1; } +#endif + virtual int getImplicitArraySize() const { return arraySizes->getImplicitSize(); } + virtual const TArraySizes* getArraySizes() const { return arraySizes; } + virtual TArraySizes* getArraySizes() { return arraySizes; } + virtual TType* getReferentType() const { return referentType; } + virtual const TArraySizes* getTypeParameters() const { return typeParameters; } + virtual TArraySizes* getTypeParameters() { return typeParameters; } + + virtual bool isScalar() const { return ! isVector() && ! isMatrix() && ! isStruct() && ! isArray(); } + virtual bool isScalarOrVec1() const { return isScalar() || vector1; } + virtual bool isScalarOrVector() const { return !isMatrix() && !isStruct() && !isArray(); } + virtual bool isVector() const { return vectorSize > 1 || vector1; } + virtual bool isMatrix() const { return matrixCols ? 
true : false; } + virtual bool isArray() const { return arraySizes != nullptr; } + virtual bool isSizedArray() const { return isArray() && arraySizes->isSized(); } + virtual bool isUnsizedArray() const { return isArray() && !arraySizes->isSized(); } + virtual bool isArrayVariablyIndexed() const { assert(isArray()); return arraySizes->isVariablyIndexed(); } + virtual void setArrayVariablyIndexed() { assert(isArray()); arraySizes->setVariablyIndexed(); } + virtual void updateImplicitArraySize(int size) { assert(isArray()); arraySizes->updateImplicitSize(size); } + virtual bool isStruct() const { return basicType == EbtStruct || basicType == EbtBlock; } + virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16; } + virtual bool isIntegerDomain() const + { + switch (basicType) { + case EbtInt8: + case EbtUint8: + case EbtInt16: + case EbtUint16: + case EbtInt: + case EbtUint: + case EbtInt64: + case EbtUint64: + case EbtAtomicUint: + return true; + default: + break; + } + return false; + } + virtual bool isOpaque() const { return basicType == EbtSampler +#ifndef GLSLANG_WEB + || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery +#endif + ; } + virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; } + + // "Image" is a superset of "Subpass" + virtual bool isImage() const { return basicType == EbtSampler && getSampler().isImage(); } + virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); } + virtual bool isTexture() const { return basicType == EbtSampler && getSampler().isTexture(); } + // Check the block-name convention of creating a block without populating it's members: + virtual bool isUnusableName() const { return isStruct() && structure == nullptr; } + virtual bool isParameterized() const { return typeParameters != nullptr; } +#ifdef GLSLANG_WEB + bool isAtomic() const { return false; } + bool isCoopMat() const { return false; } + bool isReference() const { return false; } + bool isSpirvType() const { return false; } +#else + bool isAtomic() const { return basicType == EbtAtomicUint; } + bool isCoopMat() const { return coopmat; } + bool isReference() const { return getBasicType() == EbtReference; } + bool isSpirvType() const { return getBasicType() == EbtSpirvType; } +#endif + + // return true if this type contains any subtype which satisfies the given predicate. + template + bool contains(P predicate) const + { + if (predicate(this)) + return true; + + const auto hasa = [predicate](const TTypeLoc& tl) { return tl.type->contains(predicate); }; + + return isStruct() && std::any_of(structure->begin(), structure->end(), hasa); + } + + // Recursively checks if the type contains the given basic type + virtual bool containsBasicType(TBasicType checkType) const + { + return contains([checkType](const TType* t) { return t->basicType == checkType; } ); + } + + // Recursively check the structure for any arrays, needed for some error checks + virtual bool containsArray() const + { + return contains([](const TType* t) { return t->isArray(); } ); + } + + // Check the structure for any structures, needed for some error checks + virtual bool containsStructure() const + { + return contains([this](const TType* t) { return t != this && t->isStruct(); } ); + } + + // Recursively check the structure for any unsized arrays, needed for triggering a copyUp(). 
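+    // (An "unsized" array is one declared without an explicit size, e.g. 'float x[];',
+    // whose size is still implicit; contains() recurses through nested struct
+    // members, so unsized arrays are found at any depth.)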
+ virtual bool containsUnsizedArray() const + { + return contains([](const TType* t) { return t->isUnsizedArray(); } ); + } + + virtual bool containsOpaque() const + { + return contains([](const TType* t) { return t->isOpaque(); } ); + } + + // Recursively checks if the type contains a built-in variable + virtual bool containsBuiltIn() const + { + return contains([](const TType* t) { return t->isBuiltIn(); } ); + } + + virtual bool containsNonOpaque() const + { + const auto nonOpaque = [](const TType* t) { + switch (t->basicType) { + case EbtVoid: + case EbtFloat: + case EbtDouble: + case EbtFloat16: + case EbtInt8: + case EbtUint8: + case EbtInt16: + case EbtUint16: + case EbtInt: + case EbtUint: + case EbtInt64: + case EbtUint64: + case EbtBool: + case EbtReference: + return true; + default: + return false; + } + }; + + return contains(nonOpaque); + } + + virtual bool containsSpecializationSize() const + { + return contains([](const TType* t) { return t->isArray() && t->arraySizes->isOuterSpecialization(); } ); + } + +#ifdef GLSLANG_WEB + bool containsDouble() const { return false; } + bool contains16BitFloat() const { return false; } + bool contains64BitInt() const { return false; } + bool contains16BitInt() const { return false; } + bool contains8BitInt() const { return false; } + bool containsCoopMat() const { return false; } + bool containsReference() const { return false; } +#else + bool containsDouble() const + { + return containsBasicType(EbtDouble); + } + bool contains16BitFloat() const + { + return containsBasicType(EbtFloat16); + } + bool contains64BitInt() const + { + return containsBasicType(EbtInt64) || containsBasicType(EbtUint64); + } + bool contains16BitInt() const + { + return containsBasicType(EbtInt16) || containsBasicType(EbtUint16); + } + bool contains8BitInt() const + { + return containsBasicType(EbtInt8) || containsBasicType(EbtUint8); + } + bool containsCoopMat() const + { + return contains([](const TType* t) { return t->coopmat; } ); + } + bool containsReference() const + { + return containsBasicType(EbtReference); + } +#endif + + // Array editing methods. Array descriptors can be shared across + // type instances. This allows all uses of the same array + // to be updated at once. E.g., all nodes can be explicitly sized + // by tracking and correcting one implicit size. Or, all nodes + // can get the explicit size on a redeclaration that gives size. + // + // N.B.: Don't share with the shared symbol tables (symbols are + // marked as isReadOnly(). Such symbols with arrays that will be + // edited need to copyUp() on first use, so that + // A) the edits don't effect the shared symbol table, and + // B) the edits are shared across all users. + void updateArraySizes(const TType& type) + { + // For when we may already be sharing existing array descriptors, + // keeping the pointers the same, just updating the contents. + assert(arraySizes != nullptr); + assert(type.arraySizes != nullptr); + *arraySizes = *type.arraySizes; + } + void copyArraySizes(const TArraySizes& s) + { + // For setting a fresh new set of array sizes, not yet worrying about sharing. + arraySizes = new TArraySizes; + *arraySizes = s; + } + void transferArraySizes(TArraySizes* s) + { + // For setting an already allocated set of sizes that this type can use + // (no copy made). + arraySizes = s; + } + void clearArraySizes() + { + arraySizes = nullptr; + } + + // Add inner array sizes, to any existing sizes, via copy; the + // sizes passed in can still be reused for other purposes. 
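+    // (Contrast with transferArraySizes(), which adopts the given pointer without
+    // copying, and updateArraySizes(), which rewrites the contents behind an
+    // already-shared pointer so every sharer sees the new sizes.)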
+ void copyArrayInnerSizes(const TArraySizes* s) + { + if (s != nullptr) { + if (arraySizes == nullptr) + copyArraySizes(*s); + else + arraySizes->addInnerSizes(*s); + } + } + void changeOuterArraySize(int s) { arraySizes->changeOuterSize(s); } + + // Recursively make the implicit array size the explicit array size. + // Expicit arrays are compile-time or link-time sized, never run-time sized. + // Sometimes, policy calls for an array to be run-time sized even if it was + // never variably indexed: Don't turn a 'skipNonvariablyIndexed' array into + // an explicit array. + void adoptImplicitArraySizes(bool skipNonvariablyIndexed) + { + if (isUnsizedArray() && !(skipNonvariablyIndexed || isArrayVariablyIndexed())) + changeOuterArraySize(getImplicitArraySize()); + // For multi-dim per-view arrays, set unsized inner dimension size to 1 + if (qualifier.isPerView() && arraySizes && arraySizes->isInnerUnsized()) + arraySizes->clearInnerUnsized(); + if (isStruct() && structure->size() > 0) { + int lastMember = (int)structure->size() - 1; + for (int i = 0; i < lastMember; ++i) + (*structure)[i].type->adoptImplicitArraySizes(false); + // implement the "last member of an SSBO" policy + (*structure)[lastMember].type->adoptImplicitArraySizes(getQualifier().storage == EvqBuffer); + } + } + + + void updateTypeParameters(const TType& type) + { + // For when we may already be sharing existing array descriptors, + // keeping the pointers the same, just updating the contents. + assert(typeParameters != nullptr); + assert(type.typeParameters != nullptr); + *typeParameters = *type.typeParameters; + } + void copyTypeParameters(const TArraySizes& s) + { + // For setting a fresh new set of type parameters, not yet worrying about sharing. + typeParameters = new TArraySizes; + *typeParameters = s; + } + void transferTypeParameters(TArraySizes* s) + { + // For setting an already allocated set of sizes that this type can use + // (no copy made). + typeParameters = s; + } + void clearTypeParameters() + { + typeParameters = nullptr; + } + + // Add inner array sizes, to any existing sizes, via copy; the + // sizes passed in can still be reused for other purposes. 
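+    // (Type parameters reuse the TArraySizes machinery; e.g. cooperative-matrix
+    // types carry their size parameters here rather than in matrixCols/matrixRows.)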
+ void copyTypeParametersInnerSizes(const TArraySizes* s) + { + if (s != nullptr) { + if (typeParameters == nullptr) + copyTypeParameters(*s); + else + typeParameters->addInnerSizes(*s); + } + } + + const char* getBasicString() const + { + return TType::getBasicString(basicType); + } + + static const char* getBasicString(TBasicType t) + { + switch (t) { + case EbtFloat: return "float"; + case EbtInt: return "int"; + case EbtUint: return "uint"; + case EbtSampler: return "sampler/image"; +#ifndef GLSLANG_WEB + case EbtVoid: return "void"; + case EbtDouble: return "double"; + case EbtFloat16: return "float16_t"; + case EbtInt8: return "int8_t"; + case EbtUint8: return "uint8_t"; + case EbtInt16: return "int16_t"; + case EbtUint16: return "uint16_t"; + case EbtInt64: return "int64_t"; + case EbtUint64: return "uint64_t"; + case EbtBool: return "bool"; + case EbtAtomicUint: return "atomic_uint"; + case EbtStruct: return "structure"; + case EbtBlock: return "block"; + case EbtAccStruct: return "accelerationStructureNV"; + case EbtRayQuery: return "rayQueryEXT"; + case EbtReference: return "reference"; + case EbtString: return "string"; + case EbtSpirvType: return "spirv_type"; +#endif + default: return "unknown type"; + } + } + +#ifdef GLSLANG_WEB + TString getCompleteString() const { return ""; } + const char* getStorageQualifierString() const { return ""; } + const char* getBuiltInVariableString() const { return ""; } + const char* getPrecisionQualifierString() const { return ""; } + TString getBasicTypeString() const { return ""; } +#else + TString getCompleteString(bool syntactic = false, bool getQualifiers = true, bool getPrecision = true, + bool getType = true, TString name = "", TString structName = "") const + { + TString typeString; + + const auto appendStr = [&](const char* s) { typeString.append(s); }; + const auto appendUint = [&](unsigned int u) { typeString.append(std::to_string(u).c_str()); }; + const auto appendInt = [&](int i) { typeString.append(std::to_string(i).c_str()); }; + + if (getQualifiers) { + if (qualifier.hasSprivDecorate()) + appendStr(qualifier.getSpirvDecorateQualifierString().c_str()); + + if (qualifier.hasLayout()) { + // To reduce noise, skip this if the only layout is an xfb_buffer + // with no triggering xfb_offset. 
+ TQualifier noXfbBuffer = qualifier; + noXfbBuffer.layoutXfbBuffer = TQualifier::layoutXfbBufferEnd; + if (noXfbBuffer.hasLayout()) { + appendStr("layout("); + if (qualifier.hasAnyLocation()) { + appendStr(" location="); + appendUint(qualifier.layoutLocation); + if (qualifier.hasComponent()) { + appendStr(" component="); + appendUint(qualifier.layoutComponent); + } + if (qualifier.hasIndex()) { + appendStr(" index="); + appendUint(qualifier.layoutIndex); + } + } + if (qualifier.hasSet()) { + appendStr(" set="); + appendUint(qualifier.layoutSet); + } + if (qualifier.hasBinding()) { + appendStr(" binding="); + appendUint(qualifier.layoutBinding); + } + if (qualifier.hasStream()) { + appendStr(" stream="); + appendUint(qualifier.layoutStream); + } + if (qualifier.hasMatrix()) { + appendStr(" "); + appendStr(TQualifier::getLayoutMatrixString(qualifier.layoutMatrix)); + } + if (qualifier.hasPacking()) { + appendStr(" "); + appendStr(TQualifier::getLayoutPackingString(qualifier.layoutPacking)); + } + if (qualifier.hasOffset()) { + appendStr(" offset="); + appendInt(qualifier.layoutOffset); + } + if (qualifier.hasAlign()) { + appendStr(" align="); + appendInt(qualifier.layoutAlign); + } + if (qualifier.hasFormat()) { + appendStr(" "); + appendStr(TQualifier::getLayoutFormatString(qualifier.layoutFormat)); + } + if (qualifier.hasXfbBuffer() && qualifier.hasXfbOffset()) { + appendStr(" xfb_buffer="); + appendUint(qualifier.layoutXfbBuffer); + } + if (qualifier.hasXfbOffset()) { + appendStr(" xfb_offset="); + appendUint(qualifier.layoutXfbOffset); + } + if (qualifier.hasXfbStride()) { + appendStr(" xfb_stride="); + appendUint(qualifier.layoutXfbStride); + } + if (qualifier.hasAttachment()) { + appendStr(" input_attachment_index="); + appendUint(qualifier.layoutAttachment); + } + if (qualifier.hasSpecConstantId()) { + appendStr(" constant_id="); + appendUint(qualifier.layoutSpecConstantId); + } + if (qualifier.layoutPushConstant) + appendStr(" push_constant"); + if (qualifier.layoutBufferReference) + appendStr(" buffer_reference"); + if (qualifier.hasBufferReferenceAlign()) { + appendStr(" buffer_reference_align="); + appendUint(1u << qualifier.layoutBufferReferenceAlign); + } + + if (qualifier.layoutPassthrough) + appendStr(" passthrough"); + if (qualifier.layoutViewportRelative) + appendStr(" layoutViewportRelative"); + if (qualifier.layoutSecondaryViewportRelativeOffset != -2048) { + appendStr(" layoutSecondaryViewportRelativeOffset="); + appendInt(qualifier.layoutSecondaryViewportRelativeOffset); + } + if (qualifier.layoutShaderRecord) + appendStr(" shaderRecordNV"); + + appendStr(")"); + } + } + + if (qualifier.invariant) + appendStr(" invariant"); + if (qualifier.noContraction) + appendStr(" noContraction"); + if (qualifier.centroid) + appendStr(" centroid"); + if (qualifier.smooth) + appendStr(" smooth"); + if (qualifier.flat) + appendStr(" flat"); + if (qualifier.nopersp) + appendStr(" noperspective"); + if (qualifier.explicitInterp) + appendStr(" __explicitInterpAMD"); + if (qualifier.pervertexNV) + appendStr(" pervertexNV"); + if (qualifier.pervertexEXT) + appendStr(" pervertexEXT"); + if (qualifier.perPrimitiveNV) + appendStr(" perprimitiveNV"); + if (qualifier.perViewNV) + appendStr(" perviewNV"); + if (qualifier.perTaskNV) + appendStr(" taskNV"); + if (qualifier.patch) + appendStr(" patch"); + if (qualifier.sample) + appendStr(" sample"); + if (qualifier.coherent) + appendStr(" coherent"); + if (qualifier.devicecoherent) + appendStr(" devicecoherent"); + if 
(qualifier.queuefamilycoherent) + appendStr(" queuefamilycoherent"); + if (qualifier.workgroupcoherent) + appendStr(" workgroupcoherent"); + if (qualifier.subgroupcoherent) + appendStr(" subgroupcoherent"); + if (qualifier.shadercallcoherent) + appendStr(" shadercallcoherent"); + if (qualifier.nonprivate) + appendStr(" nonprivate"); + if (qualifier.volatil) + appendStr(" volatile"); + if (qualifier.restrict) + appendStr(" restrict"); + if (qualifier.readonly) + appendStr(" readonly"); + if (qualifier.writeonly) + appendStr(" writeonly"); + if (qualifier.specConstant) + appendStr(" specialization-constant"); + if (qualifier.nonUniform) + appendStr(" nonuniform"); + if (qualifier.isNullInit()) + appendStr(" null-init"); + if (qualifier.isSpirvByReference()) + appendStr(" spirv_by_reference"); + if (qualifier.isSpirvLiteral()) + appendStr(" spirv_literal"); + appendStr(" "); + appendStr(getStorageQualifierString()); + } + if (getType) { + if (syntactic) { + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isVector() || isMatrix()) { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("d"); + break; + case EbtInt: + appendStr("i"); + break; + case EbtUint: + appendStr("u"); + break; + case EbtBool: + appendStr("b"); + break; + case EbtFloat: + default: + break; + } + if (isVector()) { + appendStr("vec"); + appendInt(vectorSize); + } else { + appendStr("mat"); + appendInt(matrixCols); + appendStr("x"); + appendInt(matrixRows); + } + } else if (isStruct() && structure) { + appendStr(" "); + appendStr(structName.c_str()); + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString(syntactic, getQualifiers, getPrecision, getType, (*structure)[i].type->getFieldName())); + hasHiddenMember = false; + } + } + appendStr("}"); + } else { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("double"); + break; + case EbtInt: + appendStr("int"); + break; + case EbtUint: + appendStr("uint"); + break; + case EbtBool: + appendStr("bool"); + break; + case EbtFloat: + appendStr("float"); + break; + default: + appendStr("unexpected"); + break; + } + } + if (name.length() > 0) { + appendStr(" "); + appendStr(name.c_str()); + } + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr("[]"); + else { + if (size == UnsizedArraySize) { + appendStr("["); + if (i == 0) + appendInt(arraySizes->getImplicitSize()); + appendStr("]"); + } + else { + appendStr("["); + appendInt(arraySizes->getDimSize(i)); + appendStr("]"); + } + } + } + } + } + else { + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr(" runtime-sized array of"); + else { + if (size == UnsizedArraySize) { + appendStr(" unsized"); + if (i == 0) { + appendStr(" "); + appendInt(arraySizes->getImplicitSize()); + } + } + else { + appendStr(" "); + appendInt(arraySizes->getDimSize(i)); + } + appendStr("-element array of"); + } + } + } + if (isParameterized()) { + appendStr("<"); + for (int i = 0; i < (int)typeParameters->getNumDims(); ++i) { + 
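+                    // comma-separated type parameters, printed as "<p0, p1, ...>"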
appendInt(typeParameters->getDimSize(i)); + if (i != (int)typeParameters->getNumDims() - 1) + appendStr(", "); + } + appendStr(">"); + } + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isMatrix()) { + appendStr(" "); + appendInt(matrixCols); + appendStr("X"); + appendInt(matrixRows); + appendStr(" matrix of"); + } + else if (isVector()) { + appendStr(" "); + appendInt(vectorSize); + appendStr("-component vector of"); + } + + appendStr(" "); + typeString.append(getBasicTypeString()); + + if (qualifier.builtIn != EbvNone) { + appendStr(" "); + appendStr(getBuiltInVariableString()); + } + + // Add struct/block members + if (isStruct() && structure) { + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString()); + typeString.append(" "); + typeString.append((*structure)[i].type->getFieldName()); + hasHiddenMember = false; + } + } + appendStr("}"); + } + } + } + + return typeString; + } + + TString getBasicTypeString() const + { + if (basicType == EbtSampler) + return sampler.getString(); + else + return getBasicString(); + } + + const char* getStorageQualifierString() const { return GetStorageQualifierString(qualifier.storage); } + const char* getBuiltInVariableString() const { return GetBuiltInVariableString(qualifier.builtIn); } + const char* getPrecisionQualifierString() const { return GetPrecisionQualifierString(qualifier.precision); } +#endif + + const TTypeList* getStruct() const { assert(isStruct()); return structure; } + void setStruct(TTypeList* s) { assert(isStruct()); structure = s; } + TTypeList* getWritableStruct() const { assert(isStruct()); return structure; } // This should only be used when known to not be sharing with other threads + void setBasicType(const TBasicType& t) { basicType = t; } + + int computeNumComponents() const + { + int components = 0; + + if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { + for (TTypeList::const_iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) + components += ((*tl).type)->computeNumComponents(); + } else if (matrixCols) + components = matrixCols * matrixRows; + else + components = vectorSize; + + if (arraySizes != nullptr) { + components *= arraySizes->getCumulativeSize(); + } + + return components; + } + + // append this type's mangled name to the passed in 'name' + void appendMangledName(TString& name) const + { + buildMangledName(name); + name += ';' ; + } + + // These variables are inconsistently declared inside and outside of gl_PerVertex in glslang right now. + // They are declared inside of 'in gl_PerVertex', but sitting as standalone when they are 'out'puts. + bool isInconsistentGLPerVertexMember(const TString& name) const + { + if (name == "gl_SecondaryPositionNV" || + name == "gl_PositionPerViewNV") + return true; + return false; + } + + + // Do two structure types match? They could be declared independently, + // in different places, but still might satisfy the definition of matching. + // From the spec: + // + // "Structures must have the same name, sequence of type names, and + // type definitions, and member names to be considered the same type. + // This rule applies recursively for nested or embedded types." + // + // If type mismatch in structure, return member indices through lpidx and rpidx. 
+ // If matching members for either block are exhausted, return -1 for exhausted + // block and the index of the unmatched member. Otherwise return {-1,-1}. + // + bool sameStructType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + // Initialize error to general type mismatch. + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + + // Most commonly, they are both nullptr, or the same pointer to the same actual structure + // TODO: Why return true when neither types are structures? + if ((!isStruct() && !right.isStruct()) || + (isStruct() && right.isStruct() && structure == right.structure)) + return true; + + if (!isStruct() || !right.isStruct()) + return false; + + // Structure names have to match + if (*typeName != *right.typeName) + return false; + + // There are inconsistencies with how gl_PerVertex is setup. For now ignore those as errors if they + // are known inconsistencies. + bool isGLPerVertex = *typeName == "gl_PerVertex"; + + // Both being nullptr was caught above, now they both have to be structures of the same number of elements + if (lpidx == nullptr && + (structure->size() != right.structure->size() && !isGLPerVertex)) { + return false; + } + + // Compare the names and types of all the members, which have to match + for (size_t li = 0, ri = 0; li < structure->size() || ri < right.structure->size(); ++li, ++ri) { + if (lpidx != nullptr) { + *lpidx = static_cast(li); + *rpidx = static_cast(ri); + } + if (li < structure->size() && ri < right.structure->size()) { + if ((*structure)[li].type->getFieldName() == (*right.structure)[ri].type->getFieldName()) { + if (*(*structure)[li].type != *(*right.structure)[ri].type) + return false; + } else { + // Skip hidden members + if ((*structure)[li].type->hiddenMember()) { + ri--; + continue; + } else if ((*right.structure)[ri].type->hiddenMember()) { + li--; + continue; + } + // If one of the members is something that's inconsistently declared, skip over it + // for now. + if (isGLPerVertex) { + if (isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) { + ri--; + continue; + } else if (isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) { + li--; + continue; + } + } else { + return false; + } + } + // If we get here, then there should only be inconsistently declared members left + } else if (li < structure->size()) { + if (!(*structure)[li].type->hiddenMember() && !isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) { + if (lpidx != nullptr) { + *rpidx = -1; + } + return false; + } + } else { + if (!(*right.structure)[ri].type->hiddenMember() && !isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) { + if (lpidx != nullptr) { + *lpidx = -1; + } + return false; + } + } + } + + return true; + } + + bool sameReferenceType(const TType& right) const + { + if (isReference() != right.isReference()) + return false; + + if (!isReference() && !right.isReference()) + return true; + + assert(referentType != nullptr); + assert(right.referentType != nullptr); + + if (referentType == right.referentType) + return true; + + return *referentType == *right.referentType; + } + + // See if two types match, in all aspects except arrayness + // If mismatch in structure members, return member indices in lpidx and rpidx. 
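+    // A sketch of typical use (hypothetical 'a' and 'b'): the out-parameters let
+    // a caller report exactly which member pair failed to match:
+    //
+    //     int lpidx, rpidx;
+    //     if (! a.sameElementType(b, &lpidx, &rpidx) && lpidx >= 0) {
+    //         // members a[lpidx] / b[rpidx] (or -1 for an exhausted side) differ
+    //     }
+    //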
+ bool sameElementType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + return basicType == right.basicType && sameElementShape(right, lpidx, rpidx); + } + + // See if two type's arrayness match + bool sameArrayness(const TType& right) const + { + return ((arraySizes == nullptr && right.arraySizes == nullptr) || + (arraySizes != nullptr && right.arraySizes != nullptr && *arraySizes == *right.arraySizes)); + } + + // See if two type's arrayness match in everything except their outer dimension + bool sameInnerArrayness(const TType& right) const + { + assert(arraySizes != nullptr && right.arraySizes != nullptr); + return arraySizes->sameInnerArrayness(*right.arraySizes); + } + + // See if two type's parameters match + bool sameTypeParameters(const TType& right) const + { + return ((typeParameters == nullptr && right.typeParameters == nullptr) || + (typeParameters != nullptr && right.typeParameters != nullptr && *typeParameters == *right.typeParameters)); + } + +#ifndef GLSLANG_WEB + // See if two type's SPIR-V type contents match + bool sameSpirvType(const TType& right) const + { + return ((spirvType == nullptr && right.spirvType == nullptr) || + (spirvType != nullptr && right.spirvType != nullptr && *spirvType == *right.spirvType)); + } +#endif + + // See if two type's elements match in all ways except basic type + // If mismatch in structure members, return member indices in lpidx and rpidx. + bool sameElementShape(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + return ((basicType != EbtSampler && right.basicType != EbtSampler) || sampler == right.sampler) && + vectorSize == right.vectorSize && + matrixCols == right.matrixCols && + matrixRows == right.matrixRows && + vector1 == right.vector1 && + isCoopMat() == right.isCoopMat() && + sameStructType(right, lpidx, rpidx) && + sameReferenceType(right); + } + + // See if a cooperative matrix type parameter with unspecified parameters is + // an OK function parameter + bool coopMatParameterOK(const TType& right) const + { + return isCoopMat() && right.isCoopMat() && (getBasicType() == right.getBasicType()) && + typeParameters == nullptr && right.typeParameters != nullptr; + } + + bool sameCoopMatBaseType(const TType &right) const { + bool rv = coopmat && right.coopmat; + if (getBasicType() == EbtFloat || getBasicType() == EbtFloat16) + rv = right.getBasicType() == EbtFloat || right.getBasicType() == EbtFloat16; + else if (getBasicType() == EbtUint || getBasicType() == EbtUint8) + rv = right.getBasicType() == EbtUint || right.getBasicType() == EbtUint8; + else if (getBasicType() == EbtInt || getBasicType() == EbtInt8) + rv = right.getBasicType() == EbtInt || right.getBasicType() == EbtInt8; + else + rv = false; + return rv; + } + + + // See if two types match in all ways (just the actual type, not qualification) + bool operator==(const TType& right) const + { +#ifndef GLSLANG_WEB + return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right) && sameSpirvType(right); +#else + return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right); +#endif + } + + bool operator!=(const TType& right) const + { + return ! operator==(right); + } + + unsigned int getBufferReferenceAlignment() const + { +#ifndef GLSLANG_WEB + if (getBasicType() == glslang::EbtReference) { + return getReferentType()->getQualifier().hasBufferReferenceAlign() ? 
+ (1u << getReferentType()->getQualifier().layoutBufferReferenceAlign) : 16u; + } +#endif + return 0; + } + +#ifndef GLSLANG_WEB + const TSpirvType& getSpirvType() const { assert(spirvType); return *spirvType; } +#endif + +protected: + // Require consumer to pick between deep copy and shallow copy. + TType(const TType& type); + TType& operator=(const TType& type); + + // Recursively copy a type graph, while preserving the graph-like + // quality. That is, don't make more than one copy of a structure that + // gets reused multiple times in the type graph. + void deepCopy(const TType& copyOf, TMap& copiedMap) + { + shallowCopy(copyOf); + +#ifndef GLSLANG_WEB + // GL_EXT_spirv_intrinsics + if (copyOf.qualifier.spirvDecorate) { + qualifier.spirvDecorate = new TSpirvDecorate; + *qualifier.spirvDecorate = *copyOf.qualifier.spirvDecorate; + } + + if (copyOf.spirvType) { + spirvType = new TSpirvType; + *spirvType = *copyOf.spirvType; + } +#endif + + if (copyOf.arraySizes) { + arraySizes = new TArraySizes; + *arraySizes = *copyOf.arraySizes; + } + + if (copyOf.typeParameters) { + typeParameters = new TArraySizes; + *typeParameters = *copyOf.typeParameters; + } + + if (copyOf.isStruct() && copyOf.structure) { + auto prevCopy = copiedMap.find(copyOf.structure); + if (prevCopy != copiedMap.end()) + structure = prevCopy->second; + else { + structure = new TTypeList; + copiedMap[copyOf.structure] = structure; + for (unsigned int i = 0; i < copyOf.structure->size(); ++i) { + TTypeLoc typeLoc; + typeLoc.loc = (*copyOf.structure)[i].loc; + typeLoc.type = new TType(); + typeLoc.type->deepCopy(*(*copyOf.structure)[i].type, copiedMap); + structure->push_back(typeLoc); + } + } + } + + if (copyOf.fieldName) + fieldName = NewPoolTString(copyOf.fieldName->c_str()); + if (copyOf.typeName) + typeName = NewPoolTString(copyOf.typeName->c_str()); + } + + + void buildMangledName(TString&) const; + + TBasicType basicType : 8; + int vectorSize : 4; // 1 means either scalar or 1-component vector; see vector1 to disambiguate. + int matrixCols : 4; + int matrixRows : 4; + bool vector1 : 1; // Backward-compatible tracking of a 1-component vector distinguished from a scalar. + // GLSL 4.5 never has a 1-component vector; so this will always be false until such + // functionality is added. + // HLSL does have a 1-component vectors, so this will be true to disambiguate + // from a scalar. 
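+    // Note: the shape fields above are packed into bitfields to keep TType small;
+    // matrixCols == 0 means "not a matrix", and vectorSize == 1 together with
+    // vector1 distinguishes an HLSL vec1 from a scalar (see isScalarOrVec1()).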
+ bool coopmat : 1; + TQualifier qualifier; + + TArraySizes* arraySizes; // nullptr unless an array; can be shared across types + // A type can't be both a structure (EbtStruct/EbtBlock) and a reference (EbtReference), so + // conserve space by making these a union + union { + TTypeList* structure; // invalid unless this is a struct; can be shared across types + TType *referentType; // invalid unless this is an EbtReference + }; + TString *fieldName; // for structure field names + TString *typeName; // for structure type name + TSampler sampler; + TArraySizes* typeParameters;// nullptr unless a parameterized type; can be shared across types +#ifndef GLSLANG_WEB + TSpirvType* spirvType; // SPIR-V type defined by spirv_type directive +#endif +}; + +} // end namespace glslang + +#endif // _TYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/arrays.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/arrays.h new file mode 100644 index 0000000..7f047d9 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/arrays.h @@ -0,0 +1,341 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// Implement types for tracking GLSL arrays, arrays of arrays, etc. +// + +#ifndef _ARRAYS_INCLUDED +#define _ARRAYS_INCLUDED + +#include + +namespace glslang { + +// This is used to mean there is no size yet (unsized), it is waiting to get a size from somewhere else. +const int UnsizedArraySize = 0; + +class TIntermTyped; +extern bool SameSpecializationConstants(TIntermTyped*, TIntermTyped*); + +// Specialization constants need both a nominal size and a node that defines +// the specialization constant being used. Array types are the same when their +// size and specialization constant nodes are the same. 
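+// For example, two arrays declared 'float a[3]' and 'float b[3]' carry equal
+// TArraySize entries (size 3, node == nullptr), while arrays sized by different
+// specialization constants compare unequal even when their nominal sizes match.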
+struct TArraySize { + unsigned int size; + TIntermTyped* node; // nullptr means no specialization constant node + bool operator==(const TArraySize& rhs) const + { + if (size != rhs.size) + return false; + if (node == nullptr || rhs.node == nullptr) + return node == rhs.node; + + return SameSpecializationConstants(node, rhs.node); + } +}; + +// +// TSmallArrayVector is used as the container for the set of sizes in TArraySizes. +// It has generic-container semantics, while TArraySizes has array-of-array semantics. +// That is, TSmallArrayVector should be more focused on mechanism and TArraySizes on policy. +// +struct TSmallArrayVector { + // + // TODO: memory: TSmallArrayVector is intended to be smaller. + // Almost all arrays could be handled by two sizes each fitting + // in 16 bits, needing a real vector only in the cases where there + // are more than 3 sizes or a size needing more than 16 bits. + // + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSmallArrayVector() : sizes(nullptr) { } + virtual ~TSmallArrayVector() { dealloc(); } + + // For breaking into two non-shared copies, independently modifiable. + TSmallArrayVector& operator=(const TSmallArrayVector& from) + { + if (from.sizes == nullptr) + sizes = nullptr; + else { + alloc(); + *sizes = *from.sizes; + } + + return *this; + } + + int size() const + { + if (sizes == nullptr) + return 0; + return (int)sizes->size(); + } + + unsigned int frontSize() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().size; + } + + TIntermTyped* frontNode() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().node; + } + + void changeFront(unsigned int s) + { + assert(sizes != nullptr); + // this should only happen for implicitly sized arrays, not specialization constants + assert(sizes->front().node == nullptr); + sizes->front().size = s; + } + + void push_back(unsigned int e, TIntermTyped* n) + { + alloc(); + TArraySize pair = { e, n }; + sizes->push_back(pair); + } + + void push_back(const TSmallArrayVector& newDims) + { + alloc(); + sizes->insert(sizes->end(), newDims.sizes->begin(), newDims.sizes->end()); + } + + void pop_front() + { + assert(sizes != nullptr && sizes->size() > 0); + if (sizes->size() == 1) + dealloc(); + else + sizes->erase(sizes->begin()); + } + + // 'this' should currently not be holding anything, and copyNonFront + // will make it hold a copy of all but the first element of rhs. + // (This would be useful for making a type that is dereferenced by + // one dimension.) + void copyNonFront(const TSmallArrayVector& rhs) + { + assert(sizes == nullptr); + if (rhs.size() > 1) { + alloc(); + sizes->insert(sizes->begin(), rhs.sizes->begin() + 1, rhs.sizes->end()); + } + } + + unsigned int getDimSize(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].size; + } + + void setDimSize(int i, unsigned int size) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + assert((*sizes)[i].node == nullptr); + (*sizes)[i].size = size; + } + + TIntermTyped* getDimNode(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].node; + } + + bool operator==(const TSmallArrayVector& rhs) const + { + if (sizes == nullptr && rhs.sizes == nullptr) + return true; + if (sizes == nullptr || rhs.sizes == nullptr) + return false; + return *sizes == *rhs.sizes; + } + bool operator!=(const TSmallArrayVector& rhs) const { return ! 
operator==(rhs); } + +protected: + TSmallArrayVector(const TSmallArrayVector&); + + void alloc() + { + if (sizes == nullptr) + sizes = new TVector; + } + void dealloc() + { + delete sizes; + sizes = nullptr; + } + + TVector* sizes; // will either hold such a pointer, or in the future, hold the two array sizes +}; + +// +// Represent an array, or array of arrays, to arbitrary depth. This is not +// done through a hierarchy of types in a type tree, rather all contiguous arrayness +// in the type hierarchy is localized into this single cumulative object. +// +// The arrayness in TTtype is a pointer, so that it can be non-allocated and zero +// for the vast majority of types that are non-array types. +// +// Order Policy: these are all identical: +// - left to right order within a contiguous set of ...[..][..][..]... in the source language +// - index order 0, 1, 2, ... within the 'sizes' member below +// - outer-most to inner-most +// +struct TArraySizes { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TArraySizes() : implicitArraySize(1), variablyIndexed(false) { } + + // For breaking into two non-shared copies, independently modifiable. + TArraySizes& operator=(const TArraySizes& from) + { + implicitArraySize = from.implicitArraySize; + variablyIndexed = from.variablyIndexed; + sizes = from.sizes; + + return *this; + } + + // translate from array-of-array semantics to container semantics + int getNumDims() const { return sizes.size(); } + int getDimSize(int dim) const { return sizes.getDimSize(dim); } + TIntermTyped* getDimNode(int dim) const { return sizes.getDimNode(dim); } + void setDimSize(int dim, int size) { sizes.setDimSize(dim, size); } + int getOuterSize() const { return sizes.frontSize(); } + TIntermTyped* getOuterNode() const { return sizes.frontNode(); } + int getCumulativeSize() const + { + int size = 1; + for (int d = 0; d < sizes.size(); ++d) { + // this only makes sense in paths that have a known array size + assert(sizes.getDimSize(d) != UnsizedArraySize); + size *= sizes.getDimSize(d); + } + return size; + } + void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); } + void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); } + void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); } + void addInnerSize(TArraySize pair) { + sizes.push_back(pair.size, pair.node); + } + void addInnerSizes(const TArraySizes& s) { sizes.push_back(s.sizes); } + void changeOuterSize(int s) { sizes.changeFront((unsigned)s); } + int getImplicitSize() const { return implicitArraySize; } + void updateImplicitSize(int s) { implicitArraySize = std::max(implicitArraySize, s); } + bool isInnerUnsized() const + { + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) + return true; + } + + return false; + } + bool clearInnerUnsized() + { + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) + setDimSize(d, 1); + } + + return false; + } + bool isInnerSpecialization() const + { + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimNode(d) != nullptr) + return true; + } + + return false; + } + bool isOuterSpecialization() + { + return sizes.getDimNode(0) != nullptr; + } + + bool hasUnsized() const { return getOuterSize() == UnsizedArraySize || isInnerUnsized(); } + bool isSized() const { return getOuterSize() != UnsizedArraySize; } + void dereference() { sizes.pop_front(); } + void copyDereferenced(const TArraySizes& rhs) + { + assert(sizes.size() == 0); + if 
(rhs.sizes.size() > 1) + sizes.copyNonFront(rhs.sizes); + } + + bool sameInnerArrayness(const TArraySizes& rhs) const + { + if (sizes.size() != rhs.sizes.size()) + return false; + + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) != rhs.sizes.getDimSize(d) || + sizes.getDimNode(d) != rhs.sizes.getDimNode(d)) + return false; + } + + return true; + } + + void setVariablyIndexed() { variablyIndexed = true; } + bool isVariablyIndexed() const { return variablyIndexed; } + + bool operator==(const TArraySizes& rhs) const { return sizes == rhs.sizes; } + bool operator!=(const TArraySizes& rhs) const { return sizes != rhs.sizes; } + +protected: + TSmallArrayVector sizes; + + TArraySizes(const TArraySizes&); + + // For tracking maximum referenced compile-time constant index. + // Applies only to the outer-most dimension. Potentially becomes + // the implicit size of the array, if not variably indexed and + // otherwise legal. + int implicitArraySize; + bool variablyIndexed; // true if array is indexed with a non compile-time constant +}; + +} // end namespace glslang + +#endif // _ARRAYS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/glslang_c_interface.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/glslang_c_interface.h new file mode 100644 index 0000000..f540f26 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/glslang_c_interface.h @@ -0,0 +1,279 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**/ + +#ifndef GLSLANG_C_IFACE_H_INCLUDED +#define GLSLANG_C_IFACE_H_INCLUDED + +#include +#include + +#include "glslang_c_shader_types.h" + +typedef struct glslang_shader_s glslang_shader_t; +typedef struct glslang_program_s glslang_program_t; + +/* TLimits counterpart */ +typedef struct glslang_limits_s { + bool non_inductive_for_loops; + bool while_loops; + bool do_while_loops; + bool general_uniform_indexing; + bool general_attribute_matrix_vector_indexing; + bool general_varying_indexing; + bool general_sampler_indexing; + bool general_variable_indexing; + bool general_constant_matrix_vector_indexing; +} glslang_limits_t; + +/* TBuiltInResource counterpart */ +typedef struct glslang_resource_s { + int max_lights; + int max_clip_planes; + int max_texture_units; + int max_texture_coords; + int max_vertex_attribs; + int max_vertex_uniform_components; + int max_varying_floats; + int max_vertex_texture_image_units; + int max_combined_texture_image_units; + int max_texture_image_units; + int max_fragment_uniform_components; + int max_draw_buffers; + int max_vertex_uniform_vectors; + int max_varying_vectors; + int max_fragment_uniform_vectors; + int max_vertex_output_vectors; + int max_fragment_input_vectors; + int min_program_texel_offset; + int max_program_texel_offset; + int max_clip_distances; + int max_compute_work_group_count_x; + int max_compute_work_group_count_y; + int max_compute_work_group_count_z; + int max_compute_work_group_size_x; + int max_compute_work_group_size_y; + int max_compute_work_group_size_z; + int max_compute_uniform_components; + int max_compute_texture_image_units; + int max_compute_image_uniforms; + int max_compute_atomic_counters; + int max_compute_atomic_counter_buffers; + int max_varying_components; + int max_vertex_output_components; + int max_geometry_input_components; + int max_geometry_output_components; + int max_fragment_input_components; + int max_image_units; + int max_combined_image_units_and_fragment_outputs; + int max_combined_shader_output_resources; + int max_image_samples; + int max_vertex_image_uniforms; + int max_tess_control_image_uniforms; + int max_tess_evaluation_image_uniforms; + int max_geometry_image_uniforms; + int max_fragment_image_uniforms; + int max_combined_image_uniforms; + int max_geometry_texture_image_units; + int max_geometry_output_vertices; + int max_geometry_total_output_components; + int max_geometry_uniform_components; + int max_geometry_varying_components; + int max_tess_control_input_components; + int max_tess_control_output_components; + int max_tess_control_texture_image_units; + int max_tess_control_uniform_components; + int max_tess_control_total_output_components; + int max_tess_evaluation_input_components; + int max_tess_evaluation_output_components; + int max_tess_evaluation_texture_image_units; + int max_tess_evaluation_uniform_components; + int max_tess_patch_components; + int max_patch_vertices; + int max_tess_gen_level; + int max_viewports; + int max_vertex_atomic_counters; + int max_tess_control_atomic_counters; + int max_tess_evaluation_atomic_counters; + int max_geometry_atomic_counters; + int max_fragment_atomic_counters; + int max_combined_atomic_counters; + int max_atomic_counter_bindings; + int max_vertex_atomic_counter_buffers; + int max_tess_control_atomic_counter_buffers; + int max_tess_evaluation_atomic_counter_buffers; + int max_geometry_atomic_counter_buffers; + int max_fragment_atomic_counter_buffers; + int max_combined_atomic_counter_buffers; + int max_atomic_counter_buffer_size; + int 
max_transform_feedback_buffers; + int max_transform_feedback_interleaved_components; + int max_cull_distances; + int max_combined_clip_and_cull_distances; + int max_samples; + int max_mesh_output_vertices_nv; + int max_mesh_output_primitives_nv; + int max_mesh_work_group_size_x_nv; + int max_mesh_work_group_size_y_nv; + int max_mesh_work_group_size_z_nv; + int max_task_work_group_size_x_nv; + int max_task_work_group_size_y_nv; + int max_task_work_group_size_z_nv; + int max_mesh_view_count_nv; + int max_mesh_output_vertices_ext; + int max_mesh_output_primitives_ext; + int max_mesh_work_group_size_x_ext; + int max_mesh_work_group_size_y_ext; + int max_mesh_work_group_size_z_ext; + int max_task_work_group_size_x_ext; + int max_task_work_group_size_y_ext; + int max_task_work_group_size_z_ext; + int max_mesh_view_count_ext; + int maxDualSourceDrawBuffersEXT; + + glslang_limits_t limits; +} glslang_resource_t; + +typedef struct glslang_input_s { + glslang_source_t language; + glslang_stage_t stage; + glslang_client_t client; + glslang_target_client_version_t client_version; + glslang_target_language_t target_language; + glslang_target_language_version_t target_language_version; + /** Shader source code */ + const char* code; + int default_version; + glslang_profile_t default_profile; + int force_default_version_and_profile; + int forward_compatible; + glslang_messages_t messages; + const glslang_resource_t* resource; +} glslang_input_t; + +/* Inclusion result structure allocated by C include_local/include_system callbacks */ +typedef struct glsl_include_result_s { + /* Header file name or NULL if inclusion failed */ + const char* header_name; + + /* Header contents or NULL */ + const char* header_data; + size_t header_length; + +} glsl_include_result_t; + +/* Callback for local file inclusion */ +typedef glsl_include_result_t* (*glsl_include_local_func)(void* ctx, const char* header_name, const char* includer_name, + size_t include_depth); + +/* Callback for system file inclusion */ +typedef glsl_include_result_t* (*glsl_include_system_func)(void* ctx, const char* header_name, + const char* includer_name, size_t include_depth); + +/* Callback for include result destruction */ +typedef int (*glsl_free_include_result_func)(void* ctx, glsl_include_result_t* result); + +/* Collection of callbacks for GLSL preprocessor */ +typedef struct glsl_include_callbacks_s { + glsl_include_system_func include_system; + glsl_include_local_func include_local; + glsl_free_include_result_func free_include_result; +} glsl_include_callbacks_t; + +/* SpvOptions counterpart */ +typedef struct glslang_spv_options_s { + bool generate_debug_info; + bool strip_debug_info; + bool disable_optimizer; + bool optimize_size; + bool disassemble; + bool validate; + bool emit_nonsemantic_shader_debug_info; + bool emit_nonsemantic_shader_debug_source; +} glslang_spv_options_t; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef GLSLANG_IS_SHARED_LIBRARY + #ifdef _WIN32 + #ifdef GLSLANG_EXPORTING + #define GLSLANG_EXPORT __declspec(dllexport) + #else + #define GLSLANG_EXPORT __declspec(dllimport) + #endif + #elif __GNUC__ >= 4 + #define GLSLANG_EXPORT __attribute__((visibility("default"))) + #endif +#endif // GLSLANG_IS_SHARED_LIBRARY + +#ifndef GLSLANG_EXPORT +#define GLSLANG_EXPORT +#endif + +GLSLANG_EXPORT int glslang_initialize_process(); +GLSLANG_EXPORT void glslang_finalize_process(); + +GLSLANG_EXPORT glslang_shader_t* glslang_shader_create(const glslang_input_t* input); +GLSLANG_EXPORT void 
glslang_shader_delete(glslang_shader_t* shader); +GLSLANG_EXPORT void glslang_shader_set_preamble(glslang_shader_t* shader, const char* s); +GLSLANG_EXPORT void glslang_shader_shift_binding(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base); +GLSLANG_EXPORT void glslang_shader_shift_binding_for_set(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base, unsigned int set); +GLSLANG_EXPORT void glslang_shader_set_options(glslang_shader_t* shader, int options); // glslang_shader_options_t +GLSLANG_EXPORT void glslang_shader_set_glsl_version(glslang_shader_t* shader, int version); +GLSLANG_EXPORT int glslang_shader_preprocess(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT int glslang_shader_parse(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT const char* glslang_shader_get_preprocessed_code(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_log(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_debug_log(glslang_shader_t* shader); + +GLSLANG_EXPORT glslang_program_t* glslang_program_create(); +GLSLANG_EXPORT void glslang_program_delete(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_add_shader(glslang_program_t* program, glslang_shader_t* shader); +GLSLANG_EXPORT int glslang_program_link(glslang_program_t* program, int messages); // glslang_messages_t +GLSLANG_EXPORT void glslang_program_add_source_text(glslang_program_t* program, glslang_stage_t stage, const char* text, size_t len); +GLSLANG_EXPORT void glslang_program_set_source_file(glslang_program_t* program, glslang_stage_t stage, const char* file); +GLSLANG_EXPORT int glslang_program_map_io(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_generate(glslang_program_t* program, glslang_stage_t stage); +GLSLANG_EXPORT void glslang_program_SPIRV_generate_with_options(glslang_program_t* program, glslang_stage_t stage, glslang_spv_options_t* spv_options); +GLSLANG_EXPORT size_t glslang_program_SPIRV_get_size(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_get(glslang_program_t* program, unsigned int*); +GLSLANG_EXPORT unsigned int* glslang_program_SPIRV_get_ptr(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_SPIRV_get_messages(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_log(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_debug_log(glslang_program_t* program); + +#ifdef __cplusplus +} +#endif + +#endif /* #ifdef GLSLANG_C_IFACE_INCLUDED */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/glslang_c_shader_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/glslang_c_shader_types.h new file mode 100644 index 0000000..9bc2114 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/glslang_c_shader_types.h @@ -0,0 +1,227 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**/ + +#ifndef C_SHADER_TYPES_H_INCLUDED +#define C_SHADER_TYPES_H_INCLUDED + +#define LAST_ELEMENT_MARKER(x) x + +/* EShLanguage counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX, + GLSLANG_STAGE_TESSCONTROL, + GLSLANG_STAGE_TESSEVALUATION, + GLSLANG_STAGE_GEOMETRY, + GLSLANG_STAGE_FRAGMENT, + GLSLANG_STAGE_COMPUTE, + GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_RAYGEN_NV = GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_INTERSECT_NV = GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_ANYHIT_NV = GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_CLOSESTHIT_NV = GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_MISS, + GLSLANG_STAGE_MISS_NV = GLSLANG_STAGE_MISS, + GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_CALLABLE_NV = GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_TASK, + GLSLANG_STAGE_TASK_NV = GLSLANG_STAGE_TASK, + GLSLANG_STAGE_MESH, + GLSLANG_STAGE_MESH_NV = GLSLANG_STAGE_MESH, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_COUNT), +} glslang_stage_t; // would be better as stage, but this is ancient now + +/* EShLanguageMask counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX_MASK = (1 << GLSLANG_STAGE_VERTEX), + GLSLANG_STAGE_TESSCONTROL_MASK = (1 << GLSLANG_STAGE_TESSCONTROL), + GLSLANG_STAGE_TESSEVALUATION_MASK = (1 << GLSLANG_STAGE_TESSEVALUATION), + GLSLANG_STAGE_GEOMETRY_MASK = (1 << GLSLANG_STAGE_GEOMETRY), + GLSLANG_STAGE_FRAGMENT_MASK = (1 << GLSLANG_STAGE_FRAGMENT), + GLSLANG_STAGE_COMPUTE_MASK = (1 << GLSLANG_STAGE_COMPUTE), + GLSLANG_STAGE_RAYGEN_MASK = (1 << GLSLANG_STAGE_RAYGEN), + GLSLANG_STAGE_RAYGEN_NV_MASK = GLSLANG_STAGE_RAYGEN_MASK, + GLSLANG_STAGE_INTERSECT_MASK = (1 << GLSLANG_STAGE_INTERSECT), + GLSLANG_STAGE_INTERSECT_NV_MASK = GLSLANG_STAGE_INTERSECT_MASK, + GLSLANG_STAGE_ANYHIT_MASK = (1 << GLSLANG_STAGE_ANYHIT), + GLSLANG_STAGE_ANYHIT_NV_MASK = GLSLANG_STAGE_ANYHIT_MASK, + GLSLANG_STAGE_CLOSESTHIT_MASK = (1 << GLSLANG_STAGE_CLOSESTHIT), + GLSLANG_STAGE_CLOSESTHIT_NV_MASK = GLSLANG_STAGE_CLOSESTHIT_MASK, + GLSLANG_STAGE_MISS_MASK = (1 << GLSLANG_STAGE_MISS), + GLSLANG_STAGE_MISS_NV_MASK = GLSLANG_STAGE_MISS_MASK, + GLSLANG_STAGE_CALLABLE_MASK = (1 << GLSLANG_STAGE_CALLABLE), + GLSLANG_STAGE_CALLABLE_NV_MASK = GLSLANG_STAGE_CALLABLE_MASK, + GLSLANG_STAGE_TASK_MASK = (1 << GLSLANG_STAGE_TASK), + GLSLANG_STAGE_TASK_NV_MASK = GLSLANG_STAGE_TASK_MASK, + GLSLANG_STAGE_MESH_MASK = (1 << GLSLANG_STAGE_MESH), + GLSLANG_STAGE_MESH_NV_MASK = GLSLANG_STAGE_MESH_MASK, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_MASK_COUNT), +} glslang_stage_mask_t; + +/* EShSource counterpart */ +typedef enum { + 
GLSLANG_SOURCE_NONE, + GLSLANG_SOURCE_GLSL, + GLSLANG_SOURCE_HLSL, + LAST_ELEMENT_MARKER(GLSLANG_SOURCE_COUNT), +} glslang_source_t; + +/* EShClient counterpart */ +typedef enum { + GLSLANG_CLIENT_NONE, + GLSLANG_CLIENT_VULKAN, + GLSLANG_CLIENT_OPENGL, + LAST_ELEMENT_MARKER(GLSLANG_CLIENT_COUNT), +} glslang_client_t; + +/* EShTargetLanguage counterpart */ +typedef enum { + GLSLANG_TARGET_NONE, + GLSLANG_TARGET_SPV, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_COUNT), +} glslang_target_language_t; + +/* SH_TARGET_ClientVersion counterpart */ +typedef enum { + GLSLANG_TARGET_VULKAN_1_0 = (1 << 22), + GLSLANG_TARGET_VULKAN_1_1 = (1 << 22) | (1 << 12), + GLSLANG_TARGET_VULKAN_1_2 = (1 << 22) | (2 << 12), + GLSLANG_TARGET_VULKAN_1_3 = (1 << 22) | (3 << 12), + GLSLANG_TARGET_OPENGL_450 = 450, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_CLIENT_VERSION_COUNT = 5), +} glslang_target_client_version_t; + +/* SH_TARGET_LanguageVersion counterpart */ +typedef enum { + GLSLANG_TARGET_SPV_1_0 = (1 << 16), + GLSLANG_TARGET_SPV_1_1 = (1 << 16) | (1 << 8), + GLSLANG_TARGET_SPV_1_2 = (1 << 16) | (2 << 8), + GLSLANG_TARGET_SPV_1_3 = (1 << 16) | (3 << 8), + GLSLANG_TARGET_SPV_1_4 = (1 << 16) | (4 << 8), + GLSLANG_TARGET_SPV_1_5 = (1 << 16) | (5 << 8), + GLSLANG_TARGET_SPV_1_6 = (1 << 16) | (6 << 8), + LAST_ELEMENT_MARKER(GLSLANG_TARGET_LANGUAGE_VERSION_COUNT = 7), +} glslang_target_language_version_t; + +/* EShExecutable counterpart */ +typedef enum { GLSLANG_EX_VERTEX_FRAGMENT, GLSLANG_EX_FRAGMENT } glslang_executable_t; + +// EShOptimizationLevel counterpart +// This enum is not used in the current C interface, but could be added at a later date. +// GLSLANG_OPT_NONE is the current default. +typedef enum { + GLSLANG_OPT_NO_GENERATION, + GLSLANG_OPT_NONE, + GLSLANG_OPT_SIMPLE, + GLSLANG_OPT_FULL, + LAST_ELEMENT_MARKER(GLSLANG_OPT_LEVEL_COUNT), +} glslang_optimization_level_t; + +/* EShTextureSamplerTransformMode counterpart */ +typedef enum { + GLSLANG_TEX_SAMP_TRANS_KEEP, + GLSLANG_TEX_SAMP_TRANS_UPGRADE_TEXTURE_REMOVE_SAMPLER, + LAST_ELEMENT_MARKER(GLSLANG_TEX_SAMP_TRANS_COUNT), +} glslang_texture_sampler_transform_mode_t; + +/* EShMessages counterpart */ +typedef enum { + GLSLANG_MSG_DEFAULT_BIT = 0, + GLSLANG_MSG_RELAXED_ERRORS_BIT = (1 << 0), + GLSLANG_MSG_SUPPRESS_WARNINGS_BIT = (1 << 1), + GLSLANG_MSG_AST_BIT = (1 << 2), + GLSLANG_MSG_SPV_RULES_BIT = (1 << 3), + GLSLANG_MSG_VULKAN_RULES_BIT = (1 << 4), + GLSLANG_MSG_ONLY_PREPROCESSOR_BIT = (1 << 5), + GLSLANG_MSG_READ_HLSL_BIT = (1 << 6), + GLSLANG_MSG_CASCADING_ERRORS_BIT = (1 << 7), + GLSLANG_MSG_KEEP_UNCALLED_BIT = (1 << 8), + GLSLANG_MSG_HLSL_OFFSETS_BIT = (1 << 9), + GLSLANG_MSG_DEBUG_INFO_BIT = (1 << 10), + GLSLANG_MSG_HLSL_ENABLE_16BIT_TYPES_BIT = (1 << 11), + GLSLANG_MSG_HLSL_LEGALIZATION_BIT = (1 << 12), + GLSLANG_MSG_HLSL_DX9_COMPATIBLE_BIT = (1 << 13), + GLSLANG_MSG_BUILTIN_SYMBOL_TABLE_BIT = (1 << 14), + GLSLANG_MSG_ENHANCED = (1 << 15), + LAST_ELEMENT_MARKER(GLSLANG_MSG_COUNT), +} glslang_messages_t; + +/* EShReflectionOptions counterpart */ +typedef enum { + GLSLANG_REFLECTION_DEFAULT_BIT = 0, + GLSLANG_REFLECTION_STRICT_ARRAY_SUFFIX_BIT = (1 << 0), + GLSLANG_REFLECTION_BASIC_ARRAY_SUFFIX_BIT = (1 << 1), + GLSLANG_REFLECTION_INTERMEDIATE_IOO_BIT = (1 << 2), + GLSLANG_REFLECTION_SEPARATE_BUFFERS_BIT = (1 << 3), + GLSLANG_REFLECTION_ALL_BLOCK_VARIABLES_BIT = (1 << 4), + GLSLANG_REFLECTION_UNWRAP_IO_BLOCKS_BIT = (1 << 5), + GLSLANG_REFLECTION_ALL_IO_VARIABLES_BIT = (1 << 6), + GLSLANG_REFLECTION_SHARED_STD140_SSBO_BIT = (1 << 7), + 
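+    /* Annotation (not part of the upstream header): the enums in this file
+       feed the C API declared in glslang_c_interface.h earlier in this patch.
+       A minimal GLSL -> SPIR-V compile with that API looks roughly like the
+       sketch below; glslang_initialize_process(), glslang_default_resource()
+       and the glslang_input_t field names are assumptions taken from the
+       bundled glslang headers rather than from this file:
+
+           const glslang_input_t input = {
+               .language                = GLSLANG_SOURCE_GLSL,
+               .stage                   = GLSLANG_STAGE_COMPUTE,
+               .client                  = GLSLANG_CLIENT_VULKAN,
+               .client_version          = GLSLANG_TARGET_VULKAN_1_1,
+               .target_language         = GLSLANG_TARGET_SPV,
+               .target_language_version = GLSLANG_TARGET_SPV_1_3,
+               .code                    = glsl_source,   // NUL-terminated GLSL text
+               .default_version         = 450,
+               .default_profile         = GLSLANG_NO_PROFILE,
+               .messages                = GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT,
+               .resource                = glslang_default_resource(),
+           };
+           glslang_initialize_process();
+           glslang_shader_t* shader = glslang_shader_create(&input);
+           if (!glslang_shader_preprocess(shader, &input) ||
+               !glslang_shader_parse(shader, &input))
+               fprintf(stderr, "%s\n", glslang_shader_get_info_log(shader));
+           glslang_program_t* program = glslang_program_create();
+           glslang_program_add_shader(program, shader);
+           if (glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT))
+           {
+               glslang_program_SPIRV_generate(program, GLSLANG_STAGE_COMPUTE);
+               size_t        size  = glslang_program_SPIRV_get_size(program);
+               unsigned int* words = glslang_program_SPIRV_get_ptr(program);
+               // 'size' SPIR-V words at 'words', valid until the program is deleted
+           }
+           glslang_program_delete(program);
+           glslang_shader_delete(shader);
+    */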
GLSLANG_REFLECTION_SHARED_STD140_UBO_BIT = (1 << 8), + LAST_ELEMENT_MARKER(GLSLANG_REFLECTION_COUNT), +} glslang_reflection_options_t; + +/* EProfile counterpart (from Versions.h) */ +typedef enum { + GLSLANG_BAD_PROFILE = 0, + GLSLANG_NO_PROFILE = (1 << 0), + GLSLANG_CORE_PROFILE = (1 << 1), + GLSLANG_COMPATIBILITY_PROFILE = (1 << 2), + GLSLANG_ES_PROFILE = (1 << 3), + LAST_ELEMENT_MARKER(GLSLANG_PROFILE_COUNT), +} glslang_profile_t; + +/* Shader options */ +typedef enum { + GLSLANG_SHADER_DEFAULT_BIT = 0, + GLSLANG_SHADER_AUTO_MAP_BINDINGS = (1 << 0), + GLSLANG_SHADER_AUTO_MAP_LOCATIONS = (1 << 1), + GLSLANG_SHADER_VULKAN_RULES_RELAXED = (1 << 2), + LAST_ELEMENT_MARKER(GLSLANG_SHADER_COUNT), +} glslang_shader_options_t; + +/* TResourceType counterpart */ +typedef enum { + GLSLANG_RESOURCE_TYPE_SAMPLER, + GLSLANG_RESOURCE_TYPE_TEXTURE, + GLSLANG_RESOURCE_TYPE_IMAGE, + GLSLANG_RESOURCE_TYPE_UBO, + GLSLANG_RESOURCE_TYPE_SSBO, + GLSLANG_RESOURCE_TYPE_UAV, + LAST_ELEMENT_MARKER(GLSLANG_RESOURCE_TYPE_COUNT), +} glslang_resource_type_t; + +#undef LAST_ELEMENT_MARKER + +#endif diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/intermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/intermediate.h new file mode 100644 index 0000000..a024002 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Include/intermediate.h @@ -0,0 +1,1850 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2016 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// Definition of the in-memory high-level intermediate representation +// of shaders. This is a tree that parser creates. +// +// Nodes in the tree are defined as a hierarchy of classes derived from +// TIntermNode. Each is a node in a tree. 
There is no preset branching factor; +// each node can have it's own type of list of children. +// + +#ifndef __INTERMEDIATE_H +#define __INTERMEDIATE_H + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + #pragma warning(disable : 4464) // relative include path contains '..' + #pragma warning(disable : 5026) // 'glslang::TIntermUnary': move constructor was implicitly defined as deleted +#endif + +#include "../Include/Common.h" +#include "../Include/Types.h" +#include "../Include/ConstantUnion.h" + +namespace glslang { + +class TIntermediate; + +// +// Operators used by the high-level (parse tree) representation. +// +enum TOperator { + EOpNull, // if in a node, should only mean a node is still being built + EOpSequence, // denotes a list of statements, or parameters, etc. + EOpScope, // Used by debugging to denote a scoped list of statements + EOpLinkerObjects, // for aggregate node of objects the linker may need, if not reference by the rest of the AST + EOpFunctionCall, + EOpFunction, // For function definition + EOpParameters, // an aggregate listing the parameters to a function +#ifndef GLSLANG_WEB + EOpSpirvInst, +#endif + + // + // Unary operators + // + + EOpNegative, + EOpLogicalNot, + EOpVectorLogicalNot, + EOpBitwiseNot, + + EOpPostIncrement, + EOpPostDecrement, + EOpPreIncrement, + EOpPreDecrement, + + EOpCopyObject, + + EOpDeclare, // Used by debugging to force declaration of variable in correct scope + + // (u)int* -> bool + EOpConvInt8ToBool, + EOpConvUint8ToBool, + EOpConvInt16ToBool, + EOpConvUint16ToBool, + EOpConvIntToBool, + EOpConvUintToBool, + EOpConvInt64ToBool, + EOpConvUint64ToBool, + + // float* -> bool + EOpConvFloat16ToBool, + EOpConvFloatToBool, + EOpConvDoubleToBool, + + // bool -> (u)int* + EOpConvBoolToInt8, + EOpConvBoolToUint8, + EOpConvBoolToInt16, + EOpConvBoolToUint16, + EOpConvBoolToInt, + EOpConvBoolToUint, + EOpConvBoolToInt64, + EOpConvBoolToUint64, + + // bool -> float* + EOpConvBoolToFloat16, + EOpConvBoolToFloat, + EOpConvBoolToDouble, + + // int8_t -> (u)int* + EOpConvInt8ToInt16, + EOpConvInt8ToInt, + EOpConvInt8ToInt64, + EOpConvInt8ToUint8, + EOpConvInt8ToUint16, + EOpConvInt8ToUint, + EOpConvInt8ToUint64, + + // uint8_t -> (u)int* + EOpConvUint8ToInt8, + EOpConvUint8ToInt16, + EOpConvUint8ToInt, + EOpConvUint8ToInt64, + EOpConvUint8ToUint16, + EOpConvUint8ToUint, + EOpConvUint8ToUint64, + + // int8_t -> float* + EOpConvInt8ToFloat16, + EOpConvInt8ToFloat, + EOpConvInt8ToDouble, + + // uint8_t -> float* + EOpConvUint8ToFloat16, + EOpConvUint8ToFloat, + EOpConvUint8ToDouble, + + // int16_t -> (u)int* + EOpConvInt16ToInt8, + EOpConvInt16ToInt, + EOpConvInt16ToInt64, + EOpConvInt16ToUint8, + EOpConvInt16ToUint16, + EOpConvInt16ToUint, + EOpConvInt16ToUint64, + + // uint16_t -> (u)int* + EOpConvUint16ToInt8, + EOpConvUint16ToInt16, + EOpConvUint16ToInt, + EOpConvUint16ToInt64, + EOpConvUint16ToUint8, + EOpConvUint16ToUint, + EOpConvUint16ToUint64, + + // int16_t -> float* + EOpConvInt16ToFloat16, + EOpConvInt16ToFloat, + EOpConvInt16ToDouble, + + // uint16_t -> float* + EOpConvUint16ToFloat16, + EOpConvUint16ToFloat, + EOpConvUint16ToDouble, + + // int32_t -> (u)int* + EOpConvIntToInt8, + EOpConvIntToInt16, + EOpConvIntToInt64, + EOpConvIntToUint8, + EOpConvIntToUint16, + EOpConvIntToUint, + EOpConvIntToUint64, + + // uint32_t -> (u)int* + EOpConvUintToInt8, + EOpConvUintToInt16, + EOpConvUintToInt, + EOpConvUintToInt64, + EOpConvUintToUint8, + EOpConvUintToUint16, + EOpConvUintToUint64, + + // int32_t -> float* + EOpConvIntToFloat16, + 
EOpConvIntToFloat, + EOpConvIntToDouble, + + // uint32_t -> float* + EOpConvUintToFloat16, + EOpConvUintToFloat, + EOpConvUintToDouble, + + // int64_t -> (u)int* + EOpConvInt64ToInt8, + EOpConvInt64ToInt16, + EOpConvInt64ToInt, + EOpConvInt64ToUint8, + EOpConvInt64ToUint16, + EOpConvInt64ToUint, + EOpConvInt64ToUint64, + + // uint64_t -> (u)int* + EOpConvUint64ToInt8, + EOpConvUint64ToInt16, + EOpConvUint64ToInt, + EOpConvUint64ToInt64, + EOpConvUint64ToUint8, + EOpConvUint64ToUint16, + EOpConvUint64ToUint, + + // int64_t -> float* + EOpConvInt64ToFloat16, + EOpConvInt64ToFloat, + EOpConvInt64ToDouble, + + // uint64_t -> float* + EOpConvUint64ToFloat16, + EOpConvUint64ToFloat, + EOpConvUint64ToDouble, + + // float16_t -> (u)int* + EOpConvFloat16ToInt8, + EOpConvFloat16ToInt16, + EOpConvFloat16ToInt, + EOpConvFloat16ToInt64, + EOpConvFloat16ToUint8, + EOpConvFloat16ToUint16, + EOpConvFloat16ToUint, + EOpConvFloat16ToUint64, + + // float16_t -> float* + EOpConvFloat16ToFloat, + EOpConvFloat16ToDouble, + + // float -> (u)int* + EOpConvFloatToInt8, + EOpConvFloatToInt16, + EOpConvFloatToInt, + EOpConvFloatToInt64, + EOpConvFloatToUint8, + EOpConvFloatToUint16, + EOpConvFloatToUint, + EOpConvFloatToUint64, + + // float -> float* + EOpConvFloatToFloat16, + EOpConvFloatToDouble, + + // float64 _t-> (u)int* + EOpConvDoubleToInt8, + EOpConvDoubleToInt16, + EOpConvDoubleToInt, + EOpConvDoubleToInt64, + EOpConvDoubleToUint8, + EOpConvDoubleToUint16, + EOpConvDoubleToUint, + EOpConvDoubleToUint64, + + // float64_t -> float* + EOpConvDoubleToFloat16, + EOpConvDoubleToFloat, + + // uint64_t <-> pointer + EOpConvUint64ToPtr, + EOpConvPtrToUint64, + + // uvec2 <-> pointer + EOpConvUvec2ToPtr, + EOpConvPtrToUvec2, + + // uint64_t -> accelerationStructureEXT + EOpConvUint64ToAccStruct, + + // uvec2 -> accelerationStructureEXT + EOpConvUvec2ToAccStruct, + + // + // binary operations + // + + EOpAdd, + EOpSub, + EOpMul, + EOpDiv, + EOpMod, + EOpRightShift, + EOpLeftShift, + EOpAnd, + EOpInclusiveOr, + EOpExclusiveOr, + EOpEqual, + EOpNotEqual, + EOpVectorEqual, + EOpVectorNotEqual, + EOpLessThan, + EOpGreaterThan, + EOpLessThanEqual, + EOpGreaterThanEqual, + EOpComma, + + EOpVectorTimesScalar, + EOpVectorTimesMatrix, + EOpMatrixTimesVector, + EOpMatrixTimesScalar, + + EOpLogicalOr, + EOpLogicalXor, + EOpLogicalAnd, + + EOpIndexDirect, + EOpIndexIndirect, + EOpIndexDirectStruct, + + EOpVectorSwizzle, + + EOpMethod, + EOpScoping, + + // + // Built-in functions mapped to operators + // + + EOpRadians, + EOpDegrees, + EOpSin, + EOpCos, + EOpTan, + EOpAsin, + EOpAcos, + EOpAtan, + EOpSinh, + EOpCosh, + EOpTanh, + EOpAsinh, + EOpAcosh, + EOpAtanh, + + EOpPow, + EOpExp, + EOpLog, + EOpExp2, + EOpLog2, + EOpSqrt, + EOpInverseSqrt, + + EOpAbs, + EOpSign, + EOpFloor, + EOpTrunc, + EOpRound, + EOpRoundEven, + EOpCeil, + EOpFract, + EOpModf, + EOpMin, + EOpMax, + EOpClamp, + EOpMix, + EOpStep, + EOpSmoothStep, + + EOpIsNan, + EOpIsInf, + + EOpFma, + + EOpFrexp, + EOpLdexp, + + EOpFloatBitsToInt, + EOpFloatBitsToUint, + EOpIntBitsToFloat, + EOpUintBitsToFloat, + EOpDoubleBitsToInt64, + EOpDoubleBitsToUint64, + EOpInt64BitsToDouble, + EOpUint64BitsToDouble, + EOpFloat16BitsToInt16, + EOpFloat16BitsToUint16, + EOpInt16BitsToFloat16, + EOpUint16BitsToFloat16, + EOpPackSnorm2x16, + EOpUnpackSnorm2x16, + EOpPackUnorm2x16, + EOpUnpackUnorm2x16, + EOpPackSnorm4x8, + EOpUnpackSnorm4x8, + EOpPackUnorm4x8, + EOpUnpackUnorm4x8, + EOpPackHalf2x16, + EOpUnpackHalf2x16, + EOpPackDouble2x32, + EOpUnpackDouble2x32, + EOpPackInt2x32, + 
EOpUnpackInt2x32, + EOpPackUint2x32, + EOpUnpackUint2x32, + EOpPackFloat2x16, + EOpUnpackFloat2x16, + EOpPackInt2x16, + EOpUnpackInt2x16, + EOpPackUint2x16, + EOpUnpackUint2x16, + EOpPackInt4x16, + EOpUnpackInt4x16, + EOpPackUint4x16, + EOpUnpackUint4x16, + EOpPack16, + EOpPack32, + EOpPack64, + EOpUnpack32, + EOpUnpack16, + EOpUnpack8, + + EOpLength, + EOpDistance, + EOpDot, + EOpCross, + EOpNormalize, + EOpFaceForward, + EOpReflect, + EOpRefract, + + EOpMin3, + EOpMax3, + EOpMid3, + + EOpDPdx, // Fragment only + EOpDPdy, // Fragment only + EOpFwidth, // Fragment only + EOpDPdxFine, // Fragment only + EOpDPdyFine, // Fragment only + EOpFwidthFine, // Fragment only + EOpDPdxCoarse, // Fragment only + EOpDPdyCoarse, // Fragment only + EOpFwidthCoarse, // Fragment only + + EOpInterpolateAtCentroid, // Fragment only + EOpInterpolateAtSample, // Fragment only + EOpInterpolateAtOffset, // Fragment only + EOpInterpolateAtVertex, + + EOpMatrixTimesMatrix, + EOpOuterProduct, + EOpDeterminant, + EOpMatrixInverse, + EOpTranspose, + + EOpFtransform, + + EOpNoise, + + EOpEmitVertex, // geometry only + EOpEndPrimitive, // geometry only + EOpEmitStreamVertex, // geometry only + EOpEndStreamPrimitive, // geometry only + + EOpBarrier, + EOpMemoryBarrier, + EOpMemoryBarrierAtomicCounter, + EOpMemoryBarrierBuffer, + EOpMemoryBarrierImage, + EOpMemoryBarrierShared, // compute only + EOpGroupMemoryBarrier, // compute only + + EOpBallot, + EOpReadInvocation, + EOpReadFirstInvocation, + + EOpAnyInvocation, + EOpAllInvocations, + EOpAllInvocationsEqual, + + EOpSubgroupGuardStart, + EOpSubgroupBarrier, + EOpSubgroupMemoryBarrier, + EOpSubgroupMemoryBarrierBuffer, + EOpSubgroupMemoryBarrierImage, + EOpSubgroupMemoryBarrierShared, // compute only + EOpSubgroupElect, + EOpSubgroupAll, + EOpSubgroupAny, + EOpSubgroupAllEqual, + EOpSubgroupBroadcast, + EOpSubgroupBroadcastFirst, + EOpSubgroupBallot, + EOpSubgroupInverseBallot, + EOpSubgroupBallotBitExtract, + EOpSubgroupBallotBitCount, + EOpSubgroupBallotInclusiveBitCount, + EOpSubgroupBallotExclusiveBitCount, + EOpSubgroupBallotFindLSB, + EOpSubgroupBallotFindMSB, + EOpSubgroupShuffle, + EOpSubgroupShuffleXor, + EOpSubgroupShuffleUp, + EOpSubgroupShuffleDown, + EOpSubgroupAdd, + EOpSubgroupMul, + EOpSubgroupMin, + EOpSubgroupMax, + EOpSubgroupAnd, + EOpSubgroupOr, + EOpSubgroupXor, + EOpSubgroupInclusiveAdd, + EOpSubgroupInclusiveMul, + EOpSubgroupInclusiveMin, + EOpSubgroupInclusiveMax, + EOpSubgroupInclusiveAnd, + EOpSubgroupInclusiveOr, + EOpSubgroupInclusiveXor, + EOpSubgroupExclusiveAdd, + EOpSubgroupExclusiveMul, + EOpSubgroupExclusiveMin, + EOpSubgroupExclusiveMax, + EOpSubgroupExclusiveAnd, + EOpSubgroupExclusiveOr, + EOpSubgroupExclusiveXor, + EOpSubgroupClusteredAdd, + EOpSubgroupClusteredMul, + EOpSubgroupClusteredMin, + EOpSubgroupClusteredMax, + EOpSubgroupClusteredAnd, + EOpSubgroupClusteredOr, + EOpSubgroupClusteredXor, + EOpSubgroupQuadBroadcast, + EOpSubgroupQuadSwapHorizontal, + EOpSubgroupQuadSwapVertical, + EOpSubgroupQuadSwapDiagonal, + + EOpSubgroupPartition, + EOpSubgroupPartitionedAdd, + EOpSubgroupPartitionedMul, + EOpSubgroupPartitionedMin, + EOpSubgroupPartitionedMax, + EOpSubgroupPartitionedAnd, + EOpSubgroupPartitionedOr, + EOpSubgroupPartitionedXor, + EOpSubgroupPartitionedInclusiveAdd, + EOpSubgroupPartitionedInclusiveMul, + EOpSubgroupPartitionedInclusiveMin, + EOpSubgroupPartitionedInclusiveMax, + EOpSubgroupPartitionedInclusiveAnd, + EOpSubgroupPartitionedInclusiveOr, + EOpSubgroupPartitionedInclusiveXor, + 
EOpSubgroupPartitionedExclusiveAdd, + EOpSubgroupPartitionedExclusiveMul, + EOpSubgroupPartitionedExclusiveMin, + EOpSubgroupPartitionedExclusiveMax, + EOpSubgroupPartitionedExclusiveAnd, + EOpSubgroupPartitionedExclusiveOr, + EOpSubgroupPartitionedExclusiveXor, + + EOpSubgroupGuardStop, + + EOpMinInvocations, + EOpMaxInvocations, + EOpAddInvocations, + EOpMinInvocationsNonUniform, + EOpMaxInvocationsNonUniform, + EOpAddInvocationsNonUniform, + EOpMinInvocationsInclusiveScan, + EOpMaxInvocationsInclusiveScan, + EOpAddInvocationsInclusiveScan, + EOpMinInvocationsInclusiveScanNonUniform, + EOpMaxInvocationsInclusiveScanNonUniform, + EOpAddInvocationsInclusiveScanNonUniform, + EOpMinInvocationsExclusiveScan, + EOpMaxInvocationsExclusiveScan, + EOpAddInvocationsExclusiveScan, + EOpMinInvocationsExclusiveScanNonUniform, + EOpMaxInvocationsExclusiveScanNonUniform, + EOpAddInvocationsExclusiveScanNonUniform, + EOpSwizzleInvocations, + EOpSwizzleInvocationsMasked, + EOpWriteInvocation, + EOpMbcnt, + + EOpCubeFaceIndex, + EOpCubeFaceCoord, + EOpTime, + + EOpAtomicAdd, + EOpAtomicSubtract, + EOpAtomicMin, + EOpAtomicMax, + EOpAtomicAnd, + EOpAtomicOr, + EOpAtomicXor, + EOpAtomicExchange, + EOpAtomicCompSwap, + EOpAtomicLoad, + EOpAtomicStore, + + EOpAtomicCounterIncrement, // results in pre-increment value + EOpAtomicCounterDecrement, // results in post-decrement value + EOpAtomicCounter, + EOpAtomicCounterAdd, + EOpAtomicCounterSubtract, + EOpAtomicCounterMin, + EOpAtomicCounterMax, + EOpAtomicCounterAnd, + EOpAtomicCounterOr, + EOpAtomicCounterXor, + EOpAtomicCounterExchange, + EOpAtomicCounterCompSwap, + + EOpAny, + EOpAll, + + EOpCooperativeMatrixLoad, + EOpCooperativeMatrixStore, + EOpCooperativeMatrixMulAdd, + + EOpBeginInvocationInterlock, // Fragment only + EOpEndInvocationInterlock, // Fragment only + + EOpIsHelperInvocation, + + EOpDebugPrintf, + + // + // Branch + // + + EOpKill, // Fragment only + EOpTerminateInvocation, // Fragment only + EOpDemote, // Fragment only + EOpTerminateRayKHR, // Any-hit only + EOpIgnoreIntersectionKHR, // Any-hit only + EOpReturn, + EOpBreak, + EOpContinue, + EOpCase, + EOpDefault, + + // + // Constructors + // + + EOpConstructGuardStart, + EOpConstructInt, // these first scalar forms also identify what implicit conversion is needed + EOpConstructUint, + EOpConstructInt8, + EOpConstructUint8, + EOpConstructInt16, + EOpConstructUint16, + EOpConstructInt64, + EOpConstructUint64, + EOpConstructBool, + EOpConstructFloat, + EOpConstructDouble, + // Keep vector and matrix constructors in a consistent relative order for + // TParseContext::constructBuiltIn, which converts between 8/16/32 bit + // vector constructors + EOpConstructVec2, + EOpConstructVec3, + EOpConstructVec4, + EOpConstructMat2x2, + EOpConstructMat2x3, + EOpConstructMat2x4, + EOpConstructMat3x2, + EOpConstructMat3x3, + EOpConstructMat3x4, + EOpConstructMat4x2, + EOpConstructMat4x3, + EOpConstructMat4x4, + EOpConstructDVec2, + EOpConstructDVec3, + EOpConstructDVec4, + EOpConstructBVec2, + EOpConstructBVec3, + EOpConstructBVec4, + EOpConstructI8Vec2, + EOpConstructI8Vec3, + EOpConstructI8Vec4, + EOpConstructU8Vec2, + EOpConstructU8Vec3, + EOpConstructU8Vec4, + EOpConstructI16Vec2, + EOpConstructI16Vec3, + EOpConstructI16Vec4, + EOpConstructU16Vec2, + EOpConstructU16Vec3, + EOpConstructU16Vec4, + EOpConstructIVec2, + EOpConstructIVec3, + EOpConstructIVec4, + EOpConstructUVec2, + EOpConstructUVec3, + EOpConstructUVec4, + EOpConstructI64Vec2, + EOpConstructI64Vec3, + EOpConstructI64Vec4, + 
EOpConstructU64Vec2, + EOpConstructU64Vec3, + EOpConstructU64Vec4, + EOpConstructDMat2x2, + EOpConstructDMat2x3, + EOpConstructDMat2x4, + EOpConstructDMat3x2, + EOpConstructDMat3x3, + EOpConstructDMat3x4, + EOpConstructDMat4x2, + EOpConstructDMat4x3, + EOpConstructDMat4x4, + EOpConstructIMat2x2, + EOpConstructIMat2x3, + EOpConstructIMat2x4, + EOpConstructIMat3x2, + EOpConstructIMat3x3, + EOpConstructIMat3x4, + EOpConstructIMat4x2, + EOpConstructIMat4x3, + EOpConstructIMat4x4, + EOpConstructUMat2x2, + EOpConstructUMat2x3, + EOpConstructUMat2x4, + EOpConstructUMat3x2, + EOpConstructUMat3x3, + EOpConstructUMat3x4, + EOpConstructUMat4x2, + EOpConstructUMat4x3, + EOpConstructUMat4x4, + EOpConstructBMat2x2, + EOpConstructBMat2x3, + EOpConstructBMat2x4, + EOpConstructBMat3x2, + EOpConstructBMat3x3, + EOpConstructBMat3x4, + EOpConstructBMat4x2, + EOpConstructBMat4x3, + EOpConstructBMat4x4, + EOpConstructFloat16, + EOpConstructF16Vec2, + EOpConstructF16Vec3, + EOpConstructF16Vec4, + EOpConstructF16Mat2x2, + EOpConstructF16Mat2x3, + EOpConstructF16Mat2x4, + EOpConstructF16Mat3x2, + EOpConstructF16Mat3x3, + EOpConstructF16Mat3x4, + EOpConstructF16Mat4x2, + EOpConstructF16Mat4x3, + EOpConstructF16Mat4x4, + EOpConstructStruct, + EOpConstructTextureSampler, + EOpConstructNonuniform, // expected to be transformed away, not present in final AST + EOpConstructReference, + EOpConstructCooperativeMatrix, + EOpConstructAccStruct, + EOpConstructGuardEnd, + + // + // moves + // + + EOpAssign, + EOpAddAssign, + EOpSubAssign, + EOpMulAssign, + EOpVectorTimesMatrixAssign, + EOpVectorTimesScalarAssign, + EOpMatrixTimesScalarAssign, + EOpMatrixTimesMatrixAssign, + EOpDivAssign, + EOpModAssign, + EOpAndAssign, + EOpInclusiveOrAssign, + EOpExclusiveOrAssign, + EOpLeftShiftAssign, + EOpRightShiftAssign, + + // + // Array operators + // + + // Can apply to arrays, vectors, or matrices. + // Can be decomposed to a constant at compile time, but this does not always happen, + // due to link-time effects. So, consumer can expect either a link-time sized or + // run-time sized array. 
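+    // Annotation (not upstream): a concrete case where the length cannot fold
+    // to a front-end constant is the run-time sized tail member of an SSBO:
+    //
+    //     layout(std430, binding = 0) buffer Blob { float data[]; };
+    //     ... data.length() ...
+    //
+    // Here the EOpArrayLength node survives into the generated SPIR-V
+    // (OpArrayLength), since the size is only known from the bound buffer.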
+ EOpArrayLength, + + // + // Image operations + // + + EOpImageGuardBegin, + + EOpImageQuerySize, + EOpImageQuerySamples, + EOpImageLoad, + EOpImageStore, + EOpImageLoadLod, + EOpImageStoreLod, + EOpImageAtomicAdd, + EOpImageAtomicMin, + EOpImageAtomicMax, + EOpImageAtomicAnd, + EOpImageAtomicOr, + EOpImageAtomicXor, + EOpImageAtomicExchange, + EOpImageAtomicCompSwap, + EOpImageAtomicLoad, + EOpImageAtomicStore, + + EOpSubpassLoad, + EOpSubpassLoadMS, + EOpSparseImageLoad, + EOpSparseImageLoadLod, + + EOpImageGuardEnd, + + // + // Texture operations + // + + EOpTextureGuardBegin, + + EOpTextureQuerySize, + EOpTextureQueryLod, + EOpTextureQueryLevels, + EOpTextureQuerySamples, + + EOpSamplingGuardBegin, + + EOpTexture, + EOpTextureProj, + EOpTextureLod, + EOpTextureOffset, + EOpTextureFetch, + EOpTextureFetchOffset, + EOpTextureProjOffset, + EOpTextureLodOffset, + EOpTextureProjLod, + EOpTextureProjLodOffset, + EOpTextureGrad, + EOpTextureGradOffset, + EOpTextureProjGrad, + EOpTextureProjGradOffset, + EOpTextureGather, + EOpTextureGatherOffset, + EOpTextureGatherOffsets, + EOpTextureClamp, + EOpTextureOffsetClamp, + EOpTextureGradClamp, + EOpTextureGradOffsetClamp, + EOpTextureGatherLod, + EOpTextureGatherLodOffset, + EOpTextureGatherLodOffsets, + EOpFragmentMaskFetch, + EOpFragmentFetch, + + EOpSparseTextureGuardBegin, + + EOpSparseTexture, + EOpSparseTextureLod, + EOpSparseTextureOffset, + EOpSparseTextureFetch, + EOpSparseTextureFetchOffset, + EOpSparseTextureLodOffset, + EOpSparseTextureGrad, + EOpSparseTextureGradOffset, + EOpSparseTextureGather, + EOpSparseTextureGatherOffset, + EOpSparseTextureGatherOffsets, + EOpSparseTexelsResident, + EOpSparseTextureClamp, + EOpSparseTextureOffsetClamp, + EOpSparseTextureGradClamp, + EOpSparseTextureGradOffsetClamp, + EOpSparseTextureGatherLod, + EOpSparseTextureGatherLodOffset, + EOpSparseTextureGatherLodOffsets, + + EOpSparseTextureGuardEnd, + + EOpImageFootprintGuardBegin, + EOpImageSampleFootprintNV, + EOpImageSampleFootprintClampNV, + EOpImageSampleFootprintLodNV, + EOpImageSampleFootprintGradNV, + EOpImageSampleFootprintGradClampNV, + EOpImageFootprintGuardEnd, + EOpSamplingGuardEnd, + EOpTextureGuardEnd, + + // + // Integer operations + // + + EOpAddCarry, + EOpSubBorrow, + EOpUMulExtended, + EOpIMulExtended, + EOpBitfieldExtract, + EOpBitfieldInsert, + EOpBitFieldReverse, + EOpBitCount, + EOpFindLSB, + EOpFindMSB, + + EOpCountLeadingZeros, + EOpCountTrailingZeros, + EOpAbsDifference, + EOpAddSaturate, + EOpSubSaturate, + EOpAverage, + EOpAverageRounded, + EOpMul32x16, + + EOpTraceNV, + EOpTraceRayMotionNV, + EOpTraceKHR, + EOpReportIntersection, + EOpIgnoreIntersectionNV, + EOpTerminateRayNV, + EOpExecuteCallableNV, + EOpExecuteCallableKHR, + EOpWritePackedPrimitiveIndices4x8NV, + EOpEmitMeshTasksEXT, + EOpSetMeshOutputsEXT, + + // + // GL_EXT_ray_query operations + // + + EOpRayQueryInitialize, + EOpRayQueryTerminate, + EOpRayQueryGenerateIntersection, + EOpRayQueryConfirmIntersection, + EOpRayQueryProceed, + EOpRayQueryGetIntersectionType, + EOpRayQueryGetRayTMin, + EOpRayQueryGetRayFlags, + EOpRayQueryGetIntersectionT, + EOpRayQueryGetIntersectionInstanceCustomIndex, + EOpRayQueryGetIntersectionInstanceId, + EOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffset, + EOpRayQueryGetIntersectionGeometryIndex, + EOpRayQueryGetIntersectionPrimitiveIndex, + EOpRayQueryGetIntersectionBarycentrics, + EOpRayQueryGetIntersectionFrontFace, + EOpRayQueryGetIntersectionCandidateAABBOpaque, + EOpRayQueryGetIntersectionObjectRayDirection, + 
EOpRayQueryGetIntersectionObjectRayOrigin, + EOpRayQueryGetWorldRayDirection, + EOpRayQueryGetWorldRayOrigin, + EOpRayQueryGetIntersectionObjectToWorld, + EOpRayQueryGetIntersectionWorldToObject, + + // + // HLSL operations + // + + EOpClip, // discard if input value < 0 + EOpIsFinite, + EOpLog10, // base 10 log + EOpRcp, // 1/x + EOpSaturate, // clamp from 0 to 1 + EOpSinCos, // sin and cos in out parameters + EOpGenMul, // mul(x,y) on any of mat/vec/scalars + EOpDst, // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w + EOpInterlockedAdd, // atomic ops, but uses [optional] out arg instead of return + EOpInterlockedAnd, // ... + EOpInterlockedCompareExchange, // ... + EOpInterlockedCompareStore, // ... + EOpInterlockedExchange, // ... + EOpInterlockedMax, // ... + EOpInterlockedMin, // ... + EOpInterlockedOr, // ... + EOpInterlockedXor, // ... + EOpAllMemoryBarrierWithGroupSync, // memory barriers without non-hlsl AST equivalents + EOpDeviceMemoryBarrier, // ... + EOpDeviceMemoryBarrierWithGroupSync, // ... + EOpWorkgroupMemoryBarrier, // ... + EOpWorkgroupMemoryBarrierWithGroupSync, // ... + EOpEvaluateAttributeSnapped, // InterpolateAtOffset with int position on 16x16 grid + EOpF32tof16, // HLSL conversion: half of a PackHalf2x16 + EOpF16tof32, // HLSL conversion: half of an UnpackHalf2x16 + EOpLit, // HLSL lighting coefficient vector + EOpTextureBias, // HLSL texture bias: will be lowered to EOpTexture + EOpAsDouble, // slightly different from EOpUint64BitsToDouble + EOpD3DCOLORtoUBYTE4, // convert and swizzle 4-component color to UBYTE4 range + + EOpMethodSample, // Texture object methods. These are translated to existing + EOpMethodSampleBias, // AST methods, and exist to represent HLSL semantics until that + EOpMethodSampleCmp, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodSampleCmpLevelZero, // ... + EOpMethodSampleGrad, // ... + EOpMethodSampleLevel, // ... + EOpMethodLoad, // ... + EOpMethodGetDimensions, // ... + EOpMethodGetSamplePosition, // ... + EOpMethodGather, // ... + EOpMethodCalculateLevelOfDetail, // ... + EOpMethodCalculateLevelOfDetailUnclamped, // ... + + // Load already defined above for textures + EOpMethodLoad2, // Structure buffer object methods. These are translated to existing + EOpMethodLoad3, // AST methods, and exist to represent HLSL semantics until that + EOpMethodLoad4, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodStore, // ... + EOpMethodStore2, // ... + EOpMethodStore3, // ... + EOpMethodStore4, // ... + EOpMethodIncrementCounter, // ... + EOpMethodDecrementCounter, // ... + // EOpMethodAppend is defined for geo shaders below + EOpMethodConsume, + + // SM5 texture methods + EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about + EOpMethodGatherGreen, // translation to existing AST opcodes. They exist temporarily + EOpMethodGatherBlue, // because HLSL arguments are slightly different. + EOpMethodGatherAlpha, // ... + EOpMethodGatherCmp, // ... + EOpMethodGatherCmpRed, // ... + EOpMethodGatherCmpGreen, // ... + EOpMethodGatherCmpBlue, // ... + EOpMethodGatherCmpAlpha, // ... + + // geometry methods + EOpMethodAppend, // Geometry shader methods + EOpMethodRestartStrip, // ... + + // matrix + EOpMatrixSwizzle, // select multiple matrix components (non-column) + + // SM6 wave ops + EOpWaveGetLaneCount, // Will decompose to gl_SubgroupSize. + EOpWaveGetLaneIndex, // Will decompose to gl_SubgroupInvocationID. 
+ EOpWaveActiveCountBits, // Will decompose to subgroupBallotBitCount(subgroupBallot()). + EOpWavePrefixCountBits, // Will decompose to subgroupBallotInclusiveBitCount(subgroupBallot()). + + // Shader Clock Ops + EOpReadClockSubgroupKHR, + EOpReadClockDeviceKHR, +}; + +class TIntermTraverser; +class TIntermOperator; +class TIntermAggregate; +class TIntermUnary; +class TIntermBinary; +class TIntermConstantUnion; +class TIntermSelection; +class TIntermSwitch; +class TIntermBranch; +class TIntermTyped; +class TIntermMethod; +class TIntermSymbol; +class TIntermLoop; + +} // end namespace glslang + +// +// Base class for the tree nodes +// +// (Put outside the glslang namespace, as it's used as part of the external interface.) +// +class TIntermNode { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + + TIntermNode() { loc.init(); } + virtual const glslang::TSourceLoc& getLoc() const { return loc; } + virtual void setLoc(const glslang::TSourceLoc& l) { loc = l; } + virtual void traverse(glslang::TIntermTraverser*) = 0; + virtual glslang::TIntermTyped* getAsTyped() { return 0; } + virtual glslang::TIntermOperator* getAsOperator() { return 0; } + virtual glslang::TIntermConstantUnion* getAsConstantUnion() { return 0; } + virtual glslang::TIntermAggregate* getAsAggregate() { return 0; } + virtual glslang::TIntermUnary* getAsUnaryNode() { return 0; } + virtual glslang::TIntermBinary* getAsBinaryNode() { return 0; } + virtual glslang::TIntermSelection* getAsSelectionNode() { return 0; } + virtual glslang::TIntermSwitch* getAsSwitchNode() { return 0; } + virtual glslang::TIntermMethod* getAsMethodNode() { return 0; } + virtual glslang::TIntermSymbol* getAsSymbolNode() { return 0; } + virtual glslang::TIntermBranch* getAsBranchNode() { return 0; } + virtual glslang::TIntermLoop* getAsLoopNode() { return 0; } + + virtual const glslang::TIntermTyped* getAsTyped() const { return 0; } + virtual const glslang::TIntermOperator* getAsOperator() const { return 0; } + virtual const glslang::TIntermConstantUnion* getAsConstantUnion() const { return 0; } + virtual const glslang::TIntermAggregate* getAsAggregate() const { return 0; } + virtual const glslang::TIntermUnary* getAsUnaryNode() const { return 0; } + virtual const glslang::TIntermBinary* getAsBinaryNode() const { return 0; } + virtual const glslang::TIntermSelection* getAsSelectionNode() const { return 0; } + virtual const glslang::TIntermSwitch* getAsSwitchNode() const { return 0; } + virtual const glslang::TIntermMethod* getAsMethodNode() const { return 0; } + virtual const glslang::TIntermSymbol* getAsSymbolNode() const { return 0; } + virtual const glslang::TIntermBranch* getAsBranchNode() const { return 0; } + virtual const glslang::TIntermLoop* getAsLoopNode() const { return 0; } + virtual ~TIntermNode() { } + +protected: + TIntermNode(const TIntermNode&); + TIntermNode& operator=(const TIntermNode&); + glslang::TSourceLoc loc; +}; + +namespace glslang { + +// +// This is just to help yacc. +// +struct TIntermNodePair { + TIntermNode* node1; + TIntermNode* node2; +}; + +// +// Intermediate class for nodes that have a type. 
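+// Annotation (not upstream): note that the node hierarchy avoids C++ RTTI;
+// consumers discriminate node kinds through TIntermNode's getAs*() virtuals
+// above, which return null for every kind but the node's own, e.g.:
+//
+//     if (glslang::TIntermBinary* bin = node->getAsBinaryNode())
+//         visitChildren(bin->getLeft(), bin->getRight());  // visitChildren() hypothetical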
+// +class TIntermTyped : public TIntermNode { +public: + TIntermTyped(const TType& t) { type.shallowCopy(t); } + TIntermTyped(TBasicType basicType) { TType bt(basicType); type.shallowCopy(bt); } + virtual TIntermTyped* getAsTyped() { return this; } + virtual const TIntermTyped* getAsTyped() const { return this; } + virtual void setType(const TType& t) { type.shallowCopy(t); } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { return type; } + + virtual TBasicType getBasicType() const { return type.getBasicType(); } + virtual TQualifier& getQualifier() { return type.getQualifier(); } + virtual const TQualifier& getQualifier() const { return type.getQualifier(); } + virtual TArraySizes* getArraySizes() { return type.getArraySizes(); } + virtual const TArraySizes* getArraySizes() const { return type.getArraySizes(); } + virtual void propagatePrecision(TPrecisionQualifier); + virtual int getVectorSize() const { return type.getVectorSize(); } + virtual int getMatrixCols() const { return type.getMatrixCols(); } + virtual int getMatrixRows() const { return type.getMatrixRows(); } + virtual bool isMatrix() const { return type.isMatrix(); } + virtual bool isArray() const { return type.isArray(); } + virtual bool isVector() const { return type.isVector(); } + virtual bool isScalar() const { return type.isScalar(); } + virtual bool isStruct() const { return type.isStruct(); } + virtual bool isFloatingDomain() const { return type.isFloatingDomain(); } + virtual bool isIntegerDomain() const { return type.isIntegerDomain(); } + bool isAtomic() const { return type.isAtomic(); } + bool isReference() const { return type.isReference(); } + TString getCompleteString(bool enhanced = false) const { return type.getCompleteString(enhanced); } + +protected: + TIntermTyped& operator=(const TIntermTyped&); + TType type; +}; + +// +// Handle for, do-while, and while loops. 
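+// Annotation (not upstream): the members below map onto the three loop forms:
+//
+//     while (c) b;          body=b  test=c  terminal=null  testFirst=true
+//     for (init; c; t) b;   body=b  test=c  terminal=t     testFirst=true
+//     do b; while (c);      body=b  test=c  terminal=null  testFirst=false
+//
+// (a for-loop's init part lives outside the loop node, typically in an
+// enclosing sequence).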
+// +class TIntermLoop : public TIntermNode { +public: + TIntermLoop(TIntermNode* aBody, TIntermTyped* aTest, TIntermTyped* aTerminal, bool testFirst) : + body(aBody), + test(aTest), + terminal(aTerminal), + first(testFirst), + unroll(false), + dontUnroll(false), + dependency(0), + minIterations(0), + maxIterations(iterationsInfinite), + iterationMultiple(1), + peelCount(0), + partialCount(0) + { } + + virtual TIntermLoop* getAsLoopNode() { return this; } + virtual const TIntermLoop* getAsLoopNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TIntermNode* getBody() const { return body; } + TIntermTyped* getTest() const { return test; } + TIntermTyped* getTerminal() const { return terminal; } + bool testFirst() const { return first; } + + void setUnroll() { unroll = true; } + void setDontUnroll() { + dontUnroll = true; + peelCount = 0; + partialCount = 0; + } + bool getUnroll() const { return unroll; } + bool getDontUnroll() const { return dontUnroll; } + + static const unsigned int dependencyInfinite = 0xFFFFFFFF; + static const unsigned int iterationsInfinite = 0xFFFFFFFF; + void setLoopDependency(int d) { dependency = d; } + int getLoopDependency() const { return dependency; } + + void setMinIterations(unsigned int v) { minIterations = v; } + unsigned int getMinIterations() const { return minIterations; } + void setMaxIterations(unsigned int v) { maxIterations = v; } + unsigned int getMaxIterations() const { return maxIterations; } + void setIterationMultiple(unsigned int v) { iterationMultiple = v; } + unsigned int getIterationMultiple() const { return iterationMultiple; } + void setPeelCount(unsigned int v) { + peelCount = v; + dontUnroll = false; + } + unsigned int getPeelCount() const { return peelCount; } + void setPartialCount(unsigned int v) { + partialCount = v; + dontUnroll = false; + } + unsigned int getPartialCount() const { return partialCount; } + +protected: + TIntermNode* body; // code to loop over + TIntermTyped* test; // exit condition associated with loop, could be 0 for 'for' loops + TIntermTyped* terminal; // exists for for-loops + bool first; // true for while and for, not for do-while + bool unroll; // true if unroll requested + bool dontUnroll; // true if request to not unroll + unsigned int dependency; // loop dependency hint; 0 means not set or unknown + unsigned int minIterations; // as per the SPIR-V specification + unsigned int maxIterations; // as per the SPIR-V specification + unsigned int iterationMultiple; // as per the SPIR-V specification + unsigned int peelCount; // as per the SPIR-V specification + unsigned int partialCount; // as per the SPIR-V specification +}; + +// +// Handle case, break, continue, return, and kill. +// +class TIntermBranch : public TIntermNode { +public: + TIntermBranch(TOperator op, TIntermTyped* e) : + flowOp(op), + expression(e) { } + virtual TIntermBranch* getAsBranchNode() { return this; } + virtual const TIntermBranch* getAsBranchNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TOperator getFlowOp() const { return flowOp; } + TIntermTyped* getExpression() const { return expression; } + void setExpression(TIntermTyped* pExpression) { expression = pExpression; } + void updatePrecision(TPrecisionQualifier parentPrecision); +protected: + TOperator flowOp; + TIntermTyped* expression; +}; + +// +// Represent method names before seeing their calling signature +// or resolving them to operations. Just an expression as the base object +// and a textural name. 
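+// Annotation (not upstream): e.g. while parsing the HLSL call tex.Sample(s, uv),
+// 'tex' is the base object and "Sample" is held as a TIntermMethod node until
+// the call is resolved into one of the EOpMethod* operators listed above and
+// then decomposed into a regular texturing op.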
+// +class TIntermMethod : public TIntermTyped { +public: + TIntermMethod(TIntermTyped* o, const TType& t, const TString& m) : TIntermTyped(t), object(o), method(m) { } + virtual TIntermMethod* getAsMethodNode() { return this; } + virtual const TIntermMethod* getAsMethodNode() const { return this; } + virtual const TString& getMethodName() const { return method; } + virtual TIntermTyped* getObject() const { return object; } + virtual void traverse(TIntermTraverser*); +protected: + TIntermTyped* object; + TString method; +}; + +// +// Nodes that correspond to symbols or constants in the source code. +// +class TIntermSymbol : public TIntermTyped { +public: + // if symbol is initialized as symbol(sym), the memory comes from the pool allocator of sym. If sym comes from + // per process threadPoolAllocator, then it causes increased memory usage per compile + // it is essential to use "symbol = sym" to assign to symbol + TIntermSymbol(long long i, const TString& n, const TType& t) + : TIntermTyped(t), id(i), +#ifndef GLSLANG_WEB + flattenSubset(-1), +#endif + constSubtree(nullptr) + { name = n; } + virtual long long getId() const { return id; } + virtual void changeId(long long i) { id = i; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual TIntermSymbol* getAsSymbolNode() { return this; } + virtual const TIntermSymbol* getAsSymbolNode() const { return this; } + void setConstArray(const TConstUnionArray& c) { constArray = c; } + const TConstUnionArray& getConstArray() const { return constArray; } + void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + TIntermTyped* getConstSubtree() const { return constSubtree; } +#ifndef GLSLANG_WEB + void setFlattenSubset(int subset) { flattenSubset = subset; } + virtual const TString& getAccessName() const; + + int getFlattenSubset() const { return flattenSubset; } // -1 means full object +#endif + + // This is meant for cases where a node has already been constructed, and + // later on, it becomes necessary to switch to a different symbol. 
+ virtual void switchId(long long newId) { id = newId; } + +protected: + long long id; // the unique id of the symbol this node represents +#ifndef GLSLANG_WEB + int flattenSubset; // how deeply the flattened object rooted at id has been dereferenced +#endif + TString name; // the name of the symbol this node represents + TConstUnionArray constArray; // if the symbol is a front-end compile-time constant, this is its value + TIntermTyped* constSubtree; +}; + +class TIntermConstantUnion : public TIntermTyped { +public: + TIntermConstantUnion(const TConstUnionArray& ua, const TType& t) : TIntermTyped(t), constArray(ua), literal(false) { } + const TConstUnionArray& getConstArray() const { return constArray; } + virtual TIntermConstantUnion* getAsConstantUnion() { return this; } + virtual const TIntermConstantUnion* getAsConstantUnion() const { return this; } + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* fold(TOperator, const TIntermTyped*) const; + virtual TIntermTyped* fold(TOperator, const TType&) const; + void setLiteral() { literal = true; } + void setExpression() { literal = false; } + bool isLiteral() const { return literal; } + +protected: + TIntermConstantUnion& operator=(const TIntermConstantUnion&); + + const TConstUnionArray constArray; + bool literal; // true if node represents a literal in the source code +}; + +// Represent the independent aspects of a texturing TOperator +struct TCrackedTextureOp { + bool query; + bool proj; + bool lod; + bool fetch; + bool offset; + bool offsets; + bool gather; + bool grad; + bool subpass; + bool lodClamp; + bool fragMask; +}; + +// +// Intermediate class for node types that hold operators. +// +class TIntermOperator : public TIntermTyped { +public: + virtual TIntermOperator* getAsOperator() { return this; } + virtual const TIntermOperator* getAsOperator() const { return this; } + TOperator getOp() const { return op; } + void setOp(TOperator newOp) { op = newOp; } + bool modifiesState() const; + bool isConstructor() const; + bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; } + bool isSampling() const { return op > EOpSamplingGuardBegin && op < EOpSamplingGuardEnd; } +#ifdef GLSLANG_WEB + bool isImage() const { return false; } + bool isSparseTexture() const { return false; } + bool isImageFootprint() const { return false; } + bool isSparseImage() const { return false; } + bool isSubgroup() const { return false; } +#else + bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; } + bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; } + bool isImageFootprint() const { return op > EOpImageFootprintGuardBegin && op < EOpImageFootprintGuardEnd; } + bool isSparseImage() const { return op == EOpSparseImageLoad; } + bool isSubgroup() const { return op > EOpSubgroupGuardStart && op < EOpSubgroupGuardStop; } +#endif + + void setOperationPrecision(TPrecisionQualifier p) { operationPrecision = p; } + TPrecisionQualifier getOperationPrecision() const { return operationPrecision != EpqNone ? + operationPrecision : + type.getQualifier().precision; } + TString getCompleteString() const + { + TString cs = type.getCompleteString(); + if (getOperationPrecision() != type.getQualifier().precision) { + cs += ", operation at "; + cs += GetPrecisionQualifierString(getOperationPrecision()); + } + + return cs; + } + + // Crack the op into the individual dimensions of texturing operation. 
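+    // Annotation (not upstream): e.g. cracking EOpTextureProjLodOffset sets
+    // cracked.proj, cracked.lod and cracked.offset all true, so a consumer can
+    // handle projection, explicit LOD and texel offsets as independent axes
+    // instead of special-casing every fused texturing opcode.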
+ void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const + { + cracked.query = false; + cracked.proj = false; + cracked.lod = false; + cracked.fetch = false; + cracked.offset = false; + cracked.offsets = false; + cracked.gather = false; + cracked.grad = false; + cracked.subpass = false; + cracked.lodClamp = false; + cracked.fragMask = false; + + switch (op) { + case EOpImageQuerySize: + case EOpImageQuerySamples: + case EOpTextureQuerySize: + case EOpTextureQueryLod: + case EOpTextureQueryLevels: + case EOpTextureQuerySamples: + case EOpSparseTexelsResident: + cracked.query = true; + break; + case EOpTexture: + case EOpSparseTexture: + break; + case EOpTextureProj: + cracked.proj = true; + break; + case EOpTextureLod: + case EOpSparseTextureLod: + cracked.lod = true; + break; + case EOpTextureOffset: + case EOpSparseTextureOffset: + cracked.offset = true; + break; + case EOpTextureFetch: + case EOpSparseTextureFetch: + cracked.fetch = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureFetchOffset: + case EOpSparseTextureFetchOffset: + cracked.fetch = true; + cracked.offset = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureProjOffset: + cracked.offset = true; + cracked.proj = true; + break; + case EOpTextureLodOffset: + case EOpSparseTextureLodOffset: + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureProjLod: + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureProjLodOffset: + cracked.offset = true; + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureGrad: + case EOpSparseTextureGrad: + cracked.grad = true; + break; + case EOpTextureGradOffset: + case EOpSparseTextureGradOffset: + cracked.grad = true; + cracked.offset = true; + break; + case EOpTextureProjGrad: + cracked.grad = true; + cracked.proj = true; + break; + case EOpTextureProjGradOffset: + cracked.grad = true; + cracked.offset = true; + cracked.proj = true; + break; +#ifndef GLSLANG_WEB + case EOpTextureClamp: + case EOpSparseTextureClamp: + cracked.lodClamp = true; + break; + case EOpTextureOffsetClamp: + case EOpSparseTextureOffsetClamp: + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGradClamp: + case EOpSparseTextureGradClamp: + cracked.grad = true; + cracked.lodClamp = true; + break; + case EOpTextureGradOffsetClamp: + case EOpSparseTextureGradOffsetClamp: + cracked.grad = true; + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGather: + case EOpSparseTextureGather: + cracked.gather = true; + break; + case EOpTextureGatherOffset: + case EOpSparseTextureGatherOffset: + cracked.gather = true; + cracked.offset = true; + break; + case EOpTextureGatherOffsets: + case EOpSparseTextureGatherOffsets: + cracked.gather = true; + cracked.offsets = true; + break; + case EOpTextureGatherLod: + case EOpSparseTextureGatherLod: + cracked.gather = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffset: + case EOpSparseTextureGatherLodOffset: + cracked.gather = true; + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffsets: + case EOpSparseTextureGatherLodOffsets: + cracked.gather = true; + cracked.offsets = true; + cracked.lod = true; + break; + case EOpImageLoadLod: + case EOpImageStoreLod: + case EOpSparseImageLoadLod: + cracked.lod = true; + break; + 
case EOpFragmentMaskFetch:
+            cracked.subpass = sampler.dim == EsdSubpass;
+            cracked.fragMask = true;
+            break;
+        case EOpFragmentFetch:
+            cracked.subpass = sampler.dim == EsdSubpass;
+            cracked.fragMask = true;
+            break;
+        case EOpImageSampleFootprintNV:
+            break;
+        case EOpImageSampleFootprintClampNV:
+            cracked.lodClamp = true;
+            break;
+        case EOpImageSampleFootprintLodNV:
+            cracked.lod = true;
+            break;
+        case EOpImageSampleFootprintGradNV:
+            cracked.grad = true;
+            break;
+        case EOpImageSampleFootprintGradClampNV:
+            cracked.lodClamp = true;
+            cracked.grad = true;
+            break;
+        case EOpSubpassLoad:
+        case EOpSubpassLoadMS:
+            cracked.subpass = true;
+            break;
+#endif
+        default:
+            break;
+        }
+    }
+
+protected:
+    TIntermOperator(TOperator o) : TIntermTyped(EbtFloat), op(o), operationPrecision(EpqNone) {}
+    TIntermOperator(TOperator o, TType& t) : TIntermTyped(t), op(o), operationPrecision(EpqNone) {}
+    TOperator op;
+    // The result precision is in the inherited TType, and is usually meant to be both
+    // the operation precision and the result precision. However, some more complex things,
+    // like built-in function calls, distinguish between the two, in which case non-EqpNone
+    // 'operationPrecision' overrides the result precision as far as operation precision
+    // is concerned.
+    TPrecisionQualifier operationPrecision;
+};
+
+//
+// Nodes for all the basic binary math operators.
+//
+class TIntermBinary : public TIntermOperator {
+public:
+    TIntermBinary(TOperator o) : TIntermOperator(o) {}
+    virtual void traverse(TIntermTraverser*);
+    virtual void setLeft(TIntermTyped* n) { left = n; }
+    virtual void setRight(TIntermTyped* n) { right = n; }
+    virtual TIntermTyped* getLeft() const { return left; }
+    virtual TIntermTyped* getRight() const { return right; }
+    virtual TIntermBinary* getAsBinaryNode() { return this; }
+    virtual const TIntermBinary* getAsBinaryNode() const { return this; }
+    virtual void updatePrecision();
+protected:
+    TIntermTyped* left;
+    TIntermTyped* right;
+};
+
+//
+// Nodes for unary math operators.
+//
+class TIntermUnary : public TIntermOperator {
+public:
+    TIntermUnary(TOperator o, TType& t) : TIntermOperator(o, t), operand(0) {}
+    TIntermUnary(TOperator o) : TIntermOperator(o), operand(0) {}
+    virtual void traverse(TIntermTraverser*);
+    virtual void setOperand(TIntermTyped* o) { operand = o; }
+    virtual TIntermTyped* getOperand() { return operand; }
+    virtual const TIntermTyped* getOperand() const { return operand; }
+    virtual TIntermUnary* getAsUnaryNode() { return this; }
+    virtual const TIntermUnary* getAsUnaryNode() const { return this; }
+    virtual void updatePrecision();
+#ifndef GLSLANG_WEB
+    void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; }
+    const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; }
+#endif
+protected:
+    TIntermTyped* operand;
+#ifndef GLSLANG_WEB
+    TSpirvInstruction spirvInst;
+#endif
+};
+
+typedef TVector<TIntermNode*> TIntermSequence;
+typedef TVector<int> TQualifierList;
+//
+// Nodes that operate on an arbitrary sized set of children.
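+// Annotation (not upstream): aggregates are the variable-arity nodes, e.g. an
+// EOpSequence holding a statement list or an EOpFunctionCall holding its
+// arguments. Children are reached through the TIntermSequence:
+//
+//     const glslang::TIntermSequence& args = call->getAsAggregate()->getSequence();
+//     for (size_t i = 0; i < args.size(); ++i)
+//         handle(args[i]);  // each child is a TIntermNode*; handle() hypothetical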
+// +class TIntermAggregate : public TIntermOperator { +public: + TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(nullptr) { } + TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(nullptr) { } + ~TIntermAggregate() { delete pragmaTable; } + virtual TIntermAggregate* getAsAggregate() { return this; } + virtual const TIntermAggregate* getAsAggregate() const { return this; } + virtual void updatePrecision(); + virtual void setOperator(TOperator o) { op = o; } + virtual TIntermSequence& getSequence() { return sequence; } + virtual const TIntermSequence& getSequence() const { return sequence; } + virtual void setName(const TString& n) { name = n; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual void setUserDefined() { userDefined = true; } + virtual bool isUserDefined() { return userDefined; } + virtual TQualifierList& getQualifierList() { return qualifier; } + virtual const TQualifierList& getQualifierList() const { return qualifier; } + void setOptimize(bool o) { optimize = o; } + void setDebug(bool d) { debug = d; } + bool getOptimize() const { return optimize; } + bool getDebug() const { return debug; } + void setPragmaTable(const TPragmaTable& pTable); + const TPragmaTable& getPragmaTable() const { return *pragmaTable; } +#ifndef GLSLANG_WEB + void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; } + const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif +protected: + TIntermAggregate(const TIntermAggregate&); // disallow copy constructor + TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator + TIntermSequence sequence; + TQualifierList qualifier; + TString name; + bool userDefined; // used for user defined function names + bool optimize; + bool debug; + TPragmaTable* pragmaTable; +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; +#endif +}; + +// +// For if tests. 
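+// Annotation (not upstream): this one node covers both the statement form
+// 'if (c) {...} else {...}' (built with type EbtVoid) and the value-producing
+// ternary 'c ? x : y' (built with the result type). The shortCircuit flag
+// exists because HLSL's ?:, unlike GLSL's, evaluates both sides.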
+// +class TIntermSelection : public TIntermTyped { +public: + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB) : + TIntermTyped(EbtVoid), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB, const TType& type) : + TIntermTyped(type), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* getCondition() const { return condition; } + virtual void setCondition(TIntermTyped* c) { condition = c; } + virtual TIntermNode* getTrueBlock() const { return trueBlock; } + virtual void setTrueBlock(TIntermTyped* tb) { trueBlock = tb; } + virtual TIntermNode* getFalseBlock() const { return falseBlock; } + virtual void setFalseBlock(TIntermTyped* fb) { falseBlock = fb; } + virtual TIntermSelection* getAsSelectionNode() { return this; } + virtual const TIntermSelection* getAsSelectionNode() const { return this; } + + void setNoShortCircuit() { shortCircuit = false; } + bool getShortCircuit() const { return shortCircuit; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermNode* trueBlock; + TIntermNode* falseBlock; + bool shortCircuit; // normally all if-then-else and all GLSL ?: short-circuit, but HLSL ?: does not + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +// +// For switch statements. Designed use is that a switch will have sequence of nodes +// that are either case/default nodes or a *single* node that represents all the code +// in between (if any) consecutive case/defaults. So, a traversal need only deal with +// 0 or 1 nodes per case/default statement. +// +class TIntermSwitch : public TIntermNode { +public: + TIntermSwitch(TIntermTyped* cond, TIntermAggregate* b) : condition(cond), body(b), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermNode* getCondition() const { return condition; } + virtual TIntermAggregate* getBody() const { return body; } + virtual TIntermSwitch* getAsSwitchNode() { return this; } + virtual const TIntermSwitch* getAsSwitchNode() const { return this; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermAggregate* body; + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +enum TVisit +{ + EvPreVisit, + EvInVisit, + EvPostVisit +}; + +// +// For traversing the tree. User should derive from this, +// put their traversal specific data in it, and then pass +// it to a Traverse method. +// +// When using this, just fill in the methods for nodes you want visited. +// Return false from a pre-visit to skip visiting that node's subtree. +// +// Explicitly set postVisit to true if you want post visiting, otherwise, +// filled in methods will only be called at pre-visit time (before processing +// the subtree). Similarly for inVisit for in-order visiting of nodes with +// multiple children. 
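+// Annotation (not upstream): a minimal pre-visit traverser that only counts
+// symbol references could look like:
+//
+//     class TSymbolCounter : public glslang::TIntermTraverser {
+//     public:
+//         TSymbolCounter() : TIntermTraverser(/*preVisit*/ true), count(0) {}
+//         void visitSymbol(glslang::TIntermSymbol*) override { ++count; }
+//         int count;
+//     };
+//
+//     TSymbolCounter counter;
+//     root->traverse(&counter);  // 'root' is any TIntermNode*
+//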
+// +// If you only want post-visits, explicitly turn off preVisit (and inVisit) +// and turn on postVisit. +// +// In general, for the visit*() methods, return true from interior nodes +// to have the traversal continue on to children. +// +// If you process children yourself, or don't want them processed, return false. +// +class TIntermTraverser { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + TIntermTraverser(bool preVisit = true, bool inVisit = false, bool postVisit = false, bool rightToLeft = false) : + preVisit(preVisit), + inVisit(inVisit), + postVisit(postVisit), + rightToLeft(rightToLeft), + depth(0), + maxDepth(0) { } + virtual ~TIntermTraverser() { } + + virtual void visitSymbol(TIntermSymbol*) { } + virtual void visitConstantUnion(TIntermConstantUnion*) { } + virtual bool visitBinary(TVisit, TIntermBinary*) { return true; } + virtual bool visitUnary(TVisit, TIntermUnary*) { return true; } + virtual bool visitSelection(TVisit, TIntermSelection*) { return true; } + virtual bool visitAggregate(TVisit, TIntermAggregate*) { return true; } + virtual bool visitLoop(TVisit, TIntermLoop*) { return true; } + virtual bool visitBranch(TVisit, TIntermBranch*) { return true; } + virtual bool visitSwitch(TVisit, TIntermSwitch*) { return true; } + + int getMaxDepth() const { return maxDepth; } + + void incrementDepth(TIntermNode *current) + { + depth++; + maxDepth = (std::max)(maxDepth, depth); + path.push_back(current); + } + + void decrementDepth() + { + depth--; + path.pop_back(); + } + + TIntermNode *getParentNode() + { + return path.size() == 0 ? NULL : path.back(); + } + + const bool preVisit; + const bool inVisit; + const bool postVisit; + const bool rightToLeft; + +protected: + TIntermTraverser& operator=(TIntermTraverser&); + + int depth; + int maxDepth; + + // All the nodes from root to the current node's parent during traversing. + TVector path; +}; + +// KHR_vulkan_glsl says "Two arrays sized with specialization constants are the same type only if +// sized with the same symbol, involving no operations" +inline bool SameSpecializationConstants(TIntermTyped* node1, TIntermTyped* node2) +{ + return node1->getAsSymbolNode() && node2->getAsSymbolNode() && + node1->getAsSymbolNode()->getId() == node2->getAsSymbolNode()->getId(); +} + +} // end namespace glslang + +#endif // __INTERMEDIATE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Initialize.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Initialize.h new file mode 100644 index 0000000..ac8ec33 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Initialize.h @@ -0,0 +1,112 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013-2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _INITIALIZE_INCLUDED_ +#define _INITIALIZE_INCLUDED_ + +#include "../Include/ResourceLimits.h" +#include "../Include/Common.h" +#include "../Include/ShHandle.h" +#include "SymbolTable.h" +#include "Versions.h" + +namespace glslang { + +// +// This is made to hold parseable strings for almost all the built-in +// functions and variables for one specific combination of version +// and profile. (Some still need to be added programmatically.) +// This is a base class for language-specific derivations, which +// can be used for language independent builtins. +// +// The strings are organized by +// commonBuiltins: intersection of all stages' built-ins, processed just once +// stageBuiltins[]: anything a stage needs that's not in commonBuiltins +// +class TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltInParseables(); + virtual ~TBuiltInParseables(); + virtual void initialize(int version, EProfile, const SpvVersion& spvVersion) = 0; + virtual void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage) = 0; + virtual const TString& getCommonString() const { return commonBuiltins; } + virtual const TString& getStageString(EShLanguage language) const { return stageBuiltins[language]; } + + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable) = 0; + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources) = 0; + +protected: + TString commonBuiltins; + TString stageBuiltins[EShLangCount]; +}; + +// +// This is a GLSL specific derivation of TBuiltInParseables. To present a stable +// interface and match other similar code, it is called TBuiltIns, rather +// than TBuiltInParseablesGlsl. 
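[Editor's note: a hedged sketch of how the commonBuiltins / stageBuiltins split described above is meant to be consumed; gatherBuiltins is an illustrative helper, not an API of this header:]

    // The parser sees the stage-independent intersection once, then the
    // stage-specific remainder for the stage being compiled.
    void gatherBuiltins(const glslang::TBuiltInParseables& parseables,
                        EShLanguage stage, glslang::TString& out)
    {
        out  = parseables.getCommonString();     // shared across all stages
        out += parseables.getStageString(stage); // e.g. EShLangFragment extras
    }
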
+// +class TBuiltIns : public TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltIns(); + virtual ~TBuiltIns(); + void initialize(int version, EProfile, const SpvVersion& spvVersion); + void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage); + + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable); + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources); + +protected: + void addTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion); + void relateTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage, TSymbolTable&); + void add2ndGenerationSamplingImaging(int version, EProfile profile, const SpvVersion& spvVersion); + void addSubpassSampling(TSampler, const TString& typeName, int version, EProfile profile); + void addQueryFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addImageFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addSamplingFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addGatherFunctions(TSampler, const TString& typeName, int version, EProfile profile); + + // Helpers for making textual representations of the permutations + // of texturing/imaging functions. + const char* postfixes[5]; + const char* prefixes[EbtNumTypes]; + int dimMap[EsdNumDims]; +}; + +} // end namespace glslang + +#endif // _INITIALIZE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/LiveTraverser.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/LiveTraverser.h new file mode 100644 index 0000000..9b39b59 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/LiveTraverser.h @@ -0,0 +1,168 @@ +// +// Copyright (C) 2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#pragma once
+
+#include "../Include/Common.h"
+#include "reflection.h"
+#include "localintermediate.h"
+
+#include "gl_types.h"
+
+#include <list>
+#include <unordered_set>
+
+namespace glslang {
+
+//
+// The traverser: mostly pass through, except
+//  - processing function-call nodes to push live functions onto the stack of functions to process
+//  - processing selection nodes to trim semantically dead code
+//
+// This is in the glslang namespace directly so it can be a friend of TReflection.
+// This can be derived from to implement reflection database traversers or
+// binding mappers: anything that wants to traverse the live subset of the tree.
+//
+
+class TLiveTraverser : public TIntermTraverser {
+public:
+    TLiveTraverser(const TIntermediate& i, bool traverseAll = false,
+                   bool preVisit = true, bool inVisit = false, bool postVisit = false) :
+        TIntermTraverser(preVisit, inVisit, postVisit),
+        intermediate(i), traverseAll(traverseAll)
+    { }
+
+    //
+    // Given a function name, find its subroot in the tree, and push it onto the stack of
+    // functions left to process.
+    //
+    void pushFunction(const TString& name)
+    {
+        TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence();
+        for (unsigned int f = 0; f < globals.size(); ++f) {
+            TIntermAggregate* candidate = globals[f]->getAsAggregate();
+            if (candidate && candidate->getOp() == EOpFunction && candidate->getName() == name) {
+                destinations.push_back(candidate);
+                break;
+            }
+        }
+    }
+
+    void pushGlobalReference(const TString& name)
+    {
+        TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence();
+        for (unsigned int f = 0; f < globals.size(); ++f) {
+            TIntermAggregate* candidate = globals[f]->getAsAggregate();
+            if (candidate && candidate->getOp() == EOpSequence &&
+                candidate->getSequence().size() == 1 &&
+                candidate->getSequence()[0]->getAsBinaryNode()) {
+                TIntermBinary* binary = candidate->getSequence()[0]->getAsBinaryNode();
+                TIntermSymbol* symbol = binary->getLeft()->getAsSymbolNode();
+                if (symbol && symbol->getQualifier().storage == EvqGlobal &&
+                    symbol->getName() == name) {
+                    destinations.push_back(candidate);
+                    break;
+                }
+            }
+        }
+    }
+
+    typedef std::list<TIntermAggregate*> TDestinationStack;
+    TDestinationStack destinations;
+
+protected:
+    // To catch which function calls are not dead, and hence which functions must be visited.
+    virtual bool visitAggregate(TVisit, TIntermAggregate* node)
+    {
+        if (!traverseAll)
+            if (node->getOp() == EOpFunctionCall)
+                addFunctionCall(node);
+
+        return true; // traverse this subtree
+    }
+
+    // To prune semantically dead paths.
+    virtual bool visitSelection(TVisit /* visit */, TIntermSelection* node)
+    {
+        if (traverseAll)
+            return true; // traverse all code
+
+        TIntermConstantUnion* constant = node->getCondition()->getAsConstantUnion();
+        if (constant) {
+            // cull the path that is dead
+            if (constant->getConstArray()[0].getBConst() == true && node->getTrueBlock())
+                node->getTrueBlock()->traverse(this);
+            if (constant->getConstArray()[0].getBConst() == false && node->getFalseBlock())
+                node->getFalseBlock()->traverse(this);
+
+            return false; // don't traverse any more, we did it all above
+        } else
+            return true; // traverse the whole subtree
+    }
+
+    // Track live functions as well as uniforms, so that we don't visit dead functions
+    // and only visit each function once.
+    void addFunctionCall(TIntermAggregate* call)
+    {
+        // just use the map to ensure we process each function at most once
+        if (liveFunctions.find(call->getName()) == liveFunctions.end()) {
+            liveFunctions.insert(call->getName());
+            pushFunction(call->getName());
+        }
+    }
+
+    void addGlobalReference(const TString& name)
+    {
+        // just use the map to ensure we process each global at most once
+        if (liveGlobals.find(name) == liveGlobals.end()) {
+            liveGlobals.insert(name);
+            pushGlobalReference(name);
+        }
+    }
+
+    const TIntermediate& intermediate;
+    typedef std::unordered_set<TString> TLiveFunctions;
+    TLiveFunctions liveFunctions;
+    typedef std::unordered_set<TString> TLiveGlobals;
+    TLiveGlobals liveGlobals;
+    bool traverseAll;
+
+private:
+    // prevent copy & copy construct
+    TLiveTraverser(TLiveTraverser&);
+    TLiveTraverser& operator=(TLiveTraverser&);
+};
+
+} // namespace glslang
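[Editor's note: a sketch of the driving loop TLiveTraverser is designed for, mirroring the pattern glslang's reflection code uses; hypothetical helper, assuming the entry point is named "main":]

    void traverseLiveCode(glslang::TLiveTraverser& it)
    {
        // Seed the work list, then drain it; visitAggregate() above keeps
        // appending callees, so exactly the live functions get visited.
        it.pushFunction("main");
        while (! it.destinations.empty()) {
            glslang::TIntermNode* destination = it.destinations.back();
            it.destinations.pop_back();
            destination->traverse(&it);
        }
    }
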
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/ParseHelper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/ParseHelper.h
new file mode 100644
index 0000000..885fd90
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/ParseHelper.h
@@ -0,0 +1,588 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2013 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+//
+// This header defines a two-level parse-helper hierarchy, derived from
+// TParseVersions:
+//  - TParseContextBase:  sharable across multiple parsers
+//  - TParseContext:      GLSL specific helper
+//
+
+#ifndef _PARSER_HELPER_INCLUDED_
+#define _PARSER_HELPER_INCLUDED_
+
+#include <cstdarg>
+#include <functional>
+
+#include "parseVersions.h"
+#include "../Include/ShHandle.h"
+#include "SymbolTable.h"
+#include "localintermediate.h"
+#include "Scan.h"
+#include "attribute.h"
+
+namespace glslang {
+
+struct TPragma {
+    TPragma(bool o, bool d) : optimize(o), debug(d) { }
+    bool optimize;
+    bool debug;
+    TPragmaTable pragmaTable;
+};
+
+class TScanContext;
+class TPpContext;
+
+typedef std::set<long long> TIdSetType;
+typedef std::map<const TTypeList*, std::map<size_t, const TTypeList*>> TStructRecord;
+
+//
+// Sharable code (as well as what's in TParseVersions) across
+// parse helpers.
+//
+class TParseContextBase : public TParseVersions {
+public:
+    TParseContextBase(TSymbolTable& symbolTable, TIntermediate& interm, bool parsingBuiltins, int version,
+                      EProfile profile, const SpvVersion& spvVersion, EShLanguage language,
+                      TInfoSink& infoSink, bool forwardCompatible, EShMessages messages,
+                      const TString* entryPoint = nullptr)
+          : TParseVersions(interm, version, profile, spvVersion, language, infoSink, forwardCompatible, messages),
+            scopeMangler("::"),
+            symbolTable(symbolTable),
+            statementNestingLevel(0), loopNestingLevel(0), structNestingLevel(0), blockNestingLevel(0), controlFlowNestingLevel(0),
+            currentFunctionType(nullptr),
+            postEntryPointReturn(false),
+            contextPragma(true, false),
+            beginInvocationInterlockCount(0), endInvocationInterlockCount(0),
+            parsingBuiltins(parsingBuiltins), scanContext(nullptr), ppContext(nullptr),
+            limits(resources.limits),
+            globalUniformBlock(nullptr),
+            globalUniformBinding(TQualifier::layoutBindingEnd),
+            globalUniformSet(TQualifier::layoutSetEnd),
+            atomicCounterBlockSet(TQualifier::layoutSetEnd)
+    {
+        if (entryPoint != nullptr)
+            sourceEntryPointName = *entryPoint;
+    }
+    virtual ~TParseContextBase() { }
+
+#if !defined(GLSLANG_WEB) || defined(GLSLANG_WEB_DEVEL)
+    virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken,
+                              const char* szExtraInfoFormat, ...);
+    virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken,
+                             const char* szExtraInfoFormat, ...);
+    virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken,
+                                const char* szExtraInfoFormat, ...);
+    virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken,
+                               const char* szExtraInfoFormat, ...);
+#endif
+
+    virtual void setLimits(const TBuiltInResource&) = 0;
+
+    void checkIndex(const TSourceLoc&, const TType&, int& index);
+
+    EShLanguage getLanguage() const { return language; }
+    void setScanContext(TScanContext* c) { scanContext = c; }
+    TScanContext* getScanContext() const { return scanContext; }
+    void setPpContext(TPpContext* c) { ppContext = c; }
+    TPpContext* getPpContext() const { return ppContext; }
+
+    virtual void setLineCallback(const std::function<void(int, int, bool, int, const char*)>& func) { lineCallback = func; }
+    virtual void setExtensionCallback(const std::function<void(int, const char*, const char*)>& func) { extensionCallback = func; }
+    virtual void setVersionCallback(const std::function<void(int, int, const char*)>& func) { versionCallback = func; }
+    virtual void setPragmaCallback(const std::function<void(int, const TVector<TString>&)>& func) { pragmaCallback = func; }
+    virtual void setErrorCallback(const std::function<void(int, const char*)>& func) { errorCallback = func; }
+
+    virtual void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) = 0;
+    virtual bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) = 0;
+    virtual bool lineDirectiveShouldSetNextLine() const = 0;
+    virtual void handlePragma(const TSourceLoc&, const TVector<TString>&) = 0;
+
+    virtual bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) = 0;
+
+    virtual void notifyVersion(int line, int version, const char* type_string)
+    {
+        if (versionCallback)
+            versionCallback(line, version, type_string);
+    }
+    virtual void notifyErrorDirective(int line, const char* error_message)
+    {
+        if (errorCallback)
+            errorCallback(line, error_message);
+    }
+    virtual void notifyLineDirective(int curLineNo, int newLineNo, bool hasSource, int sourceNum, const char* sourceName)
+    {
+        if (lineCallback)
+            lineCallback(curLineNo, newLineNo, hasSource, sourceNum, sourceName);
+    }
+    virtual void notifyExtensionDirective(int line, const char* extension, const char* behavior)
+    {
+        if (extensionCallback)
+            extensionCallback(line, extension, behavior);
+    }
+
+    // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL)
+    virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr);
+
+    // Manage global buffer (used for backing atomic counters in GLSL when using relaxed Vulkan semantics)
+    virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr);
+
+    // Potentially rename shader entry point function
+    void renameShaderFunction(TString*& name) const
+    {
+        // Replace the entry point name given in the shader with the real entry point name,
+        // if there is a substitution.
+        if (name != nullptr && *name == sourceEntryPointName && intermediate.getEntryPointName().size() > 0)
+            name = NewPoolTString(intermediate.getEntryPointName().c_str());
+    }
+
+    virtual bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*);
+    virtual void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*);
+
+    const char* const scopeMangler;
+
+    // Basic parsing state, easily accessible to the grammar
+
+    TSymbolTable& symbolTable;        // symbol table that goes with the current language, version, and profile
+    int statementNestingLevel;        // 0 if outside all flow control or compound statements
+    int loopNestingLevel;             // 0 if outside all loops
+    int structNestingLevel;           // 0 if outside structures
+    int blockNestingLevel;            // 0 if outside blocks
+    int controlFlowNestingLevel;      // 0 if outside all flow control
+    const TType* currentFunctionType; // the return type of the function that's currently being parsed
+    bool functionReturnsValue;        // true if a non-void function has a return
+    // if inside a function, true if the function is the entry point and this is after a return statement
+    bool postEntryPointReturn;
+    // case, node, case, case, node, ...; ensure only one node between cases; stack of them for nesting
+    TList<TIntermSequence*> switchSequenceStack;
+    // the statementNestingLevel the current switch statement is at, which must match the level of its case statements
+    TList<int> switchLevel;
+    struct TPragma contextPragma;
+    int beginInvocationInterlockCount;
+    int endInvocationInterlockCount;
+
+protected:
+    TParseContextBase(TParseContextBase&);
+    TParseContextBase& operator=(TParseContextBase&);
+
+    const bool parsingBuiltins;       // true if parsing built-in symbols/functions
+    TVector<TSymbol*> linkageSymbols; // will be transferred to 'linkage', after all editing is done, order preserving
+    TScanContext* scanContext;
+    TPpContext* ppContext;
+    TBuiltInResource resources;
+    TLimits& limits;
+    TString sourceEntryPointName;
+
+    // These, if set, will be called when a line, pragma ... is preprocessed.
+    // They will be called with any parameters to the original directive.
+    std::function<void(int, int, bool, int, const char*)> lineCallback;
+    std::function<void(int, const TVector<TString>&)> pragmaCallback;
+    std::function<void(int, int, const char*)> versionCallback;
+    std::function<void(int, const char*, const char*)> extensionCallback;
+    std::function<void(int, const char*)> errorCallback;
+
+    // see implementation for detail
+    const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&,
+        std::function<bool(const TType&, const TType&, TOperator, int arg)>,
+        std::function<bool(const TType&, const TType&, const TType&)>,
+        /* output */ bool& tie);
+
+    virtual void parseSwizzleSelector(const TSourceLoc&, const TString&, int size,
+                                      TSwizzleSelectors<TVectorSelector>&);
+
+    // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL)
+    TVariable* globalUniformBlock;     // the actual block, inserted into the symbol table
+    unsigned int globalUniformBinding; // the block's binding number
+    unsigned int globalUniformSet;     // the block's set number
+    int firstNewMember;                // the index of the first member not yet inserted into the symbol table
+    // override this to set the language-specific name
+    virtual const char* getGlobalUniformBlockName() const { return ""; }
+    virtual void setUniformBlockDefaults(TType&) const { }
+    virtual void finalizeGlobalUniformBlockLayout(TVariable&) {}
+
+    // Manage the atomic counter block (used for atomic_uints with Vulkan-Relaxed)
+    TMap<int, TVariable*> atomicCounterBuffers;
+    unsigned int atomicCounterBlockSet;
+    TMap<int, int> atomicCounterBlockFirstNewMember;
+    // override this to set the language-specific name
+    virtual const char* getAtomicCounterBlockName() const { return ""; }
+    virtual void setAtomicCounterBlockDefaults(TType&) const {}
+    virtual void setInvariant(const TSourceLoc&, const char*) {}
+    virtual void finalizeAtomicCounterBlockLayout(TVariable&) {}
+    bool isAtomicCounterBlock(const TSymbol& symbol) {
+        const TVariable* var = symbol.getAsVariable();
+        if (!var)
+            return false;
+        const auto& at = atomicCounterBuffers.find(var->getType().getQualifier().layoutBinding);
+        return (at != atomicCounterBuffers.end() && (*at).second->getType() == var->getType());
+    }
+
+    virtual void outputMessage(const TSourceLoc&, const char* szReason, const char* szToken,
+                               const char* szExtraInfoFormat, TPrefixType prefix,
+                               va_list args);
+    virtual void trackLinkage(TSymbol& symbol);
+    virtual void makeEditable(TSymbol*&);
+    virtual TVariable* getEditableVariable(const char* name);
+    virtual void finish();
+};
+
+//
+// Manage the state for when to respect precision qualifiers and when to warn about
+// the defaults being different than might be expected.
+//
+class TPrecisionManager {
+public:
+    TPrecisionManager() : obey(false), warn(false), explicitIntDefault(false), explicitFloatDefault(false){ }
+    virtual ~TPrecisionManager() {}
+
+    void respectPrecisionQualifiers() { obey = true; }
+    bool respectingPrecisionQualifiers() const { return obey; }
+    bool shouldWarnAboutDefaults() const { return warn; }
+    void defaultWarningGiven() { warn = false; }
+    void warnAboutDefaults() { warn = true; }
+    void explicitIntDefaultSeen()
+    {
+        explicitIntDefault = true;
+        if (explicitFloatDefault)
+            warn = false;
+    }
+    void explicitFloatDefaultSeen()
+    {
+        explicitFloatDefault = true;
+        if (explicitIntDefault)
+            warn = false;
+    }
+
+protected:
+    bool obey;                 // respect precision qualifiers
+    bool warn;                 // need to give a warning about the defaults
+    bool explicitIntDefault;   // user set the default for int/uint
+    bool explicitFloatDefault; // user set the default for float
+};
+
+//
+// GLSL-specific parse helper. Should have GLSL in the name, but that's
+// too big of a change for comparing branches at the moment, and perhaps
+// impacts downstream consumers as well.
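[Editor's note: the TPrecisionManager state machine above is compact; a hedged walk-through of its intended transitions, as illustrative driver code rather than anything this header provides:]

    #include <cassert>

    void demoPrecisionWarning()
    {
        glslang::TPrecisionManager pm;
        pm.respectPrecisionQualifiers();  // e.g. an ES profile
        pm.warnAboutDefaults();           // fragment float default is not predeclared
        pm.explicitFloatDefaultSeen();    // shader: "precision mediump float;"
        pm.explicitIntDefaultSeen();      // shader: "precision highp int;"
        assert(! pm.shouldWarnAboutDefaults()); // both defaults set: warning cleared
    }
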
+// +class TParseContext : public TParseContextBase { +public: + TParseContext(TSymbolTable&, TIntermediate&, bool parsingBuiltins, int version, EProfile, const SpvVersion& spvVersion, EShLanguage, TInfoSink&, + bool forwardCompatible = false, EShMessages messages = EShMsgDefault, + const TString* entryPoint = nullptr); + virtual ~TParseContext(); + + bool obeyPrecisionQualifiers() const { return precisionManager.respectingPrecisionQualifiers(); } + void setPrecisionDefaults(); + + void setLimits(const TBuiltInResource&) override; + bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) override; + void parserError(const char* s); // for bison's yyerror + + virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override; + virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override; + + void reservedErrorCheck(const TSourceLoc&, const TString&); + void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) override; + bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) override; + bool lineDirectiveShouldSetNextLine() const override; + bool builtInName(const TString&); + + void handlePragma(const TSourceLoc&, const TVector&) override; + TIntermTyped* handleVariable(const TSourceLoc&, TSymbol* symbol, const TString* string); + TIntermTyped* handleBracketDereference(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index); + void handleIndexLimits(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index); + +#ifndef GLSLANG_WEB + void makeEditable(TSymbol*&) override; + void ioArrayCheck(const TSourceLoc&, const TType&, const TString& identifier); +#endif + bool isIoResizeArray(const TType&) const; + void fixIoArraySize(const TSourceLoc&, TType&); + void handleIoResizeArrayAccess(const TSourceLoc&, TIntermTyped* base); + void checkIoArraysConsistency(const TSourceLoc&, bool tailOnly = false); + int getIoArrayImplicitSize(const TQualifier&, TString* featureString = nullptr) const; + void checkIoArrayConsistency(const TSourceLoc&, int requiredSize, const char* feature, TType&, const TString&); + + TIntermTyped* handleBinaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* left, TIntermTyped* right); + TIntermTyped* handleUnaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* childNode); + TIntermTyped* handleDotDereference(const TSourceLoc&, TIntermTyped* base, const TString& field); + TIntermTyped* handleDotSwizzle(const TSourceLoc&, TIntermTyped* base, const TString& field); + void blockMemberExtensionCheck(const TSourceLoc&, const TIntermTyped* base, int member, const TString& memberName); + TFunction* handleFunctionDeclarator(const TSourceLoc&, TFunction& function, bool prototype); + TIntermAggregate* handleFunctionDefinition(const TSourceLoc&, TFunction&); + TIntermTyped* handleFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + TIntermTyped* handleBuiltInFunctionCall(TSourceLoc, TIntermNode* arguments, const TFunction& function); + void computeBuiltinPrecisions(TIntermTyped&, const TFunction&); + TIntermNode* handleReturnValue(const TSourceLoc&, TIntermTyped*); + void checkLocation(const TSourceLoc&, TOperator); + TIntermTyped* handleLengthMethod(const TSourceLoc&, TFunction*, TIntermNode*); + void addInputArgumentConversions(const TFunction&, TIntermNode*&) const; + TIntermTyped* addOutputArgumentConversions(const 
TFunction&, TIntermAggregate&) const; + TIntermTyped* addAssign(const TSourceLoc&, TOperator op, TIntermTyped* left, TIntermTyped* right); + void builtInOpCheck(const TSourceLoc&, const TFunction&, TIntermOperator&); + void nonOpBuiltInCheck(const TSourceLoc&, const TFunction&, TIntermAggregate&); + void userFunctionCallCheck(const TSourceLoc&, TIntermAggregate&); + void samplerConstructorLocationCheck(const TSourceLoc&, const char* token, TIntermNode*); + TFunction* handleConstructorCall(const TSourceLoc&, const TPublicType&); + void handlePrecisionQualifier(const TSourceLoc&, TQualifier&, TPrecisionQualifier); + void checkPrecisionQualifier(const TSourceLoc&, TPrecisionQualifier); + void memorySemanticsCheck(const TSourceLoc&, const TFunction&, const TIntermOperator& callNode); + + TIntermTyped* vkRelaxedRemapFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + // returns true if the variable was remapped to something else + bool vkRelaxedRemapUniformVariable(const TSourceLoc&, TString&, const TPublicType&, TArraySizes*, TIntermTyped*, TType&); + + void assignError(const TSourceLoc&, const char* op, TString left, TString right); + void unaryOpError(const TSourceLoc&, const char* op, TString operand); + void binaryOpError(const TSourceLoc&, const char* op, TString left, TString right); + void variableCheck(TIntermTyped*& nodePtr); + bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void constantValueCheck(TIntermTyped* node, const char* token); + void integerCheck(const TIntermTyped* node, const char* token); + void globalCheck(const TSourceLoc&, const char* token); + bool constructorError(const TSourceLoc&, TIntermNode*, TFunction&, TOperator, TType&); + bool constructorTextureSamplerError(const TSourceLoc&, const TFunction&); + void arraySizeCheck(const TSourceLoc&, TIntermTyped* expr, TArraySize&, const char *sizeType); + bool arrayQualifierError(const TSourceLoc&, const TQualifier&); + bool arrayError(const TSourceLoc&, const TType&); + void arraySizeRequiredCheck(const TSourceLoc&, const TArraySizes&); + void structArrayCheck(const TSourceLoc&, const TType& structure); + void arraySizesCheck(const TSourceLoc&, const TQualifier&, TArraySizes*, const TIntermTyped* initializer, bool lastMember); + void arrayOfArrayVersionCheck(const TSourceLoc&, const TArraySizes*); + bool voidErrorCheck(const TSourceLoc&, const TString&, TBasicType); + void boolCheck(const TSourceLoc&, const TIntermTyped*); + void boolCheck(const TSourceLoc&, const TPublicType&); + void samplerCheck(const TSourceLoc&, const TType&, const TString& identifier, TIntermTyped* initializer); + void atomicUintCheck(const TSourceLoc&, const TType&, const TString& identifier); + void accStructCheck(const TSourceLoc & loc, const TType & type, const TString & identifier); + void transparentOpaqueCheck(const TSourceLoc&, const TType&, const TString& identifier); + void memberQualifierCheck(glslang::TPublicType&); + void globalQualifierFixCheck(const TSourceLoc&, TQualifier&, bool isMemberCheck = false); + void globalQualifierTypeCheck(const TSourceLoc&, const TQualifier&, const TPublicType&); + bool structQualifierErrorCheck(const TSourceLoc&, const TPublicType& pType); + void mergeQualifiers(const TSourceLoc&, TQualifier& dst, const TQualifier& src, bool force); + void setDefaultPrecision(const TSourceLoc&, TPublicType&, TPrecisionQualifier); + int computeSamplerTypeIndex(TSampler&); + TPrecisionQualifier 
getDefaultPrecision(TPublicType&); + void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&); + void parameterTypeCheck(const TSourceLoc&, TStorageQualifier qualifier, const TType& type); + bool containsFieldWithBasicType(const TType& type ,TBasicType basicType); + TSymbol* redeclareBuiltinVariable(const TSourceLoc&, const TString&, const TQualifier&, const TShaderQualifiers&); + void redeclareBuiltinBlock(const TSourceLoc&, TTypeList& typeList, const TString& blockName, const TString* instanceName, TArraySizes* arraySizes); + void paramCheckFixStorage(const TSourceLoc&, const TStorageQualifier&, TType& type); + void paramCheckFix(const TSourceLoc&, const TQualifier&, TType& type); + void nestedBlockCheck(const TSourceLoc&); + void nestedStructCheck(const TSourceLoc&); + void arrayObjectCheck(const TSourceLoc&, const TType&, const char* op); + void opaqueCheck(const TSourceLoc&, const TType&, const char* op); + void referenceCheck(const TSourceLoc&, const TType&, const char* op); + void storage16BitAssignmentCheck(const TSourceLoc&, const TType&, const char* op); + void specializationCheck(const TSourceLoc&, const TType&, const char* op); + void structTypeCheck(const TSourceLoc&, TPublicType&); + void inductiveLoopCheck(const TSourceLoc&, TIntermNode* init, TIntermLoop* loop); + void arrayLimitCheck(const TSourceLoc&, const TString&, int size); + void limitCheck(const TSourceLoc&, int value, const char* limit, const char* feature); + + void inductiveLoopBodyCheck(TIntermNode*, long long loopIndexId, TSymbolTable&); + void constantIndexExpressionCheck(TIntermNode*); + + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&); + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&, const TIntermTyped*); + void mergeObjectLayoutQualifiers(TQualifier& dest, const TQualifier& src, bool inheritOnly); + void layoutObjectCheck(const TSourceLoc&, const TSymbol&); + void layoutMemberLocationArrayCheck(const TSourceLoc&, bool memberWithLocation, TArraySizes* arraySizes); + void layoutTypeCheck(const TSourceLoc&, const TType&); + void layoutQualifierCheck(const TSourceLoc&, const TQualifier&); + void checkNoShaderLayouts(const TSourceLoc&, const TShaderQualifiers&); + void fixOffset(const TSourceLoc&, TSymbol&); + + const TFunction* findFunction(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExact(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction120(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction400(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExplicitTypes(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + void declareTypeDefaults(const TSourceLoc&, const TPublicType&); + TIntermNode* declareVariable(const TSourceLoc&, TString& identifier, const TPublicType&, TArraySizes* typeArray = 0, TIntermTyped* initializer = 0); + TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&); + TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&); + TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset); + void inheritMemoryQualifiers(const TQualifier& from, TQualifier& to); + void declareBlock(const TSourceLoc&, TTypeList& typeList, const TString* instanceName = 0, TArraySizes* arraySizes = 0); + void blockStorageRemap(const TSourceLoc&, const TString*, TQualifier&); + void 
blockStageIoCheck(const TSourceLoc&, const TQualifier&); + void blockQualifierCheck(const TSourceLoc&, const TQualifier&, bool instanceName); + void fixBlockLocations(const TSourceLoc&, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); + void fixXfbOffsets(TQualifier&, TTypeList&); + void fixBlockUniformOffsets(TQualifier&, TTypeList&); + void fixBlockUniformLayoutMatrix(TQualifier&, TTypeList*, TTypeList*); + void fixBlockUniformLayoutPacking(TQualifier&, TTypeList*, TTypeList*); + void addQualifierToExisting(const TSourceLoc&, TQualifier, const TString& identifier); + void addQualifierToExisting(const TSourceLoc&, TQualifier, TIdentifierList&); + void invariantCheck(const TSourceLoc&, const TQualifier&); + void updateStandaloneQualifierDefaults(const TSourceLoc&, const TPublicType&); + void wrapupSwitchSubsequence(TIntermAggregate* statements, TIntermNode* branchNode); + TIntermNode* addSwitch(const TSourceLoc&, TIntermTyped* expression, TIntermAggregate* body); + const TTypeList* recordStructCopy(TStructRecord&, const TType*, const TType*); + +#ifndef GLSLANG_WEB + TAttributeType attributeFromName(const TString& name) const; + TAttributes* makeAttributes(const TString& identifier) const; + TAttributes* makeAttributes(const TString& identifier, TIntermNode* node) const; + TAttributes* mergeAttributes(TAttributes*, TAttributes*) const; + + // Determine selection control from attributes + void handleSelectionAttributes(const TAttributes& attributes, TIntermNode*); + void handleSwitchAttributes(const TAttributes& attributes, TIntermNode*); + // Determine loop control from attributes + void handleLoopAttributes(const TAttributes& attributes, TIntermNode*); + // Function attributes + void handleFunctionAttributes(const TSourceLoc&, const TAttributes&); + + // GL_EXT_spirv_intrinsics + TSpirvRequirement* makeSpirvRequirement(const TSourceLoc& loc, const TString& name, + const TIntermAggregate* extensions, const TIntermAggregate* capabilities); + TSpirvRequirement* mergeSpirvRequirements(const TSourceLoc& loc, TSpirvRequirement* spirvReq1, + TSpirvRequirement* spirvReq2); + TSpirvTypeParameters* makeSpirvTypeParameters(const TSourceLoc& loc, const TIntermConstantUnion* constant); + TSpirvTypeParameters* mergeSpirvTypeParameters(TSpirvTypeParameters* spirvTypeParams1, + TSpirvTypeParameters* spirvTypeParams2); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, const TString& value); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, int value); + TSpirvInstruction* mergeSpirvInstruction(const TSourceLoc& loc, TSpirvInstruction* spirvInst1, + TSpirvInstruction* spirvInst2); +#endif + + void checkAndResizeMeshViewDim(const TSourceLoc&, TType&, bool isBlockMember); + +protected: + void nonInitConstCheck(const TSourceLoc&, TString& identifier, TType& type); + void inheritGlobalDefaults(TQualifier& dst) const; + TVariable* makeInternalVariable(const char* name, const TType&) const; + TVariable* declareNonArray(const TSourceLoc&, const TString& identifier, const TType&); + void declareArray(const TSourceLoc&, const TString& identifier, const TType&, TSymbol*&); + void checkRuntimeSizable(const TSourceLoc&, const TIntermTyped&); + bool isRuntimeLength(const TIntermTyped&) const; + TIntermNode* executeInitializer(const TSourceLoc&, TIntermTyped* initializer, TVariable* variable); + TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer); +#ifndef GLSLANG_WEB + void 
finish() override;
+#endif
+
+    virtual const char* getGlobalUniformBlockName() const override;
+    virtual void finalizeGlobalUniformBlockLayout(TVariable&) override;
+    virtual void setUniformBlockDefaults(TType& block) const override;
+
+    virtual const char* getAtomicCounterBlockName() const override;
+    virtual void finalizeAtomicCounterBlockLayout(TVariable&) override;
+    virtual void setAtomicCounterBlockDefaults(TType& block) const override;
+    virtual void setInvariant(const TSourceLoc& loc, const char* builtin) override;
+
+public:
+    //
+    // Generally, bison productions, the scanner, and the PP need read/write access to these; just give them direct access
+    //
+
+    // Current state of parsing
+    bool inMain;            // if inside a function, true if the function is main
+    const TString* blockName;
+    TQualifier currentBlockQualifier;
+    TPrecisionQualifier defaultPrecision[EbtNumTypes];
+    TBuiltInResource resources;
+    TLimits& limits;
+
+protected:
+    TParseContext(TParseContext&);
+    TParseContext& operator=(TParseContext&);
+
+    static const int maxSamplerIndex = EsdNumDims * (EbtNumTypes * (2 * 2 * 2 * 2 * 2)); // see computeSamplerTypeIndex()
+    TPrecisionQualifier defaultSamplerPrecision[maxSamplerIndex];
+    TPrecisionManager precisionManager;
+    TQualifier globalBufferDefaults;
+    TQualifier globalUniformDefaults;
+    TQualifier globalInputDefaults;
+    TQualifier globalOutputDefaults;
+    TQualifier globalSharedDefaults;
+    TString currentCaller;    // name of last function body entered (not valid when at global scope)
+#ifndef GLSLANG_WEB
+    int* atomicUintOffsets;   // to become an array of the right size to hold an offset per binding point
+    bool anyIndexLimits;
+    TIdSetType inductiveLoopIds;
+    TVector<TIntermTyped*> needsIndexLimitationChecking;
+    TStructRecord matrixFixRecord;
+    TStructRecord packingFixRecord;
+
+    //
+    // Geometry shader input arrays:
+    //  - array sizing is based on input primitive and/or explicit size
+    //
+    // Tessellation control output arrays:
+    //  - array sizing is based on output layout(vertices=...) and/or explicit size
+    //
+    // Both:
+    //  - array sizing is retroactive
+    //  - built-in block redeclarations interact with this
+    //
+    // Design:
+    //  - use a per-context "resize-list", a list of symbols whose array sizes
+    //    can be fixed
+    //
+    //  - the resize-list starts empty at beginning of user-shader compilation, it does
+    //    not have built-ins in it
+    //
+    //  - on built-in array use: copyUp() symbol and add it to the resize-list
+    //
+    //  - on user array declaration: add it to the resize-list
+    //
+    //  - on block redeclaration: copyUp() symbol and add it to the resize-list
+    //    * note, that appropriately gives an error if redeclaring a block that
+    //      was already used and hence already copied-up
+    //
+    //  - on seeing a layout declaration that sizes the array, fix everything in the
+    //    resize-list, giving errors for mismatch
+    //
+    //  - on seeing an array size declaration, give errors on mismatch between it and previous
+    //    array-sizing declarations
+    //
+    TVector<TSymbol*> ioArraySymbolResizeList;
+#endif
+};
+
+} // end namespace glslang
+
+#endif // _PARSER_HELPER_INCLUDED_
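[Editor's note: a much-simplified sketch of the retroactive "resize-list" scheme described at the end of ParseHelper.h above; the struct and its members are hypothetical, not the real implementation:]

    #include <vector>

    struct IoArrayResizer {
        std::vector<glslang::TSymbol*> resizeList; // starts empty each compilation
        int knownSize = 0;                         // 0 until a sizing declaration is seen

        // built-in array use, user array declaration, or block redeclaration
        // (after copyUp() where required):
        void noteResizableSymbol(glslang::TSymbol* symbol) { resizeList.push_back(symbol); }

        // e.g. layout(vertices = N) or an explicit array size:
        bool applySize(int size)
        {
            if (knownSize != 0 && knownSize != size)
                return false;        // mismatch: caller reports an error
            knownSize = size;
            resizeList.clear();      // the real code resizes each symbol's type here
            return true;
        }
    };
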
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/RemoveTree.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/RemoveTree.h
new file mode 100644
index 0000000..1ed0156
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/RemoveTree.h
@@ -0,0 +1,41 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#pragma once
+
+namespace glslang {
+
+void RemoveAllTreeNodes(TIntermNode*);
+
+} // end namespace glslang
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Scan.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Scan.h
new file mode 100644
index 0000000..24b75cf
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Scan.h
@@ -0,0 +1,276 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+#ifndef _GLSLANG_SCAN_INCLUDED_
+#define _GLSLANG_SCAN_INCLUDED_
+
+#include "Versions.h"
+
+namespace glslang {
+
+// Use a global end-of-input character, so no translation is needed across
+// layers of encapsulation. Characters are all 8 bit, and positive, so there is
+// no aliasing of character 255 onto -1, for example.
+const int EndOfInput = -1;
+
+//
+// A character scanner that seamlessly, on read-only strings, reads across an
+// array of strings without assuming null termination.
+//
+class TInputScanner {
+public:
+    TInputScanner(int n, const char* const s[], size_t L[], const char* const* names = nullptr,
+                  int b = 0, int f = 0, bool single = false) :
+        numSources(n),
+        // up to this point, common usage is "char*", but now we need positive 8-bit characters
+        sources(reinterpret_cast<const unsigned char* const *>(s)),
+        lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f), singleLogical(single),
+        endOfFileReached(false)
+    {
+        loc = new TSourceLoc[numSources];
+        for (int i = 0; i < numSources; ++i) {
+            loc[i].init(i - stringBias);
+        }
+        if (names != nullptr) {
+            for (int i = 0; i < numSources; ++i)
+                loc[i].name = names[i] != nullptr ?
NewPoolTString(names[i]) : nullptr; + } + loc[currentSource].line = 1; + logicalSourceLoc.init(1); + logicalSourceLoc.name = loc[0].name; + } + + virtual ~TInputScanner() + { + delete [] loc; + } + + // retrieve the next character and advance one character + int get() + { + int ret = peek(); + if (ret == EndOfInput) + return ret; + ++loc[currentSource].column; + ++logicalSourceLoc.column; + if (ret == '\n') { + ++loc[currentSource].line; + ++logicalSourceLoc.line; + logicalSourceLoc.column = 0; + loc[currentSource].column = 0; + } + advance(); + + return ret; + } + + // retrieve the next character, no advance + int peek() + { + if (currentSource >= numSources) { + endOfFileReached = true; + return EndOfInput; + } + // Make sure we do not read off the end of a string. + // N.B. Sources can have a length of 0. + int sourceToRead = currentSource; + size_t charToRead = currentChar; + while(charToRead >= lengths[sourceToRead]) { + charToRead = 0; + sourceToRead += 1; + if (sourceToRead >= numSources) { + return EndOfInput; + } + } + + // Here, we care about making negative valued characters positive + return sources[sourceToRead][charToRead]; + } + + // go back one character + void unget() + { + // Do not roll back once we've reached the end of the file. + if (endOfFileReached) + return; + + if (currentChar > 0) { + --currentChar; + --loc[currentSource].column; + --logicalSourceLoc.column; + if (loc[currentSource].column < 0) { + // We've moved back past a new line. Find the + // previous newline (or start of the file) to compute + // the column count on the now current line. + size_t chIndex = currentChar; + while (chIndex > 0) { + if (sources[currentSource][chIndex] == '\n') { + break; + } + --chIndex; + } + logicalSourceLoc.column = (int)(currentChar - chIndex); + loc[currentSource].column = (int)(currentChar - chIndex); + } + } else { + do { + --currentSource; + } while (currentSource > 0 && lengths[currentSource] == 0); + if (lengths[currentSource] == 0) { + // set to 0 if we've backed up to the start of an empty string + currentChar = 0; + } else + currentChar = lengths[currentSource] - 1; + } + if (peek() == '\n') { + --loc[currentSource].line; + --logicalSourceLoc.line; + } + } + + // for #line override + void setLine(int newLine) + { + logicalSourceLoc.line = newLine; + loc[getLastValidSourceIndex()].line = newLine; + } + + // for #line override in filename based parsing + void setFile(const char* filename) + { + TString* fn_tstr = NewPoolTString(filename); + logicalSourceLoc.name = fn_tstr; + loc[getLastValidSourceIndex()].name = fn_tstr; + } + + void setFile(const char* filename, int i) + { + TString* fn_tstr = NewPoolTString(filename); + if (i == getLastValidSourceIndex()) { + logicalSourceLoc.name = fn_tstr; + } + loc[i].name = fn_tstr; + } + + void setString(int newString) + { + logicalSourceLoc.string = newString; + loc[getLastValidSourceIndex()].string = newString; + logicalSourceLoc.name = nullptr; + loc[getLastValidSourceIndex()].name = nullptr; + } + + // for #include content indentation + void setColumn(int col) + { + logicalSourceLoc.column = col; + loc[getLastValidSourceIndex()].column = col; + } + + void setEndOfInput() + { + endOfFileReached = true; + currentSource = numSources; + } + + bool atEndOfInput() const { return endOfFileReached; } + + const TSourceLoc& getSourceLoc() const + { + if (singleLogical) { + return logicalSourceLoc; + } else { + return loc[std::max(0, std::min(currentSource, numSources - finale - 1))]; + } + } + // Returns the index (starting from 0) 
of the most recent valid source string we are reading from. + int getLastValidSourceIndex() const { return std::min(currentSource, numSources - 1); } + + void consumeWhiteSpace(bool& foundNonSpaceTab); + bool consumeComment(); + void consumeWhitespaceComment(bool& foundNonSpaceTab); + bool scanVersion(int& version, EProfile& profile, bool& notFirstToken); + +protected: + + // advance one character + void advance() + { + ++currentChar; + if (currentChar >= lengths[currentSource]) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + while (currentSource < numSources && lengths[currentSource] == 0) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + } + currentChar = 0; + } + } + + int numSources; // number of strings in source + const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput + const size_t *lengths; // length of each string + int currentSource; + size_t currentChar; + + // This is for reporting what string/line an error occurred on, and can be overridden by #line. + // It remembers the last state of each source string as it is left for the next one, so unget() + // can restore that state. + TSourceLoc* loc; // an array + + int stringBias; // the first string that is the user's string number 0 + int finale; // number of internal strings after user's last string + + TSourceLoc logicalSourceLoc; + bool singleLogical; // treats the strings as a single logical string. + // locations will be reported from the first string. + + // Set to true once peek() returns EndOfFile, so that we won't roll back + // once we've reached EndOfFile. + bool endOfFileReached; +}; + +} // end namespace glslang + +#endif // _GLSLANG_SCAN_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/ScanContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/ScanContext.h new file mode 100644 index 0000000..74b2b3c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/ScanContext.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// This holds context specific to the GLSL scanner, which +// sits between the preprocessor scanner and parser. +// + +#pragma once + +#include "ParseHelper.h" + +namespace glslang { + +class TPpContext; +class TPpToken; +class TParserToken; + +class TScanContext { +public: + explicit TScanContext(TParseContextBase& pc) : + parseContext(pc), + afterType(false), afterStruct(false), + field(false), afterBuffer(false) { } + virtual ~TScanContext() { } + + static void fillInKeywordMap(); + static void deleteKeywordMap(); + + int tokenize(TPpContext*, TParserToken&); + +protected: + TScanContext(TScanContext&); + TScanContext& operator=(TScanContext&); + + int tokenizeIdentifier(); + int identifierOrType(); + int reservedWord(); + int identifierOrReserved(bool reserved); + int es30ReservedFromGLSL(int version); + int nonreservedKeyword(int esVersion, int nonEsVersion); + int precisionKeyword(); + int matNxM(); + int dMat(); + int firstGenerationImage(bool inEs310); + int secondGenerationImage(); + + TParseContextBase& parseContext; + bool afterType; // true if we've recognized a type, so can only be looking for an identifier + bool afterStruct; // true if we've recognized the STRUCT keyword, so can only be looking for an identifier + bool field; // true if we're on a field, right after a '.' + bool afterBuffer; // true if we've recognized the BUFFER keyword + TSourceLoc loc; + TParserToken* parserToken; + TPpToken* ppToken; + + const char* tokenText; + int keyword; +}; + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/SymbolTable.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/SymbolTable.h new file mode 100644 index 0000000..0d45e48 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/SymbolTable.h @@ -0,0 +1,956 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SYMBOL_TABLE_INCLUDED_ +#define _SYMBOL_TABLE_INCLUDED_ + +// +// Symbol table for parsing. Has these design characteristics: +// +// * Same symbol table can be used to compile many shaders, to preserve +// effort of creating and loading with the large numbers of built-in +// symbols. +// +// --> This requires a copy mechanism, so initial pools used to create +// the shared information can be popped. Done through "clone" +// methods. +// +// * Name mangling will be used to give each function a unique name +// so that symbol table lookups are never ambiguous. This allows +// a simpler symbol table structure. +// +// * Pushing and popping of scope, so symbol table will really be a stack +// of symbol tables. Searched from the top, with new inserts going into +// the top. +// +// * Constants: Compile time constant symbols will keep their values +// in the symbol table. The parser can substitute constants at parse +// time, including doing constant folding and constant propagation. +// +// * No temporaries: Temporaries made from operations (+, --, .xy, etc.) +// are tracked in the intermediate representation, not the symbol table. +// + +#include "../Include/Common.h" +#include "../Include/intermediate.h" +#include "../Include/InfoSink.h" + +namespace glslang { + +// +// Symbol base class. (Can build functions or variables out of these...) 
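The name mangling described in the design notes above is what keeps function lookups unambiguous: a function's key in a table level is its base name plus an encoded parameter list (built later in this header via TType::appendMangledName), so an exact-match map query selects exactly one overload. A standalone sketch of the idea, using a plain std::map and invented parameter codes rather than the pool-allocated tLevel map defined further down:

#include <iostream>
#include <map>
#include <string>

int main()
{
    // Hypothetical mangled keys in the "name(" + one-code-per-parameter
    // style; the exact codes here are illustrative, not authoritative.
    std::map<std::string, std::string> level;
    level["max(f1;f1;"]   = "float max(float, float)";
    level["max(vf3;vf3;"] = "vec3 max(vec3, vec3)";

    // An exact-match lookup stays unambiguous even though both entries
    // share the base name "max".
    std::cout << level.at("max(vf3;vf3;") << "\n";
    return 0;
}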
+// + +class TVariable; +class TFunction; +class TAnonMember; + +typedef TVector<const char*> TExtensionList; + +class TSymbol { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + explicit TSymbol(const TString *n) : name(n), uniqueId(0), extensions(0), writable(true) { } + virtual TSymbol* clone() const = 0; + virtual ~TSymbol() { } // rely on all symbol owned memory coming from the pool + + virtual const TString& getName() const { return *name; } + virtual void changeName(const TString* newName) { name = newName; } + virtual void addPrefix(const char* prefix) + { + TString newName(prefix); + newName.append(*name); + changeName(NewPoolTString(newName.c_str())); + } + virtual const TString& getMangledName() const { return getName(); } + virtual TFunction* getAsFunction() { return 0; } + virtual const TFunction* getAsFunction() const { return 0; } + virtual TVariable* getAsVariable() { return 0; } + virtual const TVariable* getAsVariable() const { return 0; } + virtual const TAnonMember* getAsAnonMember() const { return 0; } + virtual const TType& getType() const = 0; + virtual TType& getWritableType() = 0; + virtual void setUniqueId(long long id) { uniqueId = id; } + virtual long long getUniqueId() const { return uniqueId; } + virtual void setExtensions(int numExts, const char* const exts[]) + { + assert(extensions == 0); + assert(numExts > 0); + extensions = NewPoolObject(extensions); + for (int e = 0; e < numExts; ++e) + extensions->push_back(exts[e]); + } + virtual int getNumExtensions() const { return extensions == nullptr ? 0 : (int)extensions->size(); } + virtual const char** getExtensions() const { return extensions->data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const = 0; + void dumpExtensions(TInfoSink& infoSink) const; +#endif + + virtual bool isReadOnly() const { return ! writable; } + virtual void makeReadOnly() { writable = false; } + +protected: + explicit TSymbol(const TSymbol&); + TSymbol& operator=(const TSymbol&); + + const TString *name; + unsigned long long uniqueId; // For cross-scope comparing during code generation + + // For tracking what extensions must be present + // (don't use if correct version/profile is present). + TExtensionList* extensions; // an array of pointers to existing constant char strings + + // + // N.B.: Non-const functions that will be generally used should assert on this, + // to avoid overwriting shared symbol-table information. + // + bool writable; +}; + +// +// Variable class, meaning a symbol that's not a function. +// +// There could be a separate class hierarchy for Constant variables; +// Only one of int, bool, or float, (or none) is correct for +// any particular use, but it's easy to do this way, and doesn't +// seem worth having separate classes, and "getConst" can't simply return +// different values for different types polymorphically, so this is +// just simple and pragmatic.
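The writable flag guarded by the N.B. above is the entire protection for shared built-in levels: every mutator asserts it, and readOnly() clears it once the shared tables are built. A condensed, self-contained sketch of that idiom (hypothetical Symbol class, not the glslang API):

#include <cassert>
#include <string>

class Symbol {
public:
    // Mutators first assert writability, so accidental writes to
    // shared, pre-built state fail loudly in debug builds.
    void rename(const std::string& n) { assert(writable); name = n; }
    void makeReadOnly() { writable = false; }
    bool isReadOnly() const { return !writable; }
private:
    std::string name;
    bool writable = true;
};

int main()
{
    Symbol s;
    s.rename("gl_FragDepth"); // fine: still writable
    s.makeReadOnly();
    // s.rename("x");         // would assert: symbol is now shared/frozen
    return 0;
}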
+// +class TVariable : public TSymbol { +public: + TVariable(const TString *name, const TType& t, bool uT = false ) + : TSymbol(name), + userType(uT), + constSubtree(nullptr), + memberExtensions(nullptr), + anonId(-1) + { type.shallowCopy(t); } + virtual TVariable* clone() const; + virtual ~TVariable() { } + + virtual TVariable* getAsVariable() { return this; } + virtual const TVariable* getAsVariable() const { return this; } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { assert(writable); return type; } + virtual bool isUserType() const { return userType; } + virtual const TConstUnionArray& getConstArray() const { return constArray; } + virtual TConstUnionArray& getWritableConstArray() { assert(writable); return constArray; } + virtual void setConstArray(const TConstUnionArray& array) { constArray = array; } + virtual void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + virtual TIntermTyped* getConstSubtree() const { return constSubtree; } + virtual void setAnonId(int i) { anonId = i; } + virtual int getAnonId() const { return anonId; } + + virtual void setMemberExtensions(int member, int numExts, const char* const exts[]) + { + assert(type.isStruct()); + assert(numExts > 0); + if (memberExtensions == nullptr) { + memberExtensions = NewPoolObject(memberExtensions); + memberExtensions->resize(type.getStruct()->size()); + } + for (int e = 0; e < numExts; ++e) + (*memberExtensions)[member].push_back(exts[e]); + } + virtual bool hasMemberExtensions() const { return memberExtensions != nullptr; } + virtual int getNumMemberExtensions(int member) const + { + return memberExtensions == nullptr ? 0 : (int)(*memberExtensions)[member].size(); + } + virtual const char** getMemberExtensions(int member) const { return (*memberExtensions)[member].data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + +protected: + explicit TVariable(const TVariable&); + TVariable& operator=(const TVariable&); + + TType type; + bool userType; + + // we are assuming that Pool Allocator will free the memory allocated to unionArray + // when this object is destroyed + + TConstUnionArray constArray; // for compile-time constant value + TIntermTyped* constSubtree; // for specialization constant computation + TVector<TExtensionList>* memberExtensions; // per-member extension list, allocated only when needed + int anonId; // the ID used for anonymous blocks: TODO: see if uniqueId could serve a dual purpose +}; + +// +// The function sub-class of symbols and the parser will need to +// share this definition of a function parameter. +// +struct TParameter { + TString *name; + TType* type; + TIntermTyped* defaultValue; + TParameter& copyParam(const TParameter& param) + { + if (param.name) + name = NewPoolTString(param.name->c_str()); + else + name = 0; + type = param.type->clone(); + defaultValue = param.defaultValue; + return *this; + } + TBuiltInVariable getDeclaredBuiltIn() const { return type->getQualifier().declaredBuiltIn; } +}; + +// +// The function sub-class of a symbol.
+// +class TFunction : public TSymbol { +public: + explicit TFunction(TOperator o) : + TSymbol(0), + op(o), + defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) { } + TFunction(const TString *name, const TType& retType, TOperator tOp = EOpNull) : + TSymbol(name), + mangledName(*name + '('), + op(tOp), + defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) + { + returnType.shallowCopy(retType); + declaredBuiltIn = retType.getQualifier().builtIn; + } + virtual TFunction* clone() const override; + virtual ~TFunction(); + + virtual TFunction* getAsFunction() override { return this; } + virtual const TFunction* getAsFunction() const override { return this; } + + // Install 'p' as the (non-'this') last parameter. + // Non-'this' parameters are reflected in both the list of parameters and the + // mangled name. + virtual void addParameter(TParameter& p) + { + assert(writable); + parameters.push_back(p); + p.type->appendMangledName(mangledName); + + if (p.defaultValue != nullptr) + defaultParamCount++; + } + + // Install 'this' as the first parameter. + // 'this' is reflected in the list of parameters, but not the mangled name. + virtual void addThisParameter(TType& type, const char* name) + { + TParameter p = { NewPoolTString(name), new TType, nullptr }; + p.type->shallowCopy(type); + parameters.insert(parameters.begin(), p); + } + + virtual void addPrefix(const char* prefix) override + { + TSymbol::addPrefix(prefix); + mangledName.insert(0, prefix); + } + + virtual void removePrefix(const TString& prefix) + { + assert(mangledName.compare(0, prefix.size(), prefix) == 0); + mangledName.erase(0, prefix.size()); + } + + virtual const TString& getMangledName() const override { return mangledName; } + virtual const TType& getType() const override { return returnType; } + virtual TBuiltInVariable getDeclaredBuiltInType() const { return declaredBuiltIn; } + virtual TType& getWritableType() override { return returnType; } + virtual void relateToOperator(TOperator o) { assert(writable); op = o; } + virtual TOperator getBuiltInOp() const { return op; } + virtual void setDefined() { assert(writable); defined = true; } + virtual bool isDefined() const { return defined; } + virtual void setPrototyped() { assert(writable); prototyped = true; } + virtual bool isPrototyped() const { return prototyped; } + virtual void setImplicitThis() { assert(writable); implicitThis = true; } + virtual bool hasImplicitThis() const { return implicitThis; } + virtual void setIllegalImplicitThis() { assert(writable); illegalImplicitThis = true; } + virtual bool hasIllegalImplicitThis() const { return illegalImplicitThis; } + + // Return total number of parameters + virtual int getParamCount() const { return static_cast<int>(parameters.size()); } + // Return number of parameters with default values.
+ virtual int getDefaultParamCount() const { return defaultParamCount; } + // Return number of fixed parameters (without default values) + virtual int getFixedParamCount() const { return getParamCount() - getDefaultParamCount(); } + + virtual TParameter& operator[](int i) { assert(writable); return parameters[i]; } + virtual const TParameter& operator[](int i) const { return parameters[i]; } + +#ifndef GLSLANG_WEB + virtual void setSpirvInstruction(const TSpirvInstruction& inst) + { + relateToOperator(EOpSpirvInst); + spirvInst = inst; + } + virtual const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TFunction(const TFunction&); + TFunction& operator=(const TFunction&); + + typedef TVector<TParameter> TParamList; + TParamList parameters; + TType returnType; + TBuiltInVariable declaredBuiltIn; + + TString mangledName; + TOperator op; + bool defined; + bool prototyped; + bool implicitThis; // True if this function is allowed to see all members of 'this' + bool illegalImplicitThis; // True if this function is not supposed to have access to dynamic members of 'this', + // even if it finds member variables in the symbol table. + // This is important for a static member function that has member variables in scope, + // but is not allowed to use them, or see hidden symbols instead. + int defaultParamCount; + +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; // SPIR-V instruction qualifiers +#endif +}; + +// +// Members of anonymous blocks are a kind of TSymbol. They are not hidden in +// the symbol table behind a container; rather they are visible and point to +// their anonymous container. (The anonymous container is found through the +// member, not the other way around.)
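Concretely, a GLSL block with no instance name, e.g. uniform Transforms { mat4 mvp; };, must expose mvp directly at the enclosing scope, while the container itself gets a generated name (the anon@0 spelling below is illustrative; the real prefix comes from AnonymousPrefix). A toy model of the member-points-to-container arrangement the class below implements:

#include <iostream>
#include <string>
#include <vector>

// The container owns the member list; each visible member symbol only
// records which container and which slot it refers to.
struct Container {
    std::string generatedName;            // e.g. "anon@0"
    std::vector<std::string> memberTypes; // per-member type strings
};

struct AnonMember {
    const Container* container;
    unsigned int memberNumber;
    const std::string& type() const { return container->memberTypes[memberNumber]; }
};

int main()
{
    Container block{"anon@0", {"mat4"}}; // uniform Transforms { mat4 mvp; };
    AnonMember mvp{&block, 0};           // "mvp" is inserted at the enclosing scope
    std::cout << mvp.type() << "\n";     // prints mat4, found via the container
    return 0;
}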
+// +class TAnonMember : public TSymbol { +public: + TAnonMember(const TString* n, unsigned int m, TVariable& a, int an) : TSymbol(n), anonContainer(a), memberNumber(m), anonId(an) { } + virtual TAnonMember* clone() const override; + virtual ~TAnonMember() { } + + virtual const TAnonMember* getAsAnonMember() const override { return this; } + virtual const TVariable& getAnonContainer() const { return anonContainer; } + virtual unsigned int getMemberNumber() const { return memberNumber; } + + virtual const TType& getType() const override + { + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual TType& getWritableType() override + { + assert(writable); + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual void setExtensions(int numExts, const char* const exts[]) override + { + anonContainer.setMemberExtensions(memberNumber, numExts, exts); + } + virtual int getNumExtensions() const override { return anonContainer.getNumMemberExtensions(memberNumber); } + virtual const char** getExtensions() const override { return anonContainer.getMemberExtensions(memberNumber); } + + virtual int getAnonId() const { return anonId; } +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TAnonMember(const TAnonMember&); + TAnonMember& operator=(const TAnonMember&); + + TVariable& anonContainer; + unsigned int memberNumber; + int anonId; +}; + +class TSymbolTableLevel { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TSymbolTableLevel() : defaultPrecision(0), anonId(0), thisLevel(false) { } + ~TSymbolTableLevel(); + + bool insert(const TString& name, TSymbol* symbol) { + return level.insert(tLevelPair(name, symbol)).second; + } + + bool insert(TSymbol& symbol, bool separateNameSpaces, const TString& forcedKeyName = TString()) + { + // + // returning true means symbol was added to the table with no semantic errors + // + const TString& name = symbol.getName(); + if (forcedKeyName.length()) { + return level.insert(tLevelPair(forcedKeyName, &symbol)).second; + } + else if (name == "") { + symbol.getAsVariable()->setAnonId(anonId++); + // An empty name means an anonymous container, exposing its members to the external scope. + // Give it a name and insert its members in the symbol table, pointing to the container. + char buf[20]; + snprintf(buf, 20, "%s%d", AnonymousPrefix, symbol.getAsVariable()->getAnonId()); + symbol.changeName(NewPoolTString(buf)); + + return insertAnonymousMembers(symbol, 0); + } else { + // Check for redefinition errors: + // - STL itself will tell us if there is a direct name collision, with name mangling, at this level + // - additionally, check for function-redefining-variable name collisions + const TString& insertName = symbol.getMangledName(); + if (symbol.getAsFunction()) { + // make sure there isn't a variable of this name + if (! separateNameSpaces && level.find(name) != level.end()) + return false; + + // insert, and whatever happens is okay + level.insert(tLevelPair(insertName, &symbol)); + + return true; + } else + return level.insert(tLevelPair(insertName, &symbol)).second; + } + } + + // Add more members to an already inserted aggregate object + bool amend(TSymbol& symbol, int firstNewMember) + { + // See insert() for comments on basic explanation of insert. + // This operates similarly, but more simply. 
+ // Only supporting amend of anonymous blocks so far. + if (IsAnonymous(symbol.getName())) + return insertAnonymousMembers(symbol, firstNewMember); + else + return false; + } + + bool insertAnonymousMembers(TSymbol& symbol, int firstMember) + { + const TTypeList& types = *symbol.getAsVariable()->getType().getStruct(); + for (unsigned int m = firstMember; m < types.size(); ++m) { + TAnonMember* member = new TAnonMember(&types[m].type->getFieldName(), m, *symbol.getAsVariable(), symbol.getAsVariable()->getAnonId()); + if (! level.insert(tLevelPair(member->getMangledName(), member)).second) + return false; + } + + return true; + } + + void retargetSymbol(const TString& from, const TString& to) { + tLevel::const_iterator fromIt = level.find(from); + tLevel::const_iterator toIt = level.find(to); + if (fromIt == level.end() || toIt == level.end()) + return; + delete fromIt->second; + level[from] = toIt->second; + retargetedSymbols.push_back({from, to}); + } + + TSymbol* find(const TString& name) const + { + tLevel::const_iterator it = level.find(name); + if (it == level.end()) + return 0; + else + return (*it).second; + } + + void findFunctionNameList(const TString& name, TVector<const TFunction*>& list) + { + size_t parenAt = name.find_first_of('('); + TString base(name, 0, parenAt + 1); + + tLevel::const_iterator begin = level.lower_bound(base); + base[parenAt] = ')'; // assume ')' is lexically after '(' + tLevel::const_iterator end = level.upper_bound(base); + for (tLevel::const_iterator it = begin; it != end; ++it) + list.push_back(it->second->getAsFunction()); + } + + // See if there is already a function in the table having the given non-function-style name. + bool hasFunctionName(const TString& name) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt != candidateName.npos && candidateName.compare(0, parenAt, name) == 0) + + return true; + } + + return false; + } + + // See if there is a variable at this level having the given non-function-style name. + // Return true if name is found, and set variable to true if the name was a variable. + bool findFunctionVariableName(const TString& name, bool& variable) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt == candidateName.npos) { + // not a mangled name + if (candidateName == name) { + // found a variable name match + variable = true; + return true; + } + } else { + // a mangled name + if (candidateName.compare(0, parenAt, name) == 0) { + // found a function name match + variable = false; + return true; + } + } + } + + return false; + } + + // Use this to do a lazy 'push' of precision defaults the first time + // a precision statement is seen in a new scope. Leave it at 0 for + // when no push was needed. Thus, it is not the current defaults, + // it is what to restore the defaults to when popping a level.
+ void setPreviousDefaultPrecisions(const TPrecisionQualifier *p) + { + // can call multiple times at one scope, will only latch on first call, + // as we're tracking the previous scope's values, not the current values + if (defaultPrecision != 0) + return; + + defaultPrecision = new TPrecisionQualifier[EbtNumTypes]; + for (int t = 0; t < EbtNumTypes; ++t) + defaultPrecision[t] = p[t]; + } + + void getPreviousDefaultPrecisions(TPrecisionQualifier *p) + { + // can be called for table level pops that didn't set the + // defaults + if (defaultPrecision == 0 || p == 0) + return; + + for (int t = 0; t < EbtNumTypes; ++t) + p[t] = defaultPrecision[t]; + } + + void relateToOperator(const char* name, TOperator op); + void setFunctionExtensions(const char* name, int num, const char* const extensions[]); +#if !defined(GLSLANG_WEB) + void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + TSymbolTableLevel* clone() const; + void readOnly(); + + void setThisLevel() { thisLevel = true; } + bool isThisLevel() const { return thisLevel; } + +protected: + explicit TSymbolTableLevel(TSymbolTableLevel&); + TSymbolTableLevel& operator=(TSymbolTableLevel&); + + typedef std::map<TString, TSymbol*, std::less<TString>, pool_allocator<std::pair<const TString, TSymbol*> > > tLevel; + typedef const tLevel::value_type tLevelPair; + typedef std::pair<tLevel::iterator, bool> tInsertResult; + + tLevel level; // named mappings + TPrecisionQualifier *defaultPrecision; + // pair<FromName, ToName> + TVector<std::pair<TString, TString>> retargetedSymbols; + int anonId; + bool thisLevel; // True if this level of the symbol table is a structure scope containing member function + // that are supposed to see anonymous access to member variables. +}; + +class TSymbolTable { +public: + TSymbolTable() : uniqueId(0), noBuiltInRedeclarations(false), separateNameSpaces(false), adoptedLevels(0) + { + // + // This symbol table cannot be used until push() is called.
+ // + } + ~TSymbolTable() + { + // this can be called explicitly; safest to code it so it can be called multiple times + + // don't deallocate levels passed in from elsewhere + while (table.size() > adoptedLevels) + pop(0); + } + + void adoptLevels(TSymbolTable& symTable) + { + for (unsigned int level = 0; level < symTable.table.size(); ++level) { + table.push_back(symTable.table[level]); + ++adoptedLevels; + } + uniqueId = symTable.uniqueId; + noBuiltInRedeclarations = symTable.noBuiltInRedeclarations; + separateNameSpaces = symTable.separateNameSpaces; + } + + // + // While level adopting is generic, the methods below enact a the following + // convention for levels: + // 0: common built-ins shared across all stages, all compiles, only one copy for all symbol tables + // 1: per-stage built-ins, shared across all compiles, but a different copy per stage + // 2: built-ins specific to a compile, like resources that are context-dependent, or redeclared built-ins + // 3: user-shader globals + // +protected: + static const uint32_t LevelFlagBitOffset = 56; + static const int globalLevel = 3; + static bool isSharedLevel(int level) { return level <= 1; } // exclude all per-compile levels + static bool isBuiltInLevel(int level) { return level <= 2; } // exclude user globals + static bool isGlobalLevel(int level) { return level <= globalLevel; } // include user globals +public: + bool isEmpty() { return table.size() == 0; } + bool atBuiltInLevel() { return isBuiltInLevel(currentLevel()); } + bool atGlobalLevel() { return isGlobalLevel(currentLevel()); } + static bool isBuiltInSymbol(long long uniqueId) { + int level = static_cast<int>(uniqueId >> LevelFlagBitOffset); + return isBuiltInLevel(level); + } + static constexpr uint64_t uniqueIdMask = (1LL << LevelFlagBitOffset) - 1; + static const uint32_t MaxLevelInUniqueID = 127; + void setNoBuiltInRedeclarations() { noBuiltInRedeclarations = true; } + void setSeparateNameSpaces() { separateNameSpaces = true; } + + void push() + { + table.push_back(new TSymbolTableLevel); + updateUniqueIdLevelFlag(); + } + + // Make a new symbol-table level to represent the scope introduced by a structure + // containing member functions, such that the member functions can find anonymous + // references to member variables. + // + // 'thisSymbol' should have a name of "" to trigger anonymous structure-member + // symbol finds. + void pushThis(TSymbol& thisSymbol) + { + assert(thisSymbol.getName().size() == 0); + table.push_back(new TSymbolTableLevel); + updateUniqueIdLevelFlag(); + table.back()->setThisLevel(); + insert(thisSymbol); + } + + void pop(TPrecisionQualifier *p) + { + table[currentLevel()]->getPreviousDefaultPrecisions(p); + delete table.back(); + table.pop_back(); + updateUniqueIdLevelFlag(); + } + + // + // Insert a visible symbol into the symbol table so it can + // be found later by name. + // + // Returns false if the was a name collision. + // + bool insert(TSymbol& symbol) + { + symbol.setUniqueId(++uniqueId); + + // make sure there isn't a function of this variable name + if (! separateNameSpaces && !
symbol.getAsFunction() && table[currentLevel()]->hasFunctionName(symbol.getName())) + return false; + + // check for not overloading or redefining a built-in function + if (noBuiltInRedeclarations) { + if (atGlobalLevel() && currentLevel() > 0) { + if (table[0]->hasFunctionName(symbol.getName())) + return false; + if (currentLevel() > 1 && table[1]->hasFunctionName(symbol.getName())) + return false; + } + } + + return table[currentLevel()]->insert(symbol, separateNameSpaces); + } + + // Add more members to an already inserted aggregate object + bool amend(TSymbol& symbol, int firstNewMember) + { + // See insert() for comments on basic explanation of insert. + // This operates similarly, but more simply. + return table[currentLevel()]->amend(symbol, firstNewMember); + } + + // Update the level info in symbol's unique ID to current level + void amendSymbolIdLevel(TSymbol& symbol) + { + // clamp level to avoid overflow + uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ? MaxLevelInUniqueID : currentLevel(); + uint64_t symbolId = symbol.getUniqueId(); + symbolId &= uniqueIdMask; + symbolId |= (level << LevelFlagBitOffset); + symbol.setUniqueId(symbolId); + } + // + // To allocate an internal temporary, which will need to be uniquely + // identified by the consumer of the AST, but never need to + // found by doing a symbol table search by name, hence allowed an + // arbitrary name in the symbol with no worry of collision. + // + void makeInternalVariable(TSymbol& symbol) + { + symbol.setUniqueId(++uniqueId); + } + + // + // Copy a variable or anonymous member's structure from a shared level so that + // it can be added (soon after return) to the symbol table where it can be + // modified without impacting other users of the shared table. + // + TSymbol* copyUpDeferredInsert(TSymbol* shared) + { + if (shared->getAsVariable()) { + TSymbol* copy = shared->clone(); + copy->setUniqueId(shared->getUniqueId()); + return copy; + } else { + const TAnonMember* anon = shared->getAsAnonMember(); + assert(anon); + TVariable* container = anon->getAnonContainer().clone(); + container->changeName(NewPoolTString("")); + container->setUniqueId(anon->getAnonContainer().getUniqueId()); + return container; + } + } + + TSymbol* copyUp(TSymbol* shared) + { + TSymbol* copy = copyUpDeferredInsert(shared); + table[globalLevel]->insert(*copy, separateNameSpaces); + if (shared->getAsVariable()) + return copy; + else { + // return the copy of the anonymous member + return table[globalLevel]->find(shared->getName()); + } + } + + // Normal find of a symbol, that can optionally say whether the symbol was found + // at a built-in level or the current top-scope level. + TSymbol* find(const TString& name, bool* builtIn = 0, bool* currentScope = 0, int* thisDepthP = 0) + { + int level = currentLevel(); + TSymbol* symbol; + int thisDepth = 0; + do { + if (table[level]->isThisLevel()) + ++thisDepth; + symbol = table[level]->find(name); + --level; + } while (symbol == nullptr && level >= 0); + level++; + if (builtIn) + *builtIn = isBuiltInLevel(level); + if (currentScope) + *currentScope = isGlobalLevel(currentLevel()) || level == currentLevel(); // consider shared levels as "current scope" WRT user globals + if (thisDepthP != nullptr) { + if (! 
table[level]->isThisLevel()) + thisDepth = 0; + *thisDepthP = thisDepth; + } + + return symbol; + } + + void retargetSymbol(const TString& from, const TString& to) { + int level = currentLevel(); + table[level]->retargetSymbol(from, to); + } + + + // Find of a symbol that returns how many layers deep of nested + // structures-with-member-functions ('this' scopes) deep the symbol was + // found in. + TSymbol* find(const TString& name, int& thisDepth) + { + int level = currentLevel(); + TSymbol* symbol; + thisDepth = 0; + do { + if (table[level]->isThisLevel()) + ++thisDepth; + symbol = table[level]->find(name); + --level; + } while (symbol == 0 && level >= 0); + + if (! table[level + 1]->isThisLevel()) + thisDepth = 0; + + return symbol; + } + + bool isFunctionNameVariable(const TString& name) const + { + if (separateNameSpaces) + return false; + + int level = currentLevel(); + do { + bool variable; + bool found = table[level]->findFunctionVariableName(name, variable); + if (found) + return variable; + --level; + } while (level >= 0); + + return false; + } + + void findFunctionNameList(const TString& name, TVector<const TFunction*>& list, bool& builtIn) + { + // For user levels, return the set found in the first scope with a match + builtIn = false; + int level = currentLevel(); + do { + table[level]->findFunctionNameList(name, list); + --level; + } while (list.empty() && level >= globalLevel); + + if (! list.empty()) + return; + + // Gather across all built-in levels; they don't hide each other + builtIn = true; + do { + table[level]->findFunctionNameList(name, list); + --level; + } while (level >= 0); + } + + void relateToOperator(const char* name, TOperator op) + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->relateToOperator(name, op); + } + + void setFunctionExtensions(const char* name, int num, const char* const extensions[]) + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->setFunctionExtensions(name, num, extensions); + } + + void setVariableExtensions(const char* name, int numExts, const char* const extensions[]) + { + TSymbol* symbol = find(TString(name)); + if (symbol == nullptr) + return; + + symbol->setExtensions(numExts, extensions); + } + + void setVariableExtensions(const char* blockName, const char* name, int numExts, const char* const extensions[]) + { + TSymbol* symbol = find(TString(blockName)); + if (symbol == nullptr) + return; + TVariable* variable = symbol->getAsVariable(); + assert(variable != nullptr); + + const TTypeList& structure = *variable->getAsVariable()->getType().getStruct(); + for (int member = 0; member < (int)structure.size(); ++member) { + if (structure[member].type->getFieldName().compare(name) == 0) { + variable->setMemberExtensions(member, numExts, extensions); + return; + } + } + } + + long long getMaxSymbolId() { return uniqueId; } +#if !defined(GLSLANG_WEB) + void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + void copyTable(const TSymbolTable& copyOf); + + void setPreviousDefaultPrecisions(TPrecisionQualifier *p) { table[currentLevel()]->setPreviousDefaultPrecisions(p); } + + void readOnly() + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->readOnly(); + } + + // Add current level in the high-bits of unique id + void updateUniqueIdLevelFlag() { + // clamp level to avoid overflow + uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ?
MaxLevelInUniqueID : currentLevel(); + uniqueId &= uniqueIdMask; + uniqueId |= (level << LevelFlagBitOffset); + } + + void overwriteUniqueId(long long id) + { + uniqueId = id; + updateUniqueIdLevelFlag(); + } + +protected: + TSymbolTable(TSymbolTable&); + TSymbolTable& operator=(TSymbolTableLevel&); + + int currentLevel() const { return static_cast<int>(table.size()) - 1; } + std::vector<TSymbolTableLevel*> table; + long long uniqueId; // for unique identification in code generation + bool noBuiltInRedeclarations; + bool separateNameSpaces; + unsigned int adoptedLevels; +}; + +} // end namespace glslang + +#endif // _SYMBOL_TABLE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Versions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Versions.h new file mode 100644 index 0000000..f06abdd --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/Versions.h @@ -0,0 +1,359 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Copyright (C) 2015-2018 Google, Inc. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _VERSIONS_INCLUDED_ +#define _VERSIONS_INCLUDED_ + +#define LAST_ELEMENT_MARKER(x) x + +// +// Help manage multiple profiles, versions, extensions etc. +// + +// +// Profiles are set up for masking operations, so queries can be done on multiple +// profiles at the same time. +// +// Don't maintain an ordinal set of enums (0,1,2,3...) to avoid all possible +// defects from mixing the two different forms.
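Because each profile in the enum that follows occupies its own bit, one mask test can answer "is this any of these profiles?", with no way to confuse a mask with an ordinal value. A self-contained sketch mirroring that declaration:

#include <cstdio>

// Standalone stand-in for the EProfile declaration below: each profile
// is a distinct bit, so one test covers several profiles at once.
enum Profile : unsigned {
    NoProfile            = 1u << 0,
    CoreProfile          = 1u << 1,
    CompatibilityProfile = 1u << 2,
    EsProfile            = 1u << 3,
};

static bool desktopOnlyFeature(Profile p)
{
    // One bitwise test instead of a chain of ordinal comparisons.
    return (p & (CoreProfile | CompatibilityProfile)) != 0;
}

int main()
{
    std::printf("%d %d\n", desktopOnlyFeature(CoreProfile),
                           desktopOnlyFeature(EsProfile)); // prints: 1 0
    return 0;
}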
+// +typedef enum : unsigned { + EBadProfile = 0, + ENoProfile = (1 << 0), // only for desktop, before profiles showed up + ECoreProfile = (1 << 1), + ECompatibilityProfile = (1 << 2), + EEsProfile = (1 << 3), + LAST_ELEMENT_MARKER(EProfileCount), +} EProfile; + +namespace glslang { + +// +// Map from profile enum to externally readable text name. +// +inline const char* ProfileName(EProfile profile) +{ + switch (profile) { + case ENoProfile: return "none"; + case ECoreProfile: return "core"; + case ECompatibilityProfile: return "compatibility"; + case EEsProfile: return "es"; + default: return "unknown profile"; + } +} + +// +// What source rules, validation rules, target language, etc. are needed or +// desired for SPIR-V? +// +// 0 means a target or rule set is not enabled (ignore rules from that entity). +// Non-0 means to apply semantic rules arising from that version of its rule set. +// The union of all requested rule sets will be applied. +// +struct SpvVersion { + SpvVersion() : spv(0), vulkanGlsl(0), vulkan(0), openGl(0), vulkanRelaxed(false) {} + unsigned int spv; // the version of SPIR-V to target, as defined by "word 1" of the SPIR-V binary header + int vulkanGlsl; // the version of GLSL semantics for Vulkan, from GL_KHR_vulkan_glsl, for "#define VULKAN XXX" + int vulkan; // the version of Vulkan, for which SPIR-V execution environment rules to use + int openGl; // the version of GLSL semantics for OpenGL, from GL_ARB_gl_spirv, for "#define GL_SPIRV XXX" + bool vulkanRelaxed; // relax changes to GLSL for Vulkan, allowing some GL-specific to be compiled to Vulkan SPIR-V target +}; + +// +// The behaviors from the GLSL "#extension extension_name : behavior" +// +typedef enum { + EBhMissing = 0, + EBhRequire, + EBhEnable, + EBhWarn, + EBhDisable, + EBhDisablePartial // use as initial state of an extension that is only partially implemented +} TExtensionBehavior; + +// +// Symbolic names for extensions. Strings may be directly used when calling the +// functions, but better to have the compiler do spelling checks. 
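These string constants end up as keys in the parser's per-compile behavior map, where a #extension <name> : <behavior> directive overwrites the entry for that name. A minimal standalone model of that bookkeeping (a std::map stand-in; the real table lives in the parse context, not in this header):

#include <iostream>
#include <map>
#include <string>

// Behavior values mirror TExtensionBehavior above.
enum Behavior { Missing, Require, Enable, Warn, Disable };

int main()
{
    std::map<std::string, Behavior> behavior;
    behavior["GL_EXT_mesh_shader"] = Disable; // default until requested

    // Seen in the shader: #extension GL_EXT_mesh_shader : enable
    behavior["GL_EXT_mesh_shader"] = Enable;

    auto it = behavior.find("GL_EXT_mesh_shader");
    std::cout << (it != behavior.end() && it->second == Enable) << "\n"; // 1
    return 0;
}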
+// +const char* const E_GL_OES_texture_3D = "GL_OES_texture_3D"; +const char* const E_GL_OES_standard_derivatives = "GL_OES_standard_derivatives"; +const char* const E_GL_EXT_frag_depth = "GL_EXT_frag_depth"; +const char* const E_GL_OES_EGL_image_external = "GL_OES_EGL_image_external"; +const char* const E_GL_OES_EGL_image_external_essl3 = "GL_OES_EGL_image_external_essl3"; +const char* const E_GL_EXT_YUV_target = "GL_EXT_YUV_target"; +const char* const E_GL_EXT_shader_texture_lod = "GL_EXT_shader_texture_lod"; +const char* const E_GL_EXT_shadow_samplers = "GL_EXT_shadow_samplers"; + +const char* const E_GL_ARB_texture_rectangle = "GL_ARB_texture_rectangle"; +const char* const E_GL_3DL_array_objects = "GL_3DL_array_objects"; +const char* const E_GL_ARB_shading_language_420pack = "GL_ARB_shading_language_420pack"; +const char* const E_GL_ARB_texture_gather = "GL_ARB_texture_gather"; +const char* const E_GL_ARB_gpu_shader5 = "GL_ARB_gpu_shader5"; +const char* const E_GL_ARB_separate_shader_objects = "GL_ARB_separate_shader_objects"; +const char* const E_GL_ARB_compute_shader = "GL_ARB_compute_shader"; +const char* const E_GL_ARB_tessellation_shader = "GL_ARB_tessellation_shader"; +const char* const E_GL_ARB_enhanced_layouts = "GL_ARB_enhanced_layouts"; +const char* const E_GL_ARB_texture_cube_map_array = "GL_ARB_texture_cube_map_array"; +const char* const E_GL_ARB_texture_multisample = "GL_ARB_texture_multisample"; +const char* const E_GL_ARB_shader_texture_lod = "GL_ARB_shader_texture_lod"; +const char* const E_GL_ARB_explicit_attrib_location = "GL_ARB_explicit_attrib_location"; +const char* const E_GL_ARB_explicit_uniform_location = "GL_ARB_explicit_uniform_location"; +const char* const E_GL_ARB_shader_image_load_store = "GL_ARB_shader_image_load_store"; +const char* const E_GL_ARB_shader_atomic_counters = "GL_ARB_shader_atomic_counters"; +const char* const E_GL_ARB_shader_atomic_counter_ops = "GL_ARB_shader_atomic_counter_ops"; +const char* const E_GL_ARB_shader_draw_parameters = "GL_ARB_shader_draw_parameters"; +const char* const E_GL_ARB_shader_group_vote = "GL_ARB_shader_group_vote"; +const char* const E_GL_ARB_derivative_control = "GL_ARB_derivative_control"; +const char* const E_GL_ARB_shader_texture_image_samples = "GL_ARB_shader_texture_image_samples"; +const char* const E_GL_ARB_viewport_array = "GL_ARB_viewport_array"; +const char* const E_GL_ARB_gpu_shader_int64 = "GL_ARB_gpu_shader_int64"; +const char* const E_GL_ARB_gpu_shader_fp64 = "GL_ARB_gpu_shader_fp64"; +const char* const E_GL_ARB_shader_ballot = "GL_ARB_shader_ballot"; +const char* const E_GL_ARB_sparse_texture2 = "GL_ARB_sparse_texture2"; +const char* const E_GL_ARB_sparse_texture_clamp = "GL_ARB_sparse_texture_clamp"; +const char* const E_GL_ARB_shader_stencil_export = "GL_ARB_shader_stencil_export"; +// const char* const E_GL_ARB_cull_distance = "GL_ARB_cull_distance"; // present for 4.5, but need extension control over block members +const char* const E_GL_ARB_post_depth_coverage = "GL_ARB_post_depth_coverage"; +const char* const E_GL_ARB_shader_viewport_layer_array = "GL_ARB_shader_viewport_layer_array"; +const char* const E_GL_ARB_fragment_shader_interlock = "GL_ARB_fragment_shader_interlock"; +const char* const E_GL_ARB_shader_clock = "GL_ARB_shader_clock"; +const char* const E_GL_ARB_uniform_buffer_object = "GL_ARB_uniform_buffer_object"; +const char* const E_GL_ARB_sample_shading = "GL_ARB_sample_shading"; +const char* const E_GL_ARB_shader_bit_encoding = "GL_ARB_shader_bit_encoding"; +const char* const 
E_GL_ARB_shader_image_size = "GL_ARB_shader_image_size"; +const char* const E_GL_ARB_shader_storage_buffer_object = "GL_ARB_shader_storage_buffer_object"; +const char* const E_GL_ARB_shading_language_packing = "GL_ARB_shading_language_packing"; +const char* const E_GL_ARB_texture_query_lod = "GL_ARB_texture_query_lod"; +const char* const E_GL_ARB_vertex_attrib_64bit = "GL_ARB_vertex_attrib_64bit"; +const char* const E_GL_ARB_draw_instanced = "GL_ARB_draw_instanced"; +const char* const E_GL_ARB_fragment_coord_conventions = "GL_ARB_fragment_coord_conventions"; + +const char* const E_GL_KHR_shader_subgroup_basic = "GL_KHR_shader_subgroup_basic"; +const char* const E_GL_KHR_shader_subgroup_vote = "GL_KHR_shader_subgroup_vote"; +const char* const E_GL_KHR_shader_subgroup_arithmetic = "GL_KHR_shader_subgroup_arithmetic"; +const char* const E_GL_KHR_shader_subgroup_ballot = "GL_KHR_shader_subgroup_ballot"; +const char* const E_GL_KHR_shader_subgroup_shuffle = "GL_KHR_shader_subgroup_shuffle"; +const char* const E_GL_KHR_shader_subgroup_shuffle_relative = "GL_KHR_shader_subgroup_shuffle_relative"; +const char* const E_GL_KHR_shader_subgroup_clustered = "GL_KHR_shader_subgroup_clustered"; +const char* const E_GL_KHR_shader_subgroup_quad = "GL_KHR_shader_subgroup_quad"; +const char* const E_GL_KHR_memory_scope_semantics = "GL_KHR_memory_scope_semantics"; + +const char* const E_GL_EXT_shader_atomic_int64 = "GL_EXT_shader_atomic_int64"; + +const char* const E_GL_EXT_shader_non_constant_global_initializers = "GL_EXT_shader_non_constant_global_initializers"; +const char* const E_GL_EXT_shader_image_load_formatted = "GL_EXT_shader_image_load_formatted"; + +const char* const E_GL_EXT_shader_16bit_storage = "GL_EXT_shader_16bit_storage"; +const char* const E_GL_EXT_shader_8bit_storage = "GL_EXT_shader_8bit_storage"; + + +// EXT extensions +const char* const E_GL_EXT_device_group = "GL_EXT_device_group"; +const char* const E_GL_EXT_multiview = "GL_EXT_multiview"; +const char* const E_GL_EXT_post_depth_coverage = "GL_EXT_post_depth_coverage"; +const char* const E_GL_EXT_control_flow_attributes = "GL_EXT_control_flow_attributes"; +const char* const E_GL_EXT_nonuniform_qualifier = "GL_EXT_nonuniform_qualifier"; +const char* const E_GL_EXT_samplerless_texture_functions = "GL_EXT_samplerless_texture_functions"; +const char* const E_GL_EXT_scalar_block_layout = "GL_EXT_scalar_block_layout"; +const char* const E_GL_EXT_fragment_invocation_density = "GL_EXT_fragment_invocation_density"; +const char* const E_GL_EXT_buffer_reference = "GL_EXT_buffer_reference"; +const char* const E_GL_EXT_buffer_reference2 = "GL_EXT_buffer_reference2"; +const char* const E_GL_EXT_buffer_reference_uvec2 = "GL_EXT_buffer_reference_uvec2"; +const char* const E_GL_EXT_demote_to_helper_invocation = "GL_EXT_demote_to_helper_invocation"; +const char* const E_GL_EXT_shader_realtime_clock = "GL_EXT_shader_realtime_clock"; +const char* const E_GL_EXT_debug_printf = "GL_EXT_debug_printf"; +const char* const E_GL_EXT_ray_tracing = "GL_EXT_ray_tracing"; +const char* const E_GL_EXT_ray_query = "GL_EXT_ray_query"; +const char* const E_GL_EXT_ray_flags_primitive_culling = "GL_EXT_ray_flags_primitive_culling"; +const char* const E_GL_EXT_ray_cull_mask = "GL_EXT_ray_cull_mask"; +const char* const E_GL_EXT_blend_func_extended = "GL_EXT_blend_func_extended"; +const char* const E_GL_EXT_shader_implicit_conversions = "GL_EXT_shader_implicit_conversions"; +const char* const E_GL_EXT_fragment_shading_rate = "GL_EXT_fragment_shading_rate"; +const char* const 
E_GL_EXT_shader_image_int64 = "GL_EXT_shader_image_int64"; +const char* const E_GL_EXT_null_initializer = "GL_EXT_null_initializer"; +const char* const E_GL_EXT_shared_memory_block = "GL_EXT_shared_memory_block"; +const char* const E_GL_EXT_subgroup_uniform_control_flow = "GL_EXT_subgroup_uniform_control_flow"; +const char* const E_GL_EXT_spirv_intrinsics = "GL_EXT_spirv_intrinsics"; +const char* const E_GL_EXT_fragment_shader_barycentric = "GL_EXT_fragment_shader_barycentric"; +const char* const E_GL_EXT_mesh_shader = "GL_EXT_mesh_shader"; +const char* const E_GL_EXT_opacity_micromap = "GL_EXT_opacity_micromap"; + +// Arrays of extensions for the above viewportEXTs duplications + +const char* const post_depth_coverageEXTs[] = { E_GL_ARB_post_depth_coverage, E_GL_EXT_post_depth_coverage }; +const int Num_post_depth_coverageEXTs = sizeof(post_depth_coverageEXTs) / sizeof(post_depth_coverageEXTs[0]); + +// Array of extensions to cover both extensions providing ray tracing capabilities. +const char* const ray_tracing_EXTs[] = { E_GL_EXT_ray_query, E_GL_EXT_ray_tracing }; +const int Num_ray_tracing_EXTs = sizeof(ray_tracing_EXTs) / sizeof(ray_tracing_EXTs[0]); + +// OVR extensions +const char* const E_GL_OVR_multiview = "GL_OVR_multiview"; +const char* const E_GL_OVR_multiview2 = "GL_OVR_multiview2"; + +const char* const OVR_multiview_EXTs[] = { E_GL_OVR_multiview, E_GL_OVR_multiview2 }; +const int Num_OVR_multiview_EXTs = sizeof(OVR_multiview_EXTs) / sizeof(OVR_multiview_EXTs[0]); + +// #line and #include +const char* const E_GL_GOOGLE_cpp_style_line_directive = "GL_GOOGLE_cpp_style_line_directive"; +const char* const E_GL_GOOGLE_include_directive = "GL_GOOGLE_include_directive"; + +const char* const E_GL_AMD_shader_ballot = "GL_AMD_shader_ballot"; +const char* const E_GL_AMD_shader_trinary_minmax = "GL_AMD_shader_trinary_minmax"; +const char* const E_GL_AMD_shader_explicit_vertex_parameter = "GL_AMD_shader_explicit_vertex_parameter"; +const char* const E_GL_AMD_gcn_shader = "GL_AMD_gcn_shader"; +const char* const E_GL_AMD_gpu_shader_half_float = "GL_AMD_gpu_shader_half_float"; +const char* const E_GL_AMD_texture_gather_bias_lod = "GL_AMD_texture_gather_bias_lod"; +const char* const E_GL_AMD_gpu_shader_int16 = "GL_AMD_gpu_shader_int16"; +const char* const E_GL_AMD_shader_image_load_store_lod = "GL_AMD_shader_image_load_store_lod"; +const char* const E_GL_AMD_shader_fragment_mask = "GL_AMD_shader_fragment_mask"; +const char* const E_GL_AMD_gpu_shader_half_float_fetch = "GL_AMD_gpu_shader_half_float_fetch"; +const char* const E_GL_AMD_shader_early_and_late_fragment_tests = "GL_AMD_shader_early_and_late_fragment_tests"; + +const char* const E_GL_INTEL_shader_integer_functions2 = "GL_INTEL_shader_integer_functions2"; + +const char* const E_GL_NV_sample_mask_override_coverage = "GL_NV_sample_mask_override_coverage"; +const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometry_shader_passthrough"; +const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2"; +const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering"; +const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes"; +const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64"; +const char* const E_GL_NV_conservative_raster_underestimation = "GL_NV_conservative_raster_underestimation"; +const char* const E_GL_NV_shader_noperspective_interpolation = "GL_NV_shader_noperspective_interpolation"; +const char* const 
E_GL_NV_shader_subgroup_partitioned = "GL_NV_shader_subgroup_partitioned"; +const char* const E_GL_NV_shading_rate_image = "GL_NV_shading_rate_image"; +const char* const E_GL_NV_ray_tracing = "GL_NV_ray_tracing"; +const char* const E_GL_NV_ray_tracing_motion_blur = "GL_NV_ray_tracing_motion_blur"; +const char* const E_GL_NV_fragment_shader_barycentric = "GL_NV_fragment_shader_barycentric"; +const char* const E_GL_NV_compute_shader_derivatives = "GL_NV_compute_shader_derivatives"; +const char* const E_GL_NV_shader_texture_footprint = "GL_NV_shader_texture_footprint"; +const char* const E_GL_NV_mesh_shader = "GL_NV_mesh_shader"; + +// Arrays of extensions for the above viewportEXTs duplications + +const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_NV_viewport_array2 }; +const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]); + +const char* const E_GL_NV_cooperative_matrix = "GL_NV_cooperative_matrix"; +const char* const E_GL_NV_shader_sm_builtins = "GL_NV_shader_sm_builtins"; +const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer_cooperative_matrix"; + +// AEP +const char* const E_GL_ANDROID_extension_pack_es31a = "GL_ANDROID_extension_pack_es31a"; +const char* const E_GL_KHR_blend_equation_advanced = "GL_KHR_blend_equation_advanced"; +const char* const E_GL_OES_sample_variables = "GL_OES_sample_variables"; +const char* const E_GL_OES_shader_image_atomic = "GL_OES_shader_image_atomic"; +const char* const E_GL_OES_shader_multisample_interpolation = "GL_OES_shader_multisample_interpolation"; +const char* const E_GL_OES_texture_storage_multisample_2d_array = "GL_OES_texture_storage_multisample_2d_array"; +const char* const E_GL_EXT_geometry_shader = "GL_EXT_geometry_shader"; +const char* const E_GL_EXT_geometry_point_size = "GL_EXT_geometry_point_size"; +const char* const E_GL_EXT_gpu_shader5 = "GL_EXT_gpu_shader5"; +const char* const E_GL_EXT_primitive_bounding_box = "GL_EXT_primitive_bounding_box"; +const char* const E_GL_EXT_shader_io_blocks = "GL_EXT_shader_io_blocks"; +const char* const E_GL_EXT_tessellation_shader = "GL_EXT_tessellation_shader"; +const char* const E_GL_EXT_tessellation_point_size = "GL_EXT_tessellation_point_size"; +const char* const E_GL_EXT_texture_buffer = "GL_EXT_texture_buffer"; +const char* const E_GL_EXT_texture_cube_map_array = "GL_EXT_texture_cube_map_array"; +const char* const E_GL_EXT_shader_integer_mix = "GL_EXT_shader_integer_mix"; + +// OES matching AEP +const char* const E_GL_OES_geometry_shader = "GL_OES_geometry_shader"; +const char* const E_GL_OES_geometry_point_size = "GL_OES_geometry_point_size"; +const char* const E_GL_OES_gpu_shader5 = "GL_OES_gpu_shader5"; +const char* const E_GL_OES_primitive_bounding_box = "GL_OES_primitive_bounding_box"; +const char* const E_GL_OES_shader_io_blocks = "GL_OES_shader_io_blocks"; +const char* const E_GL_OES_tessellation_shader = "GL_OES_tessellation_shader"; +const char* const E_GL_OES_tessellation_point_size = "GL_OES_tessellation_point_size"; +const char* const E_GL_OES_texture_buffer = "GL_OES_texture_buffer"; +const char* const E_GL_OES_texture_cube_map_array = "GL_OES_texture_cube_map_array"; + +// EXT +const char* const E_GL_EXT_shader_explicit_arithmetic_types = "GL_EXT_shader_explicit_arithmetic_types"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int8 = "GL_EXT_shader_explicit_arithmetic_types_int8"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int16 = "GL_EXT_shader_explicit_arithmetic_types_int16"; +const 
char* const E_GL_EXT_shader_explicit_arithmetic_types_int32 = "GL_EXT_shader_explicit_arithmetic_types_int32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int64 = "GL_EXT_shader_explicit_arithmetic_types_int64"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float16 = "GL_EXT_shader_explicit_arithmetic_types_float16"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float32 = "GL_EXT_shader_explicit_arithmetic_types_float32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float64 = "GL_EXT_shader_explicit_arithmetic_types_float64"; + +const char* const E_GL_EXT_shader_subgroup_extended_types_int8 = "GL_EXT_shader_subgroup_extended_types_int8"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int16 = "GL_EXT_shader_subgroup_extended_types_int16"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int64 = "GL_EXT_shader_subgroup_extended_types_int64"; +const char* const E_GL_EXT_shader_subgroup_extended_types_float16 = "GL_EXT_shader_subgroup_extended_types_float16"; +const char* const E_GL_EXT_terminate_invocation = "GL_EXT_terminate_invocation"; + +const char* const E_GL_EXT_shader_atomic_float = "GL_EXT_shader_atomic_float"; +const char* const E_GL_EXT_shader_atomic_float2 = "GL_EXT_shader_atomic_float2"; + +// Arrays of extensions for the above AEP duplications + +const char* const AEP_geometry_shader[] = { E_GL_EXT_geometry_shader, E_GL_OES_geometry_shader }; +const int Num_AEP_geometry_shader = sizeof(AEP_geometry_shader)/sizeof(AEP_geometry_shader[0]); + +const char* const AEP_geometry_point_size[] = { E_GL_EXT_geometry_point_size, E_GL_OES_geometry_point_size }; +const int Num_AEP_geometry_point_size = sizeof(AEP_geometry_point_size)/sizeof(AEP_geometry_point_size[0]); + +const char* const AEP_gpu_shader5[] = { E_GL_EXT_gpu_shader5, E_GL_OES_gpu_shader5 }; +const int Num_AEP_gpu_shader5 = sizeof(AEP_gpu_shader5)/sizeof(AEP_gpu_shader5[0]); + +const char* const AEP_primitive_bounding_box[] = { E_GL_EXT_primitive_bounding_box, E_GL_OES_primitive_bounding_box }; +const int Num_AEP_primitive_bounding_box = sizeof(AEP_primitive_bounding_box)/sizeof(AEP_primitive_bounding_box[0]); + +const char* const AEP_shader_io_blocks[] = { E_GL_EXT_shader_io_blocks, E_GL_OES_shader_io_blocks }; +const int Num_AEP_shader_io_blocks = sizeof(AEP_shader_io_blocks)/sizeof(AEP_shader_io_blocks[0]); + +const char* const AEP_tessellation_shader[] = { E_GL_EXT_tessellation_shader, E_GL_OES_tessellation_shader }; +const int Num_AEP_tessellation_shader = sizeof(AEP_tessellation_shader)/sizeof(AEP_tessellation_shader[0]); + +const char* const AEP_tessellation_point_size[] = { E_GL_EXT_tessellation_point_size, E_GL_OES_tessellation_point_size }; +const int Num_AEP_tessellation_point_size = sizeof(AEP_tessellation_point_size)/sizeof(AEP_tessellation_point_size[0]); + +const char* const AEP_texture_buffer[] = { E_GL_EXT_texture_buffer, E_GL_OES_texture_buffer }; +const int Num_AEP_texture_buffer = sizeof(AEP_texture_buffer)/sizeof(AEP_texture_buffer[0]); + +const char* const AEP_texture_cube_map_array[] = { E_GL_EXT_texture_cube_map_array, E_GL_OES_texture_cube_map_array }; +const int Num_AEP_texture_cube_map_array = sizeof(AEP_texture_cube_map_array)/sizeof(AEP_texture_cube_map_array[0]); + +const char* const AEP_mesh_shader[] = { E_GL_NV_mesh_shader, E_GL_EXT_mesh_shader }; +const int Num_AEP_mesh_shader = sizeof(AEP_mesh_shader)/sizeof(AEP_mesh_shader[0]); + +} // end namespace glslang + +#endif // _VERSIONS_INCLUDED_ diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/attribute.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/attribute.h new file mode 100644 index 0000000..c5b2917 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/attribute.h @@ -0,0 +1,150 @@ +// +// Copyright (C) 2017 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
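The header below models a source-level attribute such as [[unroll(4)]] (from GL_EXT_control_flow_attributes) as a type tag plus an argument aggregate, with checked accessors that report failure instead of throwing. A toy counterpart under those assumptions (invented names; the real arguments are a TIntermAggregate, not a vector):

#include <iostream>
#include <vector>

enum AttrType { AtNone, AtUnroll, AtDependencyLength };

struct AttrArgs {
    AttrType type;
    std::vector<int> ints; // stand-in for the argument aggregate
    bool getInt(int& value, int argNum = 0) const {
        if (argNum >= static_cast<int>(ints.size()))
            return false;           // report failure, don't throw
        value = ints[argNum];
        return true;
    }
};

int main()
{
    AttrArgs unroll{AtUnroll, {4}}; // e.g. [[unroll(4)]] on a loop
    int n = 0;
    std::cout << (unroll.getInt(n) ? n : -1) << "\n"; // prints 4
    return 0;
}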
+// + +#ifndef _ATTRIBUTE_INCLUDED_ +#define _ATTRIBUTE_INCLUDED_ + +#include "../Include/Common.h" +#include "../Include/ConstantUnion.h" + +namespace glslang { + + enum TAttributeType { + EatNone, + EatAllow_uav_condition, + EatBranch, + EatCall, + EatDomain, + EatEarlyDepthStencil, + EatFastOpt, + EatFlatten, + EatForceCase, + EatInstance, + EatMaxTessFactor, + EatNumThreads, + EatMaxVertexCount, + EatOutputControlPoints, + EatOutputTopology, + EatPartitioning, + EatPatchConstantFunc, + EatPatchSize, + EatUnroll, + EatLoop, + EatBinding, + EatGlobalBinding, + EatLocation, + EatInputAttachment, + EatBuiltIn, + EatPushConstant, + EatConstantId, + EatDependencyInfinite, + EatDependencyLength, + EatMinIterations, + EatMaxIterations, + EatIterationMultiple, + EatPeelCount, + EatPartialCount, + EatFormatRgba32f, + EatFormatRgba16f, + EatFormatR32f, + EatFormatRgba8, + EatFormatRgba8Snorm, + EatFormatRg32f, + EatFormatRg16f, + EatFormatR11fG11fB10f, + EatFormatR16f, + EatFormatRgba16, + EatFormatRgb10A2, + EatFormatRg16, + EatFormatRg8, + EatFormatR16, + EatFormatR8, + EatFormatRgba16Snorm, + EatFormatRg16Snorm, + EatFormatRg8Snorm, + EatFormatR16Snorm, + EatFormatR8Snorm, + EatFormatRgba32i, + EatFormatRgba16i, + EatFormatRgba8i, + EatFormatR32i, + EatFormatRg32i, + EatFormatRg16i, + EatFormatRg8i, + EatFormatR16i, + EatFormatR8i, + EatFormatRgba32ui, + EatFormatRgba16ui, + EatFormatRgba8ui, + EatFormatR32ui, + EatFormatRgb10a2ui, + EatFormatRg32ui, + EatFormatRg16ui, + EatFormatRg8ui, + EatFormatR16ui, + EatFormatR8ui, + EatFormatUnknown, + EatNonWritable, + EatNonReadable, + EatSubgroupUniformControlFlow, + }; + + class TIntermAggregate; + + struct TAttributeArgs { + TAttributeType name; + const TIntermAggregate* args; + + // Obtain attribute as integer + // Return false if it cannot be obtained + bool getInt(int& value, int argNum = 0) const; + + // Obtain attribute as string, with optional to-lower transform + // Return false if it cannot be obtained + bool getString(TString& value, int argNum = 0, bool convertToLower = true) const; + + // How many arguments were provided to the attribute? + int size() const; + + protected: + const TConstUnion* getConstUnion(TBasicType basicType, int argNum) const; + }; + + typedef TList<TAttributeArgs> TAttributes; + +} // end namespace glslang + +#endif // _ATTRIBUTE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/gl_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/gl_types.h new file mode 100644 index 0000000..d6c9393 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/gl_types.h @@ -0,0 +1,218 @@ +/* +** Copyright (c) 2013 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials.
+** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#pragma once + +#define GL_FLOAT 0x1406 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 + +#define GL_DOUBLE 0x140A +#define GL_DOUBLE_VEC2 0x8FFC +#define GL_DOUBLE_VEC3 0x8FFD +#define GL_DOUBLE_VEC4 0x8FFE + +#define GL_INT 0x1404 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 + +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 + +#define GL_INT64_ARB 0x140E +#define GL_INT64_VEC2_ARB 0x8FE9 +#define GL_INT64_VEC3_ARB 0x8FEA +#define GL_INT64_VEC4_ARB 0x8FEB + +#define GL_UNSIGNED_INT64_ARB 0x140F +#define GL_UNSIGNED_INT64_VEC2_ARB 0x8FF5 +#define GL_UNSIGNED_INT64_VEC3_ARB 0x8FF6 +#define GL_UNSIGNED_INT64_VEC4_ARB 0x8FF7 +#define GL_UNSIGNED_INT16_VEC2_NV 0x8FF1 +#define GL_UNSIGNED_INT16_VEC3_NV 0x8FF2 +#define GL_UNSIGNED_INT16_VEC4_NV 0x8FF3 + +#define GL_INT16_NV 0x8FE4 +#define GL_INT16_VEC2_NV 0x8FE5 +#define GL_INT16_VEC3_NV 0x8FE6 +#define GL_INT16_VEC4_NV 0x8FE7 + +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 + +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A + +#define GL_DOUBLE_MAT2 0x8F46 +#define GL_DOUBLE_MAT3 0x8F47 +#define GL_DOUBLE_MAT4 0x8F48 +#define GL_DOUBLE_MAT2x3 0x8F49 +#define GL_DOUBLE_MAT2x4 0x8F4A +#define GL_DOUBLE_MAT3x2 0x8F4B +#define GL_DOUBLE_MAT3x4 0x8F4C +#define GL_DOUBLE_MAT4x2 0x8F4D +#define GL_DOUBLE_MAT4x3 0x8F4E + +// Those constants are borrowed from extension NV_gpu_shader5 +#define GL_FLOAT16_NV 0x8FF8 +#define GL_FLOAT16_VEC2_NV 0x8FF9 +#define GL_FLOAT16_VEC3_NV 0x8FFA +#define GL_FLOAT16_VEC4_NV 0x8FFB + +#define GL_FLOAT16_MAT2_AMD 0x91C5 +#define GL_FLOAT16_MAT3_AMD 0x91C6 +#define GL_FLOAT16_MAT4_AMD 0x91C7 +#define GL_FLOAT16_MAT2x3_AMD 0x91C8 +#define GL_FLOAT16_MAT2x4_AMD 0x91C9 +#define GL_FLOAT16_MAT3x2_AMD 0x91CA +#define GL_FLOAT16_MAT3x4_AMD 0x91CB +#define GL_FLOAT16_MAT4x2_AMD 0x91CC +#define GL_FLOAT16_MAT4x3_AMD 0x91CD + +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C +#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D +#define GL_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900C +#define 
GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW_ARB 0x900D + +#define GL_FLOAT16_SAMPLER_1D_AMD 0x91CE +#define GL_FLOAT16_SAMPLER_2D_AMD 0x91CF +#define GL_FLOAT16_SAMPLER_3D_AMD 0x91D0 +#define GL_FLOAT16_SAMPLER_CUBE_AMD 0x91D1 +#define GL_FLOAT16_SAMPLER_2D_RECT_AMD 0x91D2 +#define GL_FLOAT16_SAMPLER_1D_ARRAY_AMD 0x91D3 +#define GL_FLOAT16_SAMPLER_2D_ARRAY_AMD 0x91D4 +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_AMD 0x91D5 +#define GL_FLOAT16_SAMPLER_BUFFER_AMD 0x91D6 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_AMD 0x91D7 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_ARRAY_AMD 0x91D8 + +#define GL_FLOAT16_SAMPLER_1D_SHADOW_AMD 0x91D9 +#define GL_FLOAT16_SAMPLER_2D_SHADOW_AMD 0x91DA +#define GL_FLOAT16_SAMPLER_2D_RECT_SHADOW_AMD 0x91DB +#define GL_FLOAT16_SAMPLER_1D_ARRAY_SHADOW_AMD 0x91DC +#define GL_FLOAT16_SAMPLER_2D_ARRAY_SHADOW_AMD 0x91DD +#define GL_FLOAT16_SAMPLER_CUBE_SHADOW_AMD 0x91DE +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_SHADOW_AMD 0x91DF + +#define GL_FLOAT16_IMAGE_1D_AMD 0x91E0 +#define GL_FLOAT16_IMAGE_2D_AMD 0x91E1 +#define GL_FLOAT16_IMAGE_3D_AMD 0x91E2 +#define GL_FLOAT16_IMAGE_2D_RECT_AMD 0x91E3 +#define GL_FLOAT16_IMAGE_CUBE_AMD 0x91E4 +#define GL_FLOAT16_IMAGE_1D_ARRAY_AMD 0x91E5 +#define GL_FLOAT16_IMAGE_2D_ARRAY_AMD 0x91E6 +#define GL_FLOAT16_IMAGE_CUBE_MAP_ARRAY_AMD 0x91E7 +#define GL_FLOAT16_IMAGE_BUFFER_AMD 0x91E8 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_AMD 0x91E9 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_ARRAY_AMD 0x91EA + +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900E + +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A + +#define GL_IMAGE_1D 0x904C +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_2D_RECT 0x904F +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_BUFFER 0x9051 +#define GL_IMAGE_1D_ARRAY 0x9052 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 +#define GL_IMAGE_2D_MULTISAMPLE 0x9055 +#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056 +#define GL_INT_IMAGE_1D 0x9057 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_2D_RECT 0x905A +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_BUFFER 0x905C +#define GL_INT_IMAGE_1D_ARRAY 0x905D +#define GL_INT_IMAGE_2D_ARRAY 0x905E +#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F +#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060 +#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061 +#define GL_UNSIGNED_INT_IMAGE_1D 0x9062 +#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 
+#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
+#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
+#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
+#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
+
+#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/glslang_tab.cpp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/glslang_tab.cpp.h
new file mode 100644
index 0000000..18cef46
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/glslang_tab.cpp.h
@@ -0,0 +1,571 @@
+/* A Bison parser, made by GNU Bison 3.7.4.  */
+
+/* Bison interface for Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation,
+   Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
+   especially those whose name start with YY_ or yy_.  They are
+   private implementation details that can be changed or removed.  */
+
+#ifndef YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED
+# define YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED
+/* Debug traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 1
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
+
+/* Token kinds.
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + CONST = 258, /* CONST */ + BOOL = 259, /* BOOL */ + INT = 260, /* INT */ + UINT = 261, /* UINT */ + FLOAT = 262, /* FLOAT */ + BVEC2 = 263, /* BVEC2 */ + BVEC3 = 264, /* BVEC3 */ + BVEC4 = 265, /* BVEC4 */ + IVEC2 = 266, /* IVEC2 */ + IVEC3 = 267, /* IVEC3 */ + IVEC4 = 268, /* IVEC4 */ + UVEC2 = 269, /* UVEC2 */ + UVEC3 = 270, /* UVEC3 */ + UVEC4 = 271, /* UVEC4 */ + VEC2 = 272, /* VEC2 */ + VEC3 = 273, /* VEC3 */ + VEC4 = 274, /* VEC4 */ + MAT2 = 275, /* MAT2 */ + MAT3 = 276, /* MAT3 */ + MAT4 = 277, /* MAT4 */ + MAT2X2 = 278, /* MAT2X2 */ + MAT2X3 = 279, /* MAT2X3 */ + MAT2X4 = 280, /* MAT2X4 */ + MAT3X2 = 281, /* MAT3X2 */ + MAT3X3 = 282, /* MAT3X3 */ + MAT3X4 = 283, /* MAT3X4 */ + MAT4X2 = 284, /* MAT4X2 */ + MAT4X3 = 285, /* MAT4X3 */ + MAT4X4 = 286, /* MAT4X4 */ + SAMPLER2D = 287, /* SAMPLER2D */ + SAMPLER3D = 288, /* SAMPLER3D */ + SAMPLERCUBE = 289, /* SAMPLERCUBE */ + SAMPLER2DSHADOW = 290, /* SAMPLER2DSHADOW */ + SAMPLERCUBESHADOW = 291, /* SAMPLERCUBESHADOW */ + SAMPLER2DARRAY = 292, /* SAMPLER2DARRAY */ + SAMPLER2DARRAYSHADOW = 293, /* SAMPLER2DARRAYSHADOW */ + ISAMPLER2D = 294, /* ISAMPLER2D */ + ISAMPLER3D = 295, /* ISAMPLER3D */ + ISAMPLERCUBE = 296, /* ISAMPLERCUBE */ + ISAMPLER2DARRAY = 297, /* ISAMPLER2DARRAY */ + USAMPLER2D = 298, /* USAMPLER2D */ + USAMPLER3D = 299, /* USAMPLER3D */ + USAMPLERCUBE = 300, /* USAMPLERCUBE */ + USAMPLER2DARRAY = 301, /* USAMPLER2DARRAY */ + SAMPLER = 302, /* SAMPLER */ + SAMPLERSHADOW = 303, /* SAMPLERSHADOW */ + TEXTURE2D = 304, /* TEXTURE2D */ + TEXTURE3D = 305, /* TEXTURE3D */ + TEXTURECUBE = 306, /* TEXTURECUBE */ + TEXTURE2DARRAY = 307, /* TEXTURE2DARRAY */ + ITEXTURE2D = 308, /* ITEXTURE2D */ + ITEXTURE3D = 309, /* ITEXTURE3D */ + ITEXTURECUBE = 310, /* ITEXTURECUBE */ + ITEXTURE2DARRAY = 311, /* ITEXTURE2DARRAY */ + UTEXTURE2D = 312, /* UTEXTURE2D */ + UTEXTURE3D = 313, /* UTEXTURE3D */ + UTEXTURECUBE = 314, /* UTEXTURECUBE */ + UTEXTURE2DARRAY = 315, /* UTEXTURE2DARRAY */ + ATTRIBUTE = 316, /* ATTRIBUTE */ + VARYING = 317, /* VARYING */ + FLOAT16_T = 318, /* FLOAT16_T */ + FLOAT32_T = 319, /* FLOAT32_T */ + DOUBLE = 320, /* DOUBLE */ + FLOAT64_T = 321, /* FLOAT64_T */ + INT64_T = 322, /* INT64_T */ + UINT64_T = 323, /* UINT64_T */ + INT32_T = 324, /* INT32_T */ + UINT32_T = 325, /* UINT32_T */ + INT16_T = 326, /* INT16_T */ + UINT16_T = 327, /* UINT16_T */ + INT8_T = 328, /* INT8_T */ + UINT8_T = 329, /* UINT8_T */ + I64VEC2 = 330, /* I64VEC2 */ + I64VEC3 = 331, /* I64VEC3 */ + I64VEC4 = 332, /* I64VEC4 */ + U64VEC2 = 333, /* U64VEC2 */ + U64VEC3 = 334, /* U64VEC3 */ + U64VEC4 = 335, /* U64VEC4 */ + I32VEC2 = 336, /* I32VEC2 */ + I32VEC3 = 337, /* I32VEC3 */ + I32VEC4 = 338, /* I32VEC4 */ + U32VEC2 = 339, /* U32VEC2 */ + U32VEC3 = 340, /* U32VEC3 */ + U32VEC4 = 341, /* U32VEC4 */ + I16VEC2 = 342, /* I16VEC2 */ + I16VEC3 = 343, /* I16VEC3 */ + I16VEC4 = 344, /* I16VEC4 */ + U16VEC2 = 345, /* U16VEC2 */ + U16VEC3 = 346, /* U16VEC3 */ + U16VEC4 = 347, /* U16VEC4 */ + I8VEC2 = 348, /* I8VEC2 */ + I8VEC3 = 349, /* I8VEC3 */ + I8VEC4 = 350, /* I8VEC4 */ + U8VEC2 = 351, /* U8VEC2 */ + U8VEC3 = 352, /* U8VEC3 */ + U8VEC4 = 353, /* U8VEC4 */ + DVEC2 = 354, /* DVEC2 */ + DVEC3 = 355, /* DVEC3 */ + DVEC4 = 356, /* DVEC4 */ + DMAT2 = 357, /* DMAT2 */ + DMAT3 = 358, /* DMAT3 */ + DMAT4 = 359, /* DMAT4 */ + F16VEC2 = 360, /* F16VEC2 */ + F16VEC3 = 361, /* 
F16VEC3 */ + F16VEC4 = 362, /* F16VEC4 */ + F16MAT2 = 363, /* F16MAT2 */ + F16MAT3 = 364, /* F16MAT3 */ + F16MAT4 = 365, /* F16MAT4 */ + F32VEC2 = 366, /* F32VEC2 */ + F32VEC3 = 367, /* F32VEC3 */ + F32VEC4 = 368, /* F32VEC4 */ + F32MAT2 = 369, /* F32MAT2 */ + F32MAT3 = 370, /* F32MAT3 */ + F32MAT4 = 371, /* F32MAT4 */ + F64VEC2 = 372, /* F64VEC2 */ + F64VEC3 = 373, /* F64VEC3 */ + F64VEC4 = 374, /* F64VEC4 */ + F64MAT2 = 375, /* F64MAT2 */ + F64MAT3 = 376, /* F64MAT3 */ + F64MAT4 = 377, /* F64MAT4 */ + DMAT2X2 = 378, /* DMAT2X2 */ + DMAT2X3 = 379, /* DMAT2X3 */ + DMAT2X4 = 380, /* DMAT2X4 */ + DMAT3X2 = 381, /* DMAT3X2 */ + DMAT3X3 = 382, /* DMAT3X3 */ + DMAT3X4 = 383, /* DMAT3X4 */ + DMAT4X2 = 384, /* DMAT4X2 */ + DMAT4X3 = 385, /* DMAT4X3 */ + DMAT4X4 = 386, /* DMAT4X4 */ + F16MAT2X2 = 387, /* F16MAT2X2 */ + F16MAT2X3 = 388, /* F16MAT2X3 */ + F16MAT2X4 = 389, /* F16MAT2X4 */ + F16MAT3X2 = 390, /* F16MAT3X2 */ + F16MAT3X3 = 391, /* F16MAT3X3 */ + F16MAT3X4 = 392, /* F16MAT3X4 */ + F16MAT4X2 = 393, /* F16MAT4X2 */ + F16MAT4X3 = 394, /* F16MAT4X3 */ + F16MAT4X4 = 395, /* F16MAT4X4 */ + F32MAT2X2 = 396, /* F32MAT2X2 */ + F32MAT2X3 = 397, /* F32MAT2X3 */ + F32MAT2X4 = 398, /* F32MAT2X4 */ + F32MAT3X2 = 399, /* F32MAT3X2 */ + F32MAT3X3 = 400, /* F32MAT3X3 */ + F32MAT3X4 = 401, /* F32MAT3X4 */ + F32MAT4X2 = 402, /* F32MAT4X2 */ + F32MAT4X3 = 403, /* F32MAT4X3 */ + F32MAT4X4 = 404, /* F32MAT4X4 */ + F64MAT2X2 = 405, /* F64MAT2X2 */ + F64MAT2X3 = 406, /* F64MAT2X3 */ + F64MAT2X4 = 407, /* F64MAT2X4 */ + F64MAT3X2 = 408, /* F64MAT3X2 */ + F64MAT3X3 = 409, /* F64MAT3X3 */ + F64MAT3X4 = 410, /* F64MAT3X4 */ + F64MAT4X2 = 411, /* F64MAT4X2 */ + F64MAT4X3 = 412, /* F64MAT4X3 */ + F64MAT4X4 = 413, /* F64MAT4X4 */ + ATOMIC_UINT = 414, /* ATOMIC_UINT */ + ACCSTRUCTNV = 415, /* ACCSTRUCTNV */ + ACCSTRUCTEXT = 416, /* ACCSTRUCTEXT */ + RAYQUERYEXT = 417, /* RAYQUERYEXT */ + FCOOPMATNV = 418, /* FCOOPMATNV */ + ICOOPMATNV = 419, /* ICOOPMATNV */ + UCOOPMATNV = 420, /* UCOOPMATNV */ + SAMPLERCUBEARRAY = 421, /* SAMPLERCUBEARRAY */ + SAMPLERCUBEARRAYSHADOW = 422, /* SAMPLERCUBEARRAYSHADOW */ + ISAMPLERCUBEARRAY = 423, /* ISAMPLERCUBEARRAY */ + USAMPLERCUBEARRAY = 424, /* USAMPLERCUBEARRAY */ + SAMPLER1D = 425, /* SAMPLER1D */ + SAMPLER1DARRAY = 426, /* SAMPLER1DARRAY */ + SAMPLER1DARRAYSHADOW = 427, /* SAMPLER1DARRAYSHADOW */ + ISAMPLER1D = 428, /* ISAMPLER1D */ + SAMPLER1DSHADOW = 429, /* SAMPLER1DSHADOW */ + SAMPLER2DRECT = 430, /* SAMPLER2DRECT */ + SAMPLER2DRECTSHADOW = 431, /* SAMPLER2DRECTSHADOW */ + ISAMPLER2DRECT = 432, /* ISAMPLER2DRECT */ + USAMPLER2DRECT = 433, /* USAMPLER2DRECT */ + SAMPLERBUFFER = 434, /* SAMPLERBUFFER */ + ISAMPLERBUFFER = 435, /* ISAMPLERBUFFER */ + USAMPLERBUFFER = 436, /* USAMPLERBUFFER */ + SAMPLER2DMS = 437, /* SAMPLER2DMS */ + ISAMPLER2DMS = 438, /* ISAMPLER2DMS */ + USAMPLER2DMS = 439, /* USAMPLER2DMS */ + SAMPLER2DMSARRAY = 440, /* SAMPLER2DMSARRAY */ + ISAMPLER2DMSARRAY = 441, /* ISAMPLER2DMSARRAY */ + USAMPLER2DMSARRAY = 442, /* USAMPLER2DMSARRAY */ + SAMPLEREXTERNALOES = 443, /* SAMPLEREXTERNALOES */ + SAMPLEREXTERNAL2DY2YEXT = 444, /* SAMPLEREXTERNAL2DY2YEXT */ + ISAMPLER1DARRAY = 445, /* ISAMPLER1DARRAY */ + USAMPLER1D = 446, /* USAMPLER1D */ + USAMPLER1DARRAY = 447, /* USAMPLER1DARRAY */ + F16SAMPLER1D = 448, /* F16SAMPLER1D */ + F16SAMPLER2D = 449, /* F16SAMPLER2D */ + F16SAMPLER3D = 450, /* F16SAMPLER3D */ + F16SAMPLER2DRECT = 451, /* F16SAMPLER2DRECT */ + F16SAMPLERCUBE = 452, /* F16SAMPLERCUBE */ + F16SAMPLER1DARRAY = 453, /* F16SAMPLER1DARRAY */ + 
F16SAMPLER2DARRAY = 454, /* F16SAMPLER2DARRAY */ + F16SAMPLERCUBEARRAY = 455, /* F16SAMPLERCUBEARRAY */ + F16SAMPLERBUFFER = 456, /* F16SAMPLERBUFFER */ + F16SAMPLER2DMS = 457, /* F16SAMPLER2DMS */ + F16SAMPLER2DMSARRAY = 458, /* F16SAMPLER2DMSARRAY */ + F16SAMPLER1DSHADOW = 459, /* F16SAMPLER1DSHADOW */ + F16SAMPLER2DSHADOW = 460, /* F16SAMPLER2DSHADOW */ + F16SAMPLER1DARRAYSHADOW = 461, /* F16SAMPLER1DARRAYSHADOW */ + F16SAMPLER2DARRAYSHADOW = 462, /* F16SAMPLER2DARRAYSHADOW */ + F16SAMPLER2DRECTSHADOW = 463, /* F16SAMPLER2DRECTSHADOW */ + F16SAMPLERCUBESHADOW = 464, /* F16SAMPLERCUBESHADOW */ + F16SAMPLERCUBEARRAYSHADOW = 465, /* F16SAMPLERCUBEARRAYSHADOW */ + IMAGE1D = 466, /* IMAGE1D */ + IIMAGE1D = 467, /* IIMAGE1D */ + UIMAGE1D = 468, /* UIMAGE1D */ + IMAGE2D = 469, /* IMAGE2D */ + IIMAGE2D = 470, /* IIMAGE2D */ + UIMAGE2D = 471, /* UIMAGE2D */ + IMAGE3D = 472, /* IMAGE3D */ + IIMAGE3D = 473, /* IIMAGE3D */ + UIMAGE3D = 474, /* UIMAGE3D */ + IMAGE2DRECT = 475, /* IMAGE2DRECT */ + IIMAGE2DRECT = 476, /* IIMAGE2DRECT */ + UIMAGE2DRECT = 477, /* UIMAGE2DRECT */ + IMAGECUBE = 478, /* IMAGECUBE */ + IIMAGECUBE = 479, /* IIMAGECUBE */ + UIMAGECUBE = 480, /* UIMAGECUBE */ + IMAGEBUFFER = 481, /* IMAGEBUFFER */ + IIMAGEBUFFER = 482, /* IIMAGEBUFFER */ + UIMAGEBUFFER = 483, /* UIMAGEBUFFER */ + IMAGE1DARRAY = 484, /* IMAGE1DARRAY */ + IIMAGE1DARRAY = 485, /* IIMAGE1DARRAY */ + UIMAGE1DARRAY = 486, /* UIMAGE1DARRAY */ + IMAGE2DARRAY = 487, /* IMAGE2DARRAY */ + IIMAGE2DARRAY = 488, /* IIMAGE2DARRAY */ + UIMAGE2DARRAY = 489, /* UIMAGE2DARRAY */ + IMAGECUBEARRAY = 490, /* IMAGECUBEARRAY */ + IIMAGECUBEARRAY = 491, /* IIMAGECUBEARRAY */ + UIMAGECUBEARRAY = 492, /* UIMAGECUBEARRAY */ + IMAGE2DMS = 493, /* IMAGE2DMS */ + IIMAGE2DMS = 494, /* IIMAGE2DMS */ + UIMAGE2DMS = 495, /* UIMAGE2DMS */ + IMAGE2DMSARRAY = 496, /* IMAGE2DMSARRAY */ + IIMAGE2DMSARRAY = 497, /* IIMAGE2DMSARRAY */ + UIMAGE2DMSARRAY = 498, /* UIMAGE2DMSARRAY */ + F16IMAGE1D = 499, /* F16IMAGE1D */ + F16IMAGE2D = 500, /* F16IMAGE2D */ + F16IMAGE3D = 501, /* F16IMAGE3D */ + F16IMAGE2DRECT = 502, /* F16IMAGE2DRECT */ + F16IMAGECUBE = 503, /* F16IMAGECUBE */ + F16IMAGE1DARRAY = 504, /* F16IMAGE1DARRAY */ + F16IMAGE2DARRAY = 505, /* F16IMAGE2DARRAY */ + F16IMAGECUBEARRAY = 506, /* F16IMAGECUBEARRAY */ + F16IMAGEBUFFER = 507, /* F16IMAGEBUFFER */ + F16IMAGE2DMS = 508, /* F16IMAGE2DMS */ + F16IMAGE2DMSARRAY = 509, /* F16IMAGE2DMSARRAY */ + I64IMAGE1D = 510, /* I64IMAGE1D */ + U64IMAGE1D = 511, /* U64IMAGE1D */ + I64IMAGE2D = 512, /* I64IMAGE2D */ + U64IMAGE2D = 513, /* U64IMAGE2D */ + I64IMAGE3D = 514, /* I64IMAGE3D */ + U64IMAGE3D = 515, /* U64IMAGE3D */ + I64IMAGE2DRECT = 516, /* I64IMAGE2DRECT */ + U64IMAGE2DRECT = 517, /* U64IMAGE2DRECT */ + I64IMAGECUBE = 518, /* I64IMAGECUBE */ + U64IMAGECUBE = 519, /* U64IMAGECUBE */ + I64IMAGEBUFFER = 520, /* I64IMAGEBUFFER */ + U64IMAGEBUFFER = 521, /* U64IMAGEBUFFER */ + I64IMAGE1DARRAY = 522, /* I64IMAGE1DARRAY */ + U64IMAGE1DARRAY = 523, /* U64IMAGE1DARRAY */ + I64IMAGE2DARRAY = 524, /* I64IMAGE2DARRAY */ + U64IMAGE2DARRAY = 525, /* U64IMAGE2DARRAY */ + I64IMAGECUBEARRAY = 526, /* I64IMAGECUBEARRAY */ + U64IMAGECUBEARRAY = 527, /* U64IMAGECUBEARRAY */ + I64IMAGE2DMS = 528, /* I64IMAGE2DMS */ + U64IMAGE2DMS = 529, /* U64IMAGE2DMS */ + I64IMAGE2DMSARRAY = 530, /* I64IMAGE2DMSARRAY */ + U64IMAGE2DMSARRAY = 531, /* U64IMAGE2DMSARRAY */ + TEXTURECUBEARRAY = 532, /* TEXTURECUBEARRAY */ + ITEXTURECUBEARRAY = 533, /* ITEXTURECUBEARRAY */ + UTEXTURECUBEARRAY = 534, /* UTEXTURECUBEARRAY */ + TEXTURE1D = 
535, /* TEXTURE1D */ + ITEXTURE1D = 536, /* ITEXTURE1D */ + UTEXTURE1D = 537, /* UTEXTURE1D */ + TEXTURE1DARRAY = 538, /* TEXTURE1DARRAY */ + ITEXTURE1DARRAY = 539, /* ITEXTURE1DARRAY */ + UTEXTURE1DARRAY = 540, /* UTEXTURE1DARRAY */ + TEXTURE2DRECT = 541, /* TEXTURE2DRECT */ + ITEXTURE2DRECT = 542, /* ITEXTURE2DRECT */ + UTEXTURE2DRECT = 543, /* UTEXTURE2DRECT */ + TEXTUREBUFFER = 544, /* TEXTUREBUFFER */ + ITEXTUREBUFFER = 545, /* ITEXTUREBUFFER */ + UTEXTUREBUFFER = 546, /* UTEXTUREBUFFER */ + TEXTURE2DMS = 547, /* TEXTURE2DMS */ + ITEXTURE2DMS = 548, /* ITEXTURE2DMS */ + UTEXTURE2DMS = 549, /* UTEXTURE2DMS */ + TEXTURE2DMSARRAY = 550, /* TEXTURE2DMSARRAY */ + ITEXTURE2DMSARRAY = 551, /* ITEXTURE2DMSARRAY */ + UTEXTURE2DMSARRAY = 552, /* UTEXTURE2DMSARRAY */ + F16TEXTURE1D = 553, /* F16TEXTURE1D */ + F16TEXTURE2D = 554, /* F16TEXTURE2D */ + F16TEXTURE3D = 555, /* F16TEXTURE3D */ + F16TEXTURE2DRECT = 556, /* F16TEXTURE2DRECT */ + F16TEXTURECUBE = 557, /* F16TEXTURECUBE */ + F16TEXTURE1DARRAY = 558, /* F16TEXTURE1DARRAY */ + F16TEXTURE2DARRAY = 559, /* F16TEXTURE2DARRAY */ + F16TEXTURECUBEARRAY = 560, /* F16TEXTURECUBEARRAY */ + F16TEXTUREBUFFER = 561, /* F16TEXTUREBUFFER */ + F16TEXTURE2DMS = 562, /* F16TEXTURE2DMS */ + F16TEXTURE2DMSARRAY = 563, /* F16TEXTURE2DMSARRAY */ + SUBPASSINPUT = 564, /* SUBPASSINPUT */ + SUBPASSINPUTMS = 565, /* SUBPASSINPUTMS */ + ISUBPASSINPUT = 566, /* ISUBPASSINPUT */ + ISUBPASSINPUTMS = 567, /* ISUBPASSINPUTMS */ + USUBPASSINPUT = 568, /* USUBPASSINPUT */ + USUBPASSINPUTMS = 569, /* USUBPASSINPUTMS */ + F16SUBPASSINPUT = 570, /* F16SUBPASSINPUT */ + F16SUBPASSINPUTMS = 571, /* F16SUBPASSINPUTMS */ + SPIRV_INSTRUCTION = 572, /* SPIRV_INSTRUCTION */ + SPIRV_EXECUTION_MODE = 573, /* SPIRV_EXECUTION_MODE */ + SPIRV_EXECUTION_MODE_ID = 574, /* SPIRV_EXECUTION_MODE_ID */ + SPIRV_DECORATE = 575, /* SPIRV_DECORATE */ + SPIRV_DECORATE_ID = 576, /* SPIRV_DECORATE_ID */ + SPIRV_DECORATE_STRING = 577, /* SPIRV_DECORATE_STRING */ + SPIRV_TYPE = 578, /* SPIRV_TYPE */ + SPIRV_STORAGE_CLASS = 579, /* SPIRV_STORAGE_CLASS */ + SPIRV_BY_REFERENCE = 580, /* SPIRV_BY_REFERENCE */ + SPIRV_LITERAL = 581, /* SPIRV_LITERAL */ + LEFT_OP = 582, /* LEFT_OP */ + RIGHT_OP = 583, /* RIGHT_OP */ + INC_OP = 584, /* INC_OP */ + DEC_OP = 585, /* DEC_OP */ + LE_OP = 586, /* LE_OP */ + GE_OP = 587, /* GE_OP */ + EQ_OP = 588, /* EQ_OP */ + NE_OP = 589, /* NE_OP */ + AND_OP = 590, /* AND_OP */ + OR_OP = 591, /* OR_OP */ + XOR_OP = 592, /* XOR_OP */ + MUL_ASSIGN = 593, /* MUL_ASSIGN */ + DIV_ASSIGN = 594, /* DIV_ASSIGN */ + ADD_ASSIGN = 595, /* ADD_ASSIGN */ + MOD_ASSIGN = 596, /* MOD_ASSIGN */ + LEFT_ASSIGN = 597, /* LEFT_ASSIGN */ + RIGHT_ASSIGN = 598, /* RIGHT_ASSIGN */ + AND_ASSIGN = 599, /* AND_ASSIGN */ + XOR_ASSIGN = 600, /* XOR_ASSIGN */ + OR_ASSIGN = 601, /* OR_ASSIGN */ + SUB_ASSIGN = 602, /* SUB_ASSIGN */ + STRING_LITERAL = 603, /* STRING_LITERAL */ + LEFT_PAREN = 604, /* LEFT_PAREN */ + RIGHT_PAREN = 605, /* RIGHT_PAREN */ + LEFT_BRACKET = 606, /* LEFT_BRACKET */ + RIGHT_BRACKET = 607, /* RIGHT_BRACKET */ + LEFT_BRACE = 608, /* LEFT_BRACE */ + RIGHT_BRACE = 609, /* RIGHT_BRACE */ + DOT = 610, /* DOT */ + COMMA = 611, /* COMMA */ + COLON = 612, /* COLON */ + EQUAL = 613, /* EQUAL */ + SEMICOLON = 614, /* SEMICOLON */ + BANG = 615, /* BANG */ + DASH = 616, /* DASH */ + TILDE = 617, /* TILDE */ + PLUS = 618, /* PLUS */ + STAR = 619, /* STAR */ + SLASH = 620, /* SLASH */ + PERCENT = 621, /* PERCENT */ + LEFT_ANGLE = 622, /* LEFT_ANGLE */ + RIGHT_ANGLE = 623, /* RIGHT_ANGLE */ + 
VERTICAL_BAR = 624, /* VERTICAL_BAR */ + CARET = 625, /* CARET */ + AMPERSAND = 626, /* AMPERSAND */ + QUESTION = 627, /* QUESTION */ + INVARIANT = 628, /* INVARIANT */ + HIGH_PRECISION = 629, /* HIGH_PRECISION */ + MEDIUM_PRECISION = 630, /* MEDIUM_PRECISION */ + LOW_PRECISION = 631, /* LOW_PRECISION */ + PRECISION = 632, /* PRECISION */ + PACKED = 633, /* PACKED */ + RESOURCE = 634, /* RESOURCE */ + SUPERP = 635, /* SUPERP */ + FLOATCONSTANT = 636, /* FLOATCONSTANT */ + INTCONSTANT = 637, /* INTCONSTANT */ + UINTCONSTANT = 638, /* UINTCONSTANT */ + BOOLCONSTANT = 639, /* BOOLCONSTANT */ + IDENTIFIER = 640, /* IDENTIFIER */ + TYPE_NAME = 641, /* TYPE_NAME */ + CENTROID = 642, /* CENTROID */ + IN = 643, /* IN */ + OUT = 644, /* OUT */ + INOUT = 645, /* INOUT */ + STRUCT = 646, /* STRUCT */ + VOID = 647, /* VOID */ + WHILE = 648, /* WHILE */ + BREAK = 649, /* BREAK */ + CONTINUE = 650, /* CONTINUE */ + DO = 651, /* DO */ + ELSE = 652, /* ELSE */ + FOR = 653, /* FOR */ + IF = 654, /* IF */ + DISCARD = 655, /* DISCARD */ + RETURN = 656, /* RETURN */ + SWITCH = 657, /* SWITCH */ + CASE = 658, /* CASE */ + DEFAULT = 659, /* DEFAULT */ + TERMINATE_INVOCATION = 660, /* TERMINATE_INVOCATION */ + TERMINATE_RAY = 661, /* TERMINATE_RAY */ + IGNORE_INTERSECTION = 662, /* IGNORE_INTERSECTION */ + UNIFORM = 663, /* UNIFORM */ + SHARED = 664, /* SHARED */ + BUFFER = 665, /* BUFFER */ + FLAT = 666, /* FLAT */ + SMOOTH = 667, /* SMOOTH */ + LAYOUT = 668, /* LAYOUT */ + DOUBLECONSTANT = 669, /* DOUBLECONSTANT */ + INT16CONSTANT = 670, /* INT16CONSTANT */ + UINT16CONSTANT = 671, /* UINT16CONSTANT */ + FLOAT16CONSTANT = 672, /* FLOAT16CONSTANT */ + INT32CONSTANT = 673, /* INT32CONSTANT */ + UINT32CONSTANT = 674, /* UINT32CONSTANT */ + INT64CONSTANT = 675, /* INT64CONSTANT */ + UINT64CONSTANT = 676, /* UINT64CONSTANT */ + SUBROUTINE = 677, /* SUBROUTINE */ + DEMOTE = 678, /* DEMOTE */ + PAYLOADNV = 679, /* PAYLOADNV */ + PAYLOADINNV = 680, /* PAYLOADINNV */ + HITATTRNV = 681, /* HITATTRNV */ + CALLDATANV = 682, /* CALLDATANV */ + CALLDATAINNV = 683, /* CALLDATAINNV */ + PAYLOADEXT = 684, /* PAYLOADEXT */ + PAYLOADINEXT = 685, /* PAYLOADINEXT */ + HITATTREXT = 686, /* HITATTREXT */ + CALLDATAEXT = 687, /* CALLDATAEXT */ + CALLDATAINEXT = 688, /* CALLDATAINEXT */ + PATCH = 689, /* PATCH */ + SAMPLE = 690, /* SAMPLE */ + NONUNIFORM = 691, /* NONUNIFORM */ + COHERENT = 692, /* COHERENT */ + VOLATILE = 693, /* VOLATILE */ + RESTRICT = 694, /* RESTRICT */ + READONLY = 695, /* READONLY */ + WRITEONLY = 696, /* WRITEONLY */ + DEVICECOHERENT = 697, /* DEVICECOHERENT */ + QUEUEFAMILYCOHERENT = 698, /* QUEUEFAMILYCOHERENT */ + WORKGROUPCOHERENT = 699, /* WORKGROUPCOHERENT */ + SUBGROUPCOHERENT = 700, /* SUBGROUPCOHERENT */ + NONPRIVATE = 701, /* NONPRIVATE */ + SHADERCALLCOHERENT = 702, /* SHADERCALLCOHERENT */ + NOPERSPECTIVE = 703, /* NOPERSPECTIVE */ + EXPLICITINTERPAMD = 704, /* EXPLICITINTERPAMD */ + PERVERTEXEXT = 705, /* PERVERTEXEXT */ + PERVERTEXNV = 706, /* PERVERTEXNV */ + PERPRIMITIVENV = 707, /* PERPRIMITIVENV */ + PERVIEWNV = 708, /* PERVIEWNV */ + PERTASKNV = 709, /* PERTASKNV */ + PERPRIMITIVEEXT = 710, /* PERPRIMITIVEEXT */ + TASKPAYLOADWORKGROUPEXT = 711, /* TASKPAYLOADWORKGROUPEXT */ + PRECISE = 712 /* PRECISE */ + }; + typedef enum yytokentype yytoken_kind_t; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED
+union YYSTYPE
+{
+#line 97 "MachineIndependent/glslang.y"
+
+    struct {
+        glslang::TSourceLoc loc;
+        union {
+            glslang::TString *string;
+            int i;
+            unsigned int u;
+            long long i64;
+            unsigned long long u64;
+            bool b;
+            double d;
+        };
+        glslang::TSymbol* symbol;
+    } lex;
+    struct {
+        glslang::TSourceLoc loc;
+        glslang::TOperator op;
+        union {
+            TIntermNode* intermNode;
+            glslang::TIntermNodePair nodePair;
+            glslang::TIntermTyped* intermTypedNode;
+            glslang::TAttributes* attributes;
+            glslang::TSpirvRequirement* spirvReq;
+            glslang::TSpirvInstruction* spirvInst;
+            glslang::TSpirvTypeParameters* spirvTypeParams;
+        };
+        union {
+            glslang::TPublicType type;
+            glslang::TFunction* function;
+            glslang::TParameter param;
+            glslang::TTypeLoc typeLine;
+            glslang::TTypeList* typeList;
+            glslang::TArraySizes* arraySizes;
+            glslang::TIdentifierList* identifierList;
+        };
+        glslang::TArraySizes* typeParameters;
+    } interm;
+
+#line 560 "MachineIndependent/glslang_tab.cpp.h"
+
+};
+typedef union YYSTYPE YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+
+int yyparse (glslang::TParseContext* pParseContext);
+
+#endif /* !YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED */
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/iomapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/iomapper.h
new file mode 100644
index 0000000..0003a74
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/iomapper.h
@@ -0,0 +1,361 @@
+//
+// Copyright (C) 2016 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#if !defined(GLSLANG_WEB)
+
+#ifndef _IOMAPPER_INCLUDED
+#define _IOMAPPER_INCLUDED
+
+#include <cstdint>
+#include "LiveTraverser.h"
+#include <unordered_map>
+#include <unordered_set>
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+// + +class TInfoSink; + +namespace glslang { + +class TIntermediate; +struct TVarEntryInfo { + long long id; + TIntermSymbol* symbol; + bool live; + int newBinding; + int newSet; + int newLocation; + int newComponent; + int newIndex; + EShLanguage stage; + + void clearNewAssignments() { + newBinding = -1; + newSet = -1; + newLocation = -1; + newComponent = -1; + newIndex = -1; + } + + struct TOrderById { + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { return l.id < r.id; } + }; + + struct TOrderByPriority { + // ordering: + // 1) has both binding and set + // 2) has binding but no set + // 3) has no binding but set + // 4) has no binding and no set + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { + const TQualifier& lq = l.symbol->getQualifier(); + const TQualifier& rq = r.symbol->getQualifier(); + + // simple rules: + // has binding gives 2 points + // has set gives 1 point + // who has the most points is more important. + int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0); + int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0); + + if (lPoints == rPoints) + return l.id < r.id; + return lPoints > rPoints; + } + }; + + struct TOrderByPriorityAndLive { + // ordering: + // 1) do live variables first + // 2) has both binding and set + // 3) has binding but no set + // 4) has no binding but set + // 5) has no binding and no set + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { + + const TQualifier& lq = l.symbol->getQualifier(); + const TQualifier& rq = r.symbol->getQualifier(); + + // simple rules: + // has binding gives 2 points + // has set gives 1 point + // who has the most points is more important. + int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0); + int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0); + + if (l.live != r.live) + return l.live > r.live; + + if (lPoints != rPoints) + return lPoints > rPoints; + + return l.id < r.id; + } + }; +}; + +// Base class for shared TIoMapResolver services, used by several derivations. 
+struct TDefaultIoResolverBase : public glslang::TIoMapResolver {
+public:
+    TDefaultIoResolverBase(const TIntermediate& intermediate);
+    typedef std::vector<int> TSlotSet;
+    typedef std::unordered_map<int, TSlotSet> TSlotSetMap;
+
+    // grow the reflection stage by stage
+    void notifyBinding(EShLanguage, TVarEntryInfo& /*ent*/) override {}
+    void notifyInOut(EShLanguage, TVarEntryInfo& /*ent*/) override {}
+    void beginNotifications(EShLanguage) override {}
+    void endNotifications(EShLanguage) override {}
+    void beginResolve(EShLanguage) override {}
+    void endResolve(EShLanguage) override {}
+    void beginCollect(EShLanguage) override {}
+    void endCollect(EShLanguage) override {}
+    void reserverResourceSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {}
+    void reserverStorageSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {}
+    int getBaseBinding(EShLanguage stage, TResourceType res, unsigned int set) const;
+    const std::vector<std::string>& getResourceSetBinding(EShLanguage stage) const;
+    virtual TResourceType getResourceType(const glslang::TType& type) = 0;
+    bool doAutoBindingMapping() const;
+    bool doAutoLocationMapping() const;
+    TSlotSet::iterator findSlot(int set, int slot);
+    bool checkEmpty(int set, int slot);
+    bool validateInOut(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; }
+    int reserveSlot(int set, int slot, int size = 1);
+    int getFreeSlot(int set, int base, int size = 1);
+    int resolveSet(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override;
+    int resolveInOutComponent(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveInOutIndex(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    void addStage(EShLanguage stage, TIntermediate& stageIntermediate) override {
+        if (stage < EShLangCount) {
+            stageMask[stage] = true;
+            stageIntermediates[stage] = &stageIntermediate;
+        }
+    }
+    uint32_t computeTypeLocationSize(const TType& type, EShLanguage stage);
+
+    TSlotSetMap slots;
+    bool hasError = false;
+
+protected:
+    TDefaultIoResolverBase(TDefaultIoResolverBase&);
+    TDefaultIoResolverBase& operator=(TDefaultIoResolverBase&);
+    const TIntermediate& referenceIntermediate;
+    int nextUniformLocation;
+    int nextInputLocation;
+    int nextOutputLocation;
+    bool stageMask[EShLangCount + 1];
+    const TIntermediate* stageIntermediates[EShLangCount];
+
+    // Return descriptor set specific base if there is one, and the generic base otherwise.
+    int selectBaseBinding(int base, int descriptorSetBase) const {
+        return descriptorSetBase != -1 ? descriptorSetBase : base;
+    }
+
+    static int getLayoutSet(const glslang::TType& type) {
+        if (type.getQualifier().hasSet())
+            return type.getQualifier().layoutSet;
+        else
+            return 0;
+    }
+
+    static bool isSamplerType(const glslang::TType& type) {
+        return type.getBasicType() == glslang::EbtSampler && type.getSampler().isPureSampler();
+    }
+
+    static bool isTextureType(const glslang::TType& type) {
+        return (type.getBasicType() == glslang::EbtSampler &&
+                (type.getSampler().isTexture() || type.getSampler().isSubpass()));
+    }
+
+    static bool isUboType(const glslang::TType& type) {
+        return type.getQualifier().storage == EvqUniform;
+    }
+
+    static bool isImageType(const glslang::TType& type) {
+        return type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage();
+    }
+
+    static bool isSsboType(const glslang::TType& type) {
+        return type.getQualifier().storage == EvqBuffer;
+    }
+
+    // Return true if this is a SRV (shader resource view) type:
+    static bool isSrvType(const glslang::TType& type) {
+        return isTextureType(type) || type.getQualifier().storage == EvqBuffer;
+    }
+
+    // Return true if this is a UAV (unordered access view) type:
+    static bool isUavType(const glslang::TType& type) {
+        if (type.getQualifier().isReadOnly())
+            return false;
+        return (type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage()) ||
+                (type.getQualifier().storage == EvqBuffer);
+    }
+};
+
+// Default I/O resolver for OpenGL
+struct TDefaultGlslIoResolver : public TDefaultIoResolverBase {
+public:
+    typedef std::map<TString, int> TVarSlotMap;  // <resourceName, location/binding>
+    typedef std::map<int, TVarSlotMap> TSlotMap; // <resourceKey, TVarSlotMap>
+    TDefaultGlslIoResolver(const TIntermediate& intermediate);
+    bool validateBinding(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; }
+    TResourceType getResourceType(const glslang::TType& type) override;
+    int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override;
+    int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveBinding(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    void beginResolve(EShLanguage /*stage*/) override;
+    void endResolve(EShLanguage stage) override;
+    void beginCollect(EShLanguage) override;
+    void endCollect(EShLanguage) override;
+    void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override;
+    void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override;
+    // in/out symbol and uniform symbol are stored in the same resourceSlotMap, the storage key is used to identify each type of symbol.
+    // We use stage and storage qualifier to construct a storage key. it can help us identify the same storage resource used in different stage.
+    // if a resource is a program resource and we don't need know it usage stage, we can use same stage to build storage key.
+    // Note: both stage and type must less then 0xffff.
+    int buildStorageKey(EShLanguage stage, TStorageQualifier type) {
+        assert(static_cast<uint32_t>(stage) <= 0x0000ffff && static_cast<uint32_t>(type) <= 0x0000ffff);
+        return (stage << 16) | type;
+    }
+
+protected:
+    // Use for mark pre stage, to get more interface symbol information.
+    EShLanguage preStage;
+    // Use for mark current shader stage for resolver
+    EShLanguage currentStage;
+    // Slot map for storage resource(location of uniform and interface symbol) It's a program share slot
+    TSlotMap resourceSlotMap;
+    // Slot map for other resource(image, ubo, ssbo), It's a program share slot.
+    TSlotMap storageSlotMap;
+};
+
+typedef std::map<const TString, TVarEntryInfo> TVarLiveMap;
+
+// override function "operator=", if a vector being sort,
+// when use vc++, the sort function will call :
+// pair& operator=(const pair<_Other1, _Other2>& _Right)
+// {
+//     first = _Right.first;
+//     second = _Right.second;
+//     return (*this);
+// }
+// that will make a const type handing on left.
+// override this function can avoid a compiler error.
+// In the future, if the vc++ compiler can handle such a situation,
+// this part of the code will be removed.
+struct TVarLivePair : std::pair<const TString, TVarEntryInfo> {
+    TVarLivePair(const std::pair<const TString, TVarEntryInfo>& _Right) : pair(_Right.first, _Right.second) {}
+    TVarLivePair& operator=(const TVarLivePair& _Right) {
+        const_cast<TString&>(first) = _Right.first;
+        second = _Right.second;
+        return (*this);
+    }
+    TVarLivePair(const TVarLivePair& src) : pair(src) { }
+};
+typedef std::vector<TVarLivePair> TVarLiveVector;
+
+// I/O mapper
+class TIoMapper {
+public:
+    TIoMapper() {}
+    virtual ~TIoMapper() {}
+    // grow the reflection stage by stage
+    bool virtual addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*);
+    bool virtual doMap(TIoMapResolver*, TInfoSink&) { return true; }
+};
+
+// I/O mapper for GLSL
+class TGlslIoMapper : public TIoMapper {
+public:
+    TGlslIoMapper() {
+        memset(inVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1));
+        memset(outVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1));
+        memset(uniformVarMap, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1));
+        memset(intermediates, 0, sizeof(TIntermediate*) * (EShLangCount + 1));
+        profile = ENoProfile;
+        version = 0;
+        autoPushConstantMaxSize = 128;
+        autoPushConstantBlockPacking = ElpStd430;
+    }
+    virtual ~TGlslIoMapper() {
+        for (size_t stage = 0; stage < EShLangCount; stage++) {
+            if (inVarMaps[stage] != nullptr) {
+                delete inVarMaps[stage];
+                inVarMaps[stage] = nullptr;
+            }
+            if (outVarMaps[stage] != nullptr) {
+                delete outVarMaps[stage];
+                outVarMaps[stage] = nullptr;
+            }
+            if (uniformVarMap[stage] != nullptr) {
+                delete uniformVarMap[stage];
+                uniformVarMap[stage] = nullptr;
+            }
+            if (intermediates[stage] != nullptr)
+                intermediates[stage] = nullptr;
+        }
+    }
+    // If set, the uniform block with the given name will be changed to be backed by
+    // push_constant if it's size is <= maxSize
+    void setAutoPushConstantBlock(const char* name, unsigned int maxSize, TLayoutPacking packing) {
+        autoPushConstantBlockName = name;
+        autoPushConstantMaxSize = maxSize;
+        autoPushConstantBlockPacking = packing;
+    }
+    // grow the reflection stage by stage
+    bool addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*) override;
+    bool doMap(TIoMapResolver*, TInfoSink&) override;
+    TVarLiveMap *inVarMaps[EShLangCount], *outVarMaps[EShLangCount],
+                *uniformVarMap[EShLangCount];
+    TIntermediate* intermediates[EShLangCount];
+    bool hadError = false;
+    EProfile profile;
+    int version;
+
+private:
+    TString autoPushConstantBlockName;
+    unsigned int autoPushConstantMaxSize;
+    TLayoutPacking autoPushConstantBlockPacking;
+};
+
+} // end namespace glslang
+
+#endif // _IOMAPPER_INCLUDED
+
+#endif // !GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/localintermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/localintermediate.h
new file mode 100644
index 0000000..1bb4e97
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/localintermediate.h
@@ -0,0 +1,1222 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2016 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _LOCAL_INTERMEDIATE_INCLUDED_
+#define _LOCAL_INTERMEDIATE_INCLUDED_
+
+#include "../Include/intermediate.h"
+#include "../Public/ShaderLang.h"
+#include "Versions.h"
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <set>
+#include <array>
+
+class TInfoSink;
+
+namespace glslang {
+
+struct TMatrixSelector {
+    int coord1;  // stay agnostic about column/row; this is parse order
+    int coord2;
+};
+
+typedef int TVectorSelector;
+
+const int MaxSwizzleSelectors = 4;
+
+template<typename selectorType>
+class TSwizzleSelectors {
+public:
+    TSwizzleSelectors() : size_(0) { }
+
+    void push_back(selectorType comp)
+    {
+        if (size_ < MaxSwizzleSelectors)
+            components[size_++] = comp;
+    }
+    void resize(int s)
+    {
+        assert(s <= size_);
+        size_ = s;
+    }
+    int size() const { return size_; }
+    selectorType operator[](int i) const
+    {
+        assert(i < MaxSwizzleSelectors);
+        return components[i];
+    }
+
+private:
+    int size_;
+    selectorType components[MaxSwizzleSelectors];
+};
+
+//
+// Some helper structures for TIntermediate. Their contents are encapsulated
+// by TIntermediate.
+//
+
+// Used for call-graph algorithms for detecting recursion, missing bodies, and dead bodies.
+// A "call" is a pair: <caller, callee>.
+// There can be duplicates. General assumption is the list is small.
+struct TCall {
+    TCall(const TString& pCaller, const TString& pCallee) : caller(pCaller), callee(pCallee) { }
+    TString caller;
+    TString callee;
+    bool visited;
+    bool currentPath;
+    bool errorGiven;
+    int calleeBodyPosition;
+};
+
+// A generic 1-D range.
+struct TRange {
+    TRange(int start, int last) : start(start), last(last) { }
+    bool overlap(const TRange& rhs) const
+    {
+        return last >= rhs.start && start <= rhs.last;
+    }
+    int start;
+    int last;
+};
+
+// An IO range is a 3-D rectangle; the set of (location, component, index) triples all lying
+// within the same location range, component range, and index value. Locations don't alias unless
+// all other dimensions of their range overlap.
+struct TIoRange {
+    TIoRange(TRange location, TRange component, TBasicType basicType, int index)
+        : location(location), component(component), basicType(basicType), index(index) { }
+    bool overlap(const TIoRange& rhs) const
+    {
+        return location.overlap(rhs.location) && component.overlap(rhs.component) && index == rhs.index;
+    }
+    TRange location;
+    TRange component;
+    TBasicType basicType;
+    int index;
+};
+
+// An offset range is a 2-D rectangle; the set of (binding, offset) pairs all lying
+// within the same binding and offset range.
+struct TOffsetRange {
+    TOffsetRange(TRange binding, TRange offset)
+        : binding(binding), offset(offset) { }
+    bool overlap(const TOffsetRange& rhs) const
+    {
+        return binding.overlap(rhs.binding) && offset.overlap(rhs.offset);
+    }
+    TRange binding;
+    TRange offset;
+};
+
+#ifndef GLSLANG_WEB
+// Things that need to be tracked per xfb buffer.
+struct TXfbBuffer {
+    TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), contains64BitType(false),
+                   contains32BitType(false), contains16BitType(false) { }
+    std::vector<TRange> ranges;  // byte offsets that have already been assigned
+    unsigned int stride;
+    unsigned int implicitStride;
+    bool contains64BitType;
+    bool contains32BitType;
+    bool contains16BitType;
+};
+#endif
+
+// Track a set of strings describing how the module was processed.
+// This includes command line options, transforms, etc., ideally inclusive enough
+// to reproduce the steps used to transform the input source to the output.
+// E.g., see SPIR-V OpModuleProcessed.
+// Each "process" or "transform" uses is expressed in the form:
+//   process arg0 arg1 arg2 ...
+//   process arg0 arg1 arg2 ...
+// where everything is textual, and there can be zero or more arguments
+class TProcesses {
+public:
+    TProcesses() {}
+    ~TProcesses() {}
+
+    void addProcess(const char* process)
+    {
+        processes.push_back(process);
+    }
+    void addProcess(const std::string& process)
+    {
+        processes.push_back(process);
+    }
+    void addArgument(int arg)
+    {
+        processes.back().append(" ");
+        std::string argString = std::to_string(arg);
+        processes.back().append(argString);
+    }
+    void addArgument(const char* arg)
+    {
+        processes.back().append(" ");
+        processes.back().append(arg);
+    }
+    void addArgument(const std::string& arg)
+    {
+        processes.back().append(" ");
+        processes.back().append(arg);
+    }
+    void addIfNonZero(const char* process, int value)
+    {
+        if (value != 0) {
+            addProcess(process);
+            addArgument(value);
+        }
+    }
+
+    const std::vector<std::string>& getProcesses() const { return processes; }
+
+private:
+    std::vector<std::string> processes;
+};
+
+class TSymbolTable;
+class TSymbol;
+class TVariable;
+
+//
+// Texture and Sampler transformation mode.
+//
+enum ComputeDerivativeMode {
+    LayoutDerivativeNone,        // default layout as SPV_NV_compute_shader_derivatives not enabled
+    LayoutDerivativeGroupQuads,  // derivative_group_quadsNV
+    LayoutDerivativeGroupLinear, // derivative_group_linearNV
+};
+
+class TIdMaps {
+public:
+    TMap<TString, long long>& operator[](long long i) { return maps[i]; }
+    const TMap<TString, long long>& operator[](long long i) const { return maps[i]; }
+private:
+    TMap<TString, long long> maps[EsiCount];
+};
+
+class TNumericFeatures {
+public:
+    TNumericFeatures() : features(0) { }
+    TNumericFeatures(const TNumericFeatures&) = delete;
+    TNumericFeatures& operator=(const TNumericFeatures&) = delete;
+    typedef enum : unsigned int {
+        shader_explicit_arithmetic_types = 1 << 0,
+        shader_explicit_arithmetic_types_int8 = 1 << 1,
+        shader_explicit_arithmetic_types_int16 = 1 << 2,
+        shader_explicit_arithmetic_types_int32 = 1 << 3,
+        shader_explicit_arithmetic_types_int64 = 1 << 4,
+        shader_explicit_arithmetic_types_float16 = 1 << 5,
+        shader_explicit_arithmetic_types_float32 = 1 << 6,
+        shader_explicit_arithmetic_types_float64 = 1 << 7,
+        shader_implicit_conversions = 1 << 8,
+        gpu_shader_fp64 = 1 << 9,
+        gpu_shader_int16 = 1 << 10,
+        gpu_shader_half_float = 1 << 11,
+    } feature;
+    void insert(feature f) { features |= f; }
+    void erase(feature f) { features &= ~f; }
+    bool contains(feature f) const { return (features & f) != 0; }
+private:
+    unsigned int features;
+};
+
+// MustBeAssigned wraps a T, asserting that it has been assigned with
+// operator =() before attempting to read with operator T() or operator ->().
+// Used to catch cases where fields are read before they have been assigned.
+template<typename T>
+class MustBeAssigned
+{
+public:
+    MustBeAssigned() = default;
+    MustBeAssigned(const T& v) : value(v) {}
+    operator const T&() const { assert(isSet); return value; }
+    const T* operator ->() const { assert(isSet); return &value; }
+    MustBeAssigned& operator = (const T& v) { value = v; isSet = true; return *this; }
+private:
+    T value;
+    bool isSet = false;
+};
+
+//
+// Set of helper functions to help parse and build the tree.
+// +class TIntermediate { +public: + explicit TIntermediate(EShLanguage l, int v = 0, EProfile p = ENoProfile) : + language(l), + profile(p), version(v), + treeRoot(0), + resources(TBuiltInResource{}), + numEntryPoints(0), numErrors(0), numPushConstants(0), recursive(false), + invertY(false), + dxPositionW(false), + enhancedMsgs(false), + debugInfo(false), + useStorageBuffer(false), + invariantAll(false), + nanMinMaxClamp(false), + depthReplacing(false), + stencilReplacing(false), + uniqueId(0), + globalUniformBlockName(""), + atomicCounterBlockName(""), + globalUniformBlockSet(TQualifier::layoutSetEnd), + globalUniformBlockBinding(TQualifier::layoutBindingEnd), + atomicCounterBlockSet(TQualifier::layoutSetEnd) +#ifndef GLSLANG_WEB + , + implicitThisName("@this"), implicitCounterName("@count"), + source(EShSourceNone), + useVulkanMemoryModel(false), + invocations(TQualifier::layoutNotSet), vertices(TQualifier::layoutNotSet), + inputPrimitive(ElgNone), outputPrimitive(ElgNone), + pixelCenterInteger(false), originUpperLeft(false),texCoordBuiltinRedeclared(false), + vertexSpacing(EvsNone), vertexOrder(EvoNone), interlockOrdering(EioNone), pointMode(false), earlyFragmentTests(false), + postDepthCoverage(false), earlyAndLateFragmentTestsAMD(false), depthLayout(EldNone), stencilLayout(ElsNone), + hlslFunctionality1(false), + blendEquations(0), xfbMode(false), multiStream(false), + layoutOverrideCoverage(false), + geoPassthroughEXT(false), + numShaderRecordBlocks(0), + computeDerivativeMode(LayoutDerivativeNone), + primitives(TQualifier::layoutNotSet), + numTaskNVBlocks(0), + layoutPrimitiveCulling(false), + numTaskEXTPayloads(0), + autoMapBindings(false), + autoMapLocations(false), + flattenUniformArrays(false), + useUnknownFormat(false), + hlslOffsets(false), + hlslIoMapping(false), + useVariablePointers(false), + textureSamplerTransformMode(EShTexSampTransKeep), + needToLegalize(false), + binaryDoubleOutput(false), + subgroupUniformControlFlow(false), + usePhysicalStorageBuffer(false), + spirvRequirement(nullptr), + spirvExecutionMode(nullptr), + uniformLocationBase(0) +#endif + { + localSize[0] = 1; + localSize[1] = 1; + localSize[2] = 1; + localSizeNotDefault[0] = false; + localSizeNotDefault[1] = false; + localSizeNotDefault[2] = false; + localSizeSpecId[0] = TQualifier::layoutNotSet; + localSizeSpecId[1] = TQualifier::layoutNotSet; + localSizeSpecId[2] = TQualifier::layoutNotSet; +#ifndef GLSLANG_WEB + xfbBuffers.resize(TQualifier::layoutXfbBufferEnd); + shiftBinding.fill(0); +#endif + } + + void setVersion(int v) + { + version = v; + } + void setProfile(EProfile p) + { + profile = p; + } + + int getVersion() const { return version; } + EProfile getProfile() const { return profile; } + void setSpv(const SpvVersion& s) + { + spvVersion = s; + + // client processes + if (spvVersion.vulkan > 0) + processes.addProcess("client vulkan100"); + if (spvVersion.openGl > 0) + processes.addProcess("client opengl100"); + + // target SPV + switch (spvVersion.spv) { + case 0: + break; + case EShTargetSpv_1_0: + break; + case EShTargetSpv_1_1: + processes.addProcess("target-env spirv1.1"); + break; + case EShTargetSpv_1_2: + processes.addProcess("target-env spirv1.2"); + break; + case EShTargetSpv_1_3: + processes.addProcess("target-env spirv1.3"); + break; + case EShTargetSpv_1_4: + processes.addProcess("target-env spirv1.4"); + break; + case EShTargetSpv_1_5: + processes.addProcess("target-env spirv1.5"); + break; + case EShTargetSpv_1_6: + processes.addProcess("target-env spirv1.6"); + break; + 
default:
+            processes.addProcess("target-env spirvUnknown");
+            break;
+        }
+
+        // target-environment processes
+        switch (spvVersion.vulkan) {
+        case 0:
+            break;
+        case EShTargetVulkan_1_0:
+            processes.addProcess("target-env vulkan1.0");
+            break;
+        case EShTargetVulkan_1_1:
+            processes.addProcess("target-env vulkan1.1");
+            break;
+        case EShTargetVulkan_1_2:
+            processes.addProcess("target-env vulkan1.2");
+            break;
+        case EShTargetVulkan_1_3:
+            processes.addProcess("target-env vulkan1.3");
+            break;
+        default:
+            processes.addProcess("target-env vulkanUnknown");
+            break;
+        }
+        if (spvVersion.openGl > 0)
+            processes.addProcess("target-env opengl");
+    }
+    const SpvVersion& getSpv() const { return spvVersion; }
+    EShLanguage getStage() const { return language; }
+    void addRequestedExtension(const char* extension) { requestedExtensions.insert(extension); }
+    const std::set<std::string>& getRequestedExtensions() const { return requestedExtensions; }
+    bool isRayTracingStage() const {
+        return language >= EShLangRayGen && language <= EShLangCallableNV;
+    }
+
+    void setTreeRoot(TIntermNode* r) { treeRoot = r; }
+    TIntermNode* getTreeRoot() const { return treeRoot; }
+    void incrementEntryPointCount() { ++numEntryPoints; }
+    int getNumEntryPoints() const { return numEntryPoints; }
+    int getNumErrors() const { return numErrors; }
+    void addPushConstantCount() { ++numPushConstants; }
+    void setLimits(const TBuiltInResource& r) { resources = r; }
+    const TBuiltInResource& getLimits() const { return resources; }
+
+    bool postProcess(TIntermNode*, EShLanguage);
+    void removeTree();
+
+    void setEntryPointName(const char* ep)
+    {
+        entryPointName = ep;
+        processes.addProcess("entry-point");
+        processes.addArgument(entryPointName);
+    }
+    void setEntryPointMangledName(const char* ep) { entryPointMangledName = ep; }
+    const std::string& getEntryPointName() const { return entryPointName; }
+    const std::string& getEntryPointMangledName() const { return entryPointMangledName; }
+
+    void setDebugInfo(bool debuginfo)
+    {
+        debugInfo = debuginfo;
+    }
+    bool getDebugInfo() const { return debugInfo; }
+
+    void setInvertY(bool invert)
+    {
+        invertY = invert;
+        if (invertY)
+            processes.addProcess("invert-y");
+    }
+    bool getInvertY() const { return invertY; }
+
+    void setDxPositionW(bool dxPosW)
+    {
+        dxPositionW = dxPosW;
+        if (dxPositionW)
+            processes.addProcess("dx-position-w");
+    }
+    bool getDxPositionW() const { return dxPositionW; }
+
+    void setEnhancedMsgs()
+    {
+        enhancedMsgs = true;
+    }
+    bool getEnhancedMsgs() const { return enhancedMsgs && getSource() == EShSourceGlsl; }
+
+#ifdef ENABLE_HLSL
+    void setSource(EShSource s) { source = s; }
+    EShSource getSource() const { return source; }
+#else
+    void setSource(EShSource s) { assert(s == EShSourceGlsl); (void)s; }
+    EShSource getSource() const { return EShSourceGlsl; }
+#endif
+
+    bool isRecursive() const { return recursive; }
+
+    TIntermSymbol* addSymbol(const TVariable&);
+    TIntermSymbol* addSymbol(const TVariable&, const TSourceLoc&);
+    TIntermSymbol* addSymbol(const TType&, const TSourceLoc&);
+    TIntermSymbol* addSymbol(const TIntermSymbol&);
+    TIntermTyped* addConversion(TOperator, const TType&, TIntermTyped*);
+    std::tuple<TIntermTyped*, TIntermTyped*> addPairConversion(TOperator op, TIntermTyped* node0, TIntermTyped* node1);
+    TIntermTyped* addUniShapeConversion(TOperator, const TType&, TIntermTyped*);
+    TIntermTyped* addConversion(TBasicType convertTo, TIntermTyped* node) const;
+    void addBiShapeConversion(TOperator, TIntermTyped*& lhsNode, TIntermTyped*& rhsNode);
+    TIntermTyped*
addShapeConversion(const TType&, TIntermTyped*); + TIntermTyped* addBinaryMath(TOperator, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addAssign(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addIndex(TOperator op, TIntermTyped* base, TIntermTyped* index, const TSourceLoc&); + TIntermTyped* addUnaryMath(TOperator, TIntermTyped* child, const TSourceLoc&); + TIntermTyped* addBuiltInFunctionCall(const TSourceLoc& line, TOperator, bool unary, TIntermNode*, const TType& returnType); + bool canImplicitlyPromote(TBasicType from, TBasicType to, TOperator op = EOpNull) const; + bool isIntegralPromotion(TBasicType from, TBasicType to) const; + bool isFPPromotion(TBasicType from, TBasicType to) const; + bool isIntegralConversion(TBasicType from, TBasicType to) const; + bool isFPConversion(TBasicType from, TBasicType to) const; + bool isFPIntegralConversion(TBasicType from, TBasicType to) const; + TOperator mapTypeToConstructorOp(const TType&) const; + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right); + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right, const TSourceLoc&); + TIntermAggregate* makeAggregate(TIntermNode* node); + TIntermAggregate* makeAggregate(TIntermNode* node, const TSourceLoc&); + TIntermAggregate* makeAggregate(const TSourceLoc&); + TIntermTyped* setAggregateOperator(TIntermNode*, TOperator, const TType& type, const TSourceLoc&); + bool areAllChildConst(TIntermAggregate* aggrNode); + TIntermSelection* addSelection(TIntermTyped* cond, TIntermNodePair code, const TSourceLoc&); + TIntermTyped* addSelection(TIntermTyped* cond, TIntermTyped* trueBlock, TIntermTyped* falseBlock, const TSourceLoc&); + TIntermTyped* addComma(TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addMethod(TIntermTyped*, const TType&, const TString*, const TSourceLoc&); + TIntermConstantUnion* addConstantUnion(const TConstUnionArray&, const TType&, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(bool, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(double, TBasicType, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(const TString*, const TSourceLoc&, bool literal = false) const; + TIntermTyped* promoteConstantUnion(TBasicType, TIntermConstantUnion*) const; + bool parseConstTree(TIntermNode*, TConstUnionArray, TOperator, const TType&, bool singleConstantParam = false); + TIntermLoop* addLoop(TIntermNode*, TIntermTyped*, TIntermTyped*, bool testFirst, const TSourceLoc&); + TIntermAggregate* addForLoop(TIntermNode*, TIntermNode*, TIntermTyped*, 
TIntermTyped*, bool testFirst,
+                                 const TSourceLoc&, TIntermLoop*&);
+    TIntermBranch* addBranch(TOperator, const TSourceLoc&);
+    TIntermBranch* addBranch(TOperator, TIntermTyped*, const TSourceLoc&);
+    template<typename selectorType> TIntermTyped* addSwizzle(TSwizzleSelectors<selectorType>&, const TSourceLoc&);
+
+    // Low level functions to add nodes (no conversions or other higher level transformations)
+    // If a type is provided, the node's type will be set to it.
+    TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&) const;
+    TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&,
+        const TType&) const;
+    TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&) const;
+    TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&, const TType&) const;
+
+    // Constant folding (in Constant.cpp)
+    TIntermTyped* fold(TIntermAggregate* aggrNode);
+    TIntermTyped* foldConstructor(TIntermAggregate* aggrNode);
+    TIntermTyped* foldDereference(TIntermTyped* node, int index, const TSourceLoc&);
+    TIntermTyped* foldSwizzle(TIntermTyped* node, TSwizzleSelectors<TVectorSelector>& fields, const TSourceLoc&);
+
+    // Tree ops
+    static const TIntermTyped* findLValueBase(const TIntermTyped*, bool swizzleOkay, bool BufferReferenceOk = false);
+
+    // Linkage related
+    void addSymbolLinkageNodes(TIntermAggregate*& linkage, EShLanguage, TSymbolTable&);
+    void addSymbolLinkageNode(TIntermAggregate*& linkage, const TSymbol&);
+    TIntermAggregate* findLinkerObjects() const;
+
+    void setGlobalUniformBlockName(const char* name) { globalUniformBlockName = std::string(name); }
+    const char* getGlobalUniformBlockName() const { return globalUniformBlockName.c_str(); }
+    void setGlobalUniformSet(unsigned int set) { globalUniformBlockSet = set; }
+    unsigned int getGlobalUniformSet() const { return globalUniformBlockSet; }
+    void setGlobalUniformBinding(unsigned int binding) { globalUniformBlockBinding = binding; }
+    unsigned int getGlobalUniformBinding() const { return globalUniformBlockBinding; }
+
+    void setAtomicCounterBlockName(const char* name) { atomicCounterBlockName = std::string(name); }
+    const char* getAtomicCounterBlockName() const { return atomicCounterBlockName.c_str(); }
+    void setAtomicCounterBlockSet(unsigned int set) { atomicCounterBlockSet = set; }
+    unsigned int getAtomicCounterBlockSet() const { return atomicCounterBlockSet; }
+
+    void setUseStorageBuffer() { useStorageBuffer = true; }
+    bool usingStorageBuffer() const { return useStorageBuffer; }
+    void setInvariantAll() { invariantAll = true; }
+    bool isInvariantAll() const { return invariantAll; }
+    void setDepthReplacing() { depthReplacing = true; }
+    bool isDepthReplacing() const { return depthReplacing; }
+    void setStencilReplacing() { stencilReplacing = true; }
+    bool isStencilReplacing() const { return stencilReplacing; }
+    bool setLocalSize(int dim, int size)
+    {
+        if (localSizeNotDefault[dim])
+            return size == localSize[dim];
+        localSizeNotDefault[dim] = true;
+        localSize[dim] = size;
+        return true;
+    }
+    unsigned int getLocalSize(int dim) const { return localSize[dim]; }
+    bool isLocalSizeSet() const
+    {
+        // Return true if any component has been set (i.e. any component is not default).
+ return localSizeNotDefault[0] || localSizeNotDefault[1] || localSizeNotDefault[2]; + } + bool setLocalSizeSpecId(int dim, int id) + { + if (localSizeSpecId[dim] != TQualifier::layoutNotSet) + return id == localSizeSpecId[dim]; + localSizeSpecId[dim] = id; + return true; + } + int getLocalSizeSpecId(int dim) const { return localSizeSpecId[dim]; } + bool isLocalSizeSpecialized() const + { + // Return true if any component has been specialized. + return localSizeSpecId[0] != TQualifier::layoutNotSet || + localSizeSpecId[1] != TQualifier::layoutNotSet || + localSizeSpecId[2] != TQualifier::layoutNotSet; + } +#ifdef GLSLANG_WEB + void output(TInfoSink&, bool tree) { } + + bool isEsProfile() const { return false; } + bool getXfbMode() const { return false; } + bool isMultiStream() const { return false; } + TLayoutGeometry getOutputPrimitive() const { return ElgNone; } + bool getPostDepthCoverage() const { return false; } + bool getEarlyFragmentTests() const { return false; } + TLayoutDepth getDepth() const { return EldNone; } + bool getPixelCenterInteger() const { return false; } + void setOriginUpperLeft() { } + bool getOriginUpperLeft() const { return true; } + TInterlockOrdering getInterlockOrdering() const { return EioNone; } + + bool getAutoMapBindings() const { return false; } + bool getAutoMapLocations() const { return false; } + int getNumPushConstants() const { return 0; } + void addShaderRecordCount() { } + void addTaskNVCount() { } + void addTaskPayloadEXTCount() { } + void setUseVulkanMemoryModel() { } + bool usingVulkanMemoryModel() const { return false; } + bool usingPhysicalStorageBuffer() const { return false; } + bool usingVariablePointers() const { return false; } + unsigned getXfbStride(int buffer) const { return 0; } + bool hasLayoutDerivativeModeNone() const { return false; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return LayoutDerivativeNone; } +#else + void output(TInfoSink&, bool tree); + + bool isEsProfile() const { return profile == EEsProfile; } + + void setShiftBinding(TResourceType res, unsigned int shift) + { + shiftBinding[res] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) + processes.addIfNonZero(name, shift); + } + + unsigned int getShiftBinding(TResourceType res) const { return shiftBinding[res]; } + + void setShiftBindingForSet(TResourceType res, unsigned int shift, unsigned int set) + { + if (shift == 0) // ignore if there's no shift: it's a no-op. + return; + + shiftBindingForSet[res][set] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) { + processes.addProcess(name); + processes.addArgument(shift); + processes.addArgument(set); + } + } + + int getShiftBindingForSet(TResourceType res, unsigned int set) const + { + const auto shift = shiftBindingForSet[res].find(set); + return shift == shiftBindingForSet[res].end() ? 
-1 : shift->second;
+    }
+    bool hasShiftBindingForSet(TResourceType res) const { return !shiftBindingForSet[res].empty(); }
+
+    void setResourceSetBinding(const std::vector<std::string>& shift)
+    {
+        resourceSetBinding = shift;
+        if (shift.size() > 0) {
+            processes.addProcess("resource-set-binding");
+            for (int s = 0; s < (int)shift.size(); ++s)
+                processes.addArgument(shift[s]);
+        }
+    }
+    const std::vector<std::string>& getResourceSetBinding() const { return resourceSetBinding; }
+    void setAutoMapBindings(bool map)
+    {
+        autoMapBindings = map;
+        if (autoMapBindings)
+            processes.addProcess("auto-map-bindings");
+    }
+    bool getAutoMapBindings() const { return autoMapBindings; }
+    void setAutoMapLocations(bool map)
+    {
+        autoMapLocations = map;
+        if (autoMapLocations)
+            processes.addProcess("auto-map-locations");
+    }
+    bool getAutoMapLocations() const { return autoMapLocations; }
+
+#ifdef ENABLE_HLSL
+    void setFlattenUniformArrays(bool flatten)
+    {
+        flattenUniformArrays = flatten;
+        if (flattenUniformArrays)
+            processes.addProcess("flatten-uniform-arrays");
+    }
+    bool getFlattenUniformArrays() const { return flattenUniformArrays; }
+#endif
+    void setNoStorageFormat(bool b)
+    {
+        useUnknownFormat = b;
+        if (useUnknownFormat)
+            processes.addProcess("no-storage-format");
+    }
+    bool getNoStorageFormat() const { return useUnknownFormat; }
+    void setUseVulkanMemoryModel()
+    {
+        useVulkanMemoryModel = true;
+        processes.addProcess("use-vulkan-memory-model");
+    }
+    bool usingVulkanMemoryModel() const { return useVulkanMemoryModel; }
+    void setUsePhysicalStorageBuffer()
+    {
+        usePhysicalStorageBuffer = true;
+    }
+    bool usingPhysicalStorageBuffer() const { return usePhysicalStorageBuffer; }
+    void setUseVariablePointers()
+    {
+        useVariablePointers = true;
+        processes.addProcess("use-variable-pointers");
+    }
+    bool usingVariablePointers() const { return useVariablePointers; }
+
+#ifdef ENABLE_HLSL
+    template<class T> T addCounterBufferName(const T& name) const { return name + implicitCounterName; }
+    bool hasCounterBufferName(const TString& name) const {
+        size_t len = strlen(implicitCounterName);
+        return name.size() > len &&
+               name.compare(name.size() - len, len, implicitCounterName) == 0;
+    }
+#endif
+
+    void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode) { textureSamplerTransformMode = mode; }
+    int getNumPushConstants() const { return numPushConstants; }
+    void addShaderRecordCount() { ++numShaderRecordBlocks; }
+    void addTaskNVCount() { ++numTaskNVBlocks; }
+    void addTaskPayloadEXTCount() { ++numTaskEXTPayloads; }
+
+    bool setInvocations(int i)
+    {
+        if (invocations != TQualifier::layoutNotSet)
+            return invocations == i;
+        invocations = i;
+        return true;
+    }
+    int getInvocations() const { return invocations; }
+    bool setVertices(int m)
+    {
+        if (vertices != TQualifier::layoutNotSet)
+            return vertices == m;
+        vertices = m;
+        return true;
+    }
+    int getVertices() const { return vertices; }
+    bool setInputPrimitive(TLayoutGeometry p)
+    {
+        if (inputPrimitive != ElgNone)
+            return inputPrimitive == p;
+        inputPrimitive = p;
+        return true;
+    }
+    TLayoutGeometry getInputPrimitive() const { return inputPrimitive; }
+    bool setVertexSpacing(TVertexSpacing s)
+    {
+        if (vertexSpacing != EvsNone)
+            return vertexSpacing == s;
+        vertexSpacing = s;
+        return true;
+    }
+    TVertexSpacing getVertexSpacing() const { return vertexSpacing; }
+    bool setVertexOrder(TVertexOrder o)
+    {
+        if (vertexOrder != EvoNone)
+            return vertexOrder == o;
+        vertexOrder = o;
+        return true;
+    }
+    TVertexOrder getVertexOrder() const { return vertexOrder; }
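+    // Note: the set*() members above and below share a set-once-then-match
+    // pattern so stage modes can be merged across compilation units: the first
+    // call records the value and returns true; later calls return whether the
+    // new value agrees. Illustrative sketch (hypothetical caller code):
+    //     if (! intermediate.setVertices(3))
+    //         error("contradictory layout vertices declarations");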
+ void setPointMode() { pointMode = true; } + bool getPointMode() const { return pointMode; } + + bool setInterlockOrdering(TInterlockOrdering o) + { + if (interlockOrdering != EioNone) + return interlockOrdering == o; + interlockOrdering = o; + return true; + } + TInterlockOrdering getInterlockOrdering() const { return interlockOrdering; } + + void setXfbMode() { xfbMode = true; } + bool getXfbMode() const { return xfbMode; } + void setMultiStream() { multiStream = true; } + bool isMultiStream() const { return multiStream; } + bool setOutputPrimitive(TLayoutGeometry p) + { + if (outputPrimitive != ElgNone) + return outputPrimitive == p; + outputPrimitive = p; + return true; + } + TLayoutGeometry getOutputPrimitive() const { return outputPrimitive; } + void setPostDepthCoverage() { postDepthCoverage = true; } + bool getPostDepthCoverage() const { return postDepthCoverage; } + void setEarlyFragmentTests() { earlyFragmentTests = true; } + void setEarlyAndLateFragmentTestsAMD() { earlyAndLateFragmentTestsAMD = true; } + bool getEarlyFragmentTests() const { return earlyFragmentTests; } + bool getEarlyAndLateFragmentTestsAMD() const { return earlyAndLateFragmentTestsAMD; } + bool setDepth(TLayoutDepth d) + { + if (depthLayout != EldNone) + return depthLayout == d; + depthLayout = d; + return true; + } + bool setStencil(TLayoutStencil s) + { + if (stencilLayout != ElsNone) + return stencilLayout == s; + stencilLayout = s; + return true; + } + TLayoutDepth getDepth() const { return depthLayout; } + TLayoutStencil getStencil() const { return stencilLayout; } + void setOriginUpperLeft() { originUpperLeft = true; } + bool getOriginUpperLeft() const { return originUpperLeft; } + void setPixelCenterInteger() { pixelCenterInteger = true; } + bool getPixelCenterInteger() const { return pixelCenterInteger; } + void setTexCoordRedeclared() { texCoordBuiltinRedeclared = true; } + bool getTexCoordRedeclared() const { return texCoordBuiltinRedeclared; } + void addBlendEquation(TBlendEquationShift b) { blendEquations |= (1 << b); } + unsigned int getBlendEquations() const { return blendEquations; } + bool setXfbBufferStride(int buffer, unsigned stride) + { + if (xfbBuffers[buffer].stride != TQualifier::layoutXfbStrideEnd) + return xfbBuffers[buffer].stride == stride; + xfbBuffers[buffer].stride = stride; + return true; + } + unsigned getXfbStride(int buffer) const { return xfbBuffers[buffer].stride; } + int addXfbBufferOffset(const TType&); + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const; + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType) const; + void setLayoutOverrideCoverage() { layoutOverrideCoverage = true; } + bool getLayoutOverrideCoverage() const { return layoutOverrideCoverage; } + void setGeoPassthroughEXT() { geoPassthroughEXT = true; } + bool getGeoPassthroughEXT() const { return geoPassthroughEXT; } + void setLayoutDerivativeMode(ComputeDerivativeMode mode) { computeDerivativeMode = mode; } + bool hasLayoutDerivativeModeNone() const { return computeDerivativeMode != LayoutDerivativeNone; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return computeDerivativeMode; } + void setLayoutPrimitiveCulling() { layoutPrimitiveCulling = true; } + bool getLayoutPrimitiveCulling() const { return layoutPrimitiveCulling; } + bool setPrimitives(int m) + { + if (primitives != TQualifier::layoutNotSet) + return primitives == m; + primitives = m; + return true; + } + int getPrimitives() const { 
return primitives; } + const char* addSemanticName(const TString& name) + { + return semanticNameSet.insert(name).first->c_str(); + } + void addUniformLocationOverride(const char* nameStr, int location) + { + std::string name = nameStr; + uniformLocationOverrides[name] = location; + } + + int getUniformLocationOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = uniformLocationOverrides.find(name); + if (pos == uniformLocationOverrides.end()) + return -1; + else + return pos->second; + } + + void setUniformLocationBase(int base) { uniformLocationBase = base; } + int getUniformLocationBase() const { return uniformLocationBase; } + + void setNeedsLegalization() { needToLegalize = true; } + bool needsLegalization() const { return needToLegalize; } + + void setBinaryDoubleOutput() { binaryDoubleOutput = true; } + bool getBinaryDoubleOutput() { return binaryDoubleOutput; } + + void setSubgroupUniformControlFlow() { subgroupUniformControlFlow = true; } + bool getSubgroupUniformControlFlow() const { return subgroupUniformControlFlow; } + + // GL_EXT_spirv_intrinsics + void insertSpirvRequirement(const TSpirvRequirement* spirvReq); + bool hasSpirvRequirement() const { return spirvRequirement != nullptr; } + const TSpirvRequirement& getSpirvRequirement() const { return *spirvRequirement; } + void insertSpirvExecutionMode(int executionMode, const TIntermAggregate* args = nullptr); + void insertSpirvExecutionModeId(int executionMode, const TIntermAggregate* args); + bool hasSpirvExecutionMode() const { return spirvExecutionMode != nullptr; } + const TSpirvExecutionMode& getSpirvExecutionMode() const { return *spirvExecutionMode; } +#endif // GLSLANG_WEB + + void addBlockStorageOverride(const char* nameStr, TBlockStorageClass backing) + { + std::string name(nameStr); + blockBackingOverrides[name] = backing; + } + TBlockStorageClass getBlockStorageOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = blockBackingOverrides.find(name); + if (pos == blockBackingOverrides.end()) + return EbsNone; + else + return pos->second; + } +#ifdef ENABLE_HLSL + void setHlslFunctionality1() { hlslFunctionality1 = true; } + bool getHlslFunctionality1() const { return hlslFunctionality1; } + void setHlslOffsets() + { + hlslOffsets = true; + if (hlslOffsets) + processes.addProcess("hlsl-offsets"); + } + bool usingHlslOffsets() const { return hlslOffsets; } + void setHlslIoMapping(bool b) + { + hlslIoMapping = b; + if (hlslIoMapping) + processes.addProcess("hlsl-iomap"); + } + bool usingHlslIoMapping() { return hlslIoMapping; } +#else + bool getHlslFunctionality1() const { return false; } + bool usingHlslOffsets() const { return false; } + bool usingHlslIoMapping() { return false; } +#endif + + bool usingScalarBlockLayout() const { + for (auto extIt = requestedExtensions.begin(); extIt != requestedExtensions.end(); ++extIt) { + if (*extIt == E_GL_EXT_scalar_block_layout) + return true; + } + return false; + } + + bool IsRequestedExtension(const char* extension) const + { + return (requestedExtensions.find(extension) != requestedExtensions.end()); + } + + void addToCallGraph(TInfoSink&, const TString& caller, const TString& callee); + void merge(TInfoSink&, TIntermediate&); + void finalCheck(TInfoSink&, bool keepUncalled); + + void mergeGlobalUniformBlocks(TInfoSink& infoSink, TIntermediate& unit, bool mergeExistingOnly); + void mergeUniformObjects(TInfoSink& infoSink, TIntermediate& unit); + void checkStageIO(TInfoSink&, TIntermediate&); + + bool 
buildConvertOp(TBasicType dst, TBasicType src, TOperator& convertOp) const;
+    TIntermTyped* createConversion(TBasicType convertTo, TIntermTyped* node) const;
+
+    void addIoAccessed(const TString& name) { ioAccessed.insert(name); }
+    bool inIoAccessed(const TString& name) const { return ioAccessed.find(name) != ioAccessed.end(); }
+
+    int addUsedLocation(const TQualifier&, const TType&, bool& typeCollision);
+    int checkLocationRange(int set, const TIoRange& range, const TType&, bool& typeCollision);
+    int checkLocationRT(int set, int location);
+    int addUsedOffsets(int binding, int offset, int numOffsets);
+    bool addUsedConstantId(int id);
+    static int computeTypeLocationSize(const TType&, EShLanguage);
+    static int computeTypeUniformLocationSize(const TType&);
+
+    static int getBaseAlignmentScalar(const TType&, int& size);
+    static int getBaseAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor);
+    static int getScalarAlignment(const TType&, int& size, int& stride, bool rowMajor);
+    static int getMemberAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor);
+    static bool improperStraddle(const TType& type, int size, int offset);
+    static void updateOffset(const TType& parentType, const TType& memberType, int& offset, int& memberSize);
+    static int getOffset(const TType& type, int index);
+    static int getBlockSize(const TType& blockType);
+    static int computeBufferReferenceTypeSize(const TType&);
+    static bool isIoResizeArray(const TType& type, EShLanguage language);
+
+    bool promote(TIntermOperator*);
+    void setNanMinMaxClamp(bool setting) { nanMinMaxClamp = setting; }
+    bool getNanMinMaxClamp() const { return nanMinMaxClamp; }
+
+    void setSourceFile(const char* file) { if (file != nullptr) sourceFile = file; }
+    const std::string& getSourceFile() const { return sourceFile; }
+    void addSourceText(const char* text, size_t len) { sourceText.append(text, len); }
+    const std::string& getSourceText() const { return sourceText; }
+    const std::map<std::string, std::string>& getIncludeText() const { return includeText; }
+    void addIncludeText(const char* name, const char* text, size_t len) { includeText[name].assign(text,len); }
+    void addProcesses(const std::vector<std::string>& p)
+    {
+        for (int i = 0; i < (int)p.size(); ++i)
+            processes.addProcess(p[i]);
+    }
+    void addProcess(const std::string& process) { processes.addProcess(process); }
+    void addProcessArgument(const std::string& arg) { processes.addArgument(arg); }
+    const std::vector<std::string>& getProcesses() const { return processes.getProcesses(); }
+    unsigned long long getUniqueId() const { return uniqueId; }
+    void setUniqueId(unsigned long long id) { uniqueId = id; }
+
+    // Certain explicit conversions are allowed conditionally
+#ifdef GLSLANG_WEB
+    bool getArithemeticInt8Enabled() const { return false; }
+    bool getArithemeticInt16Enabled() const { return false; }
+    bool getArithemeticFloat16Enabled() const { return false; }
+    void updateNumericFeature(TNumericFeatures::feature f, bool on) { }
+#else
+    bool getArithemeticInt8Enabled() const {
+        return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) ||
+               numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int8);
+    }
+    bool getArithemeticInt16Enabled() const {
+        return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) ||
+               numericFeatures.contains(TNumericFeatures::gpu_shader_int16) ||
+               numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int16);
+    }
+
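+    // Illustrative sketch (hypothetical caller code): int16 arithmetic becomes
+    // legal once any covering numeric feature has been recorded, e.g.
+    //     intermediate.updateNumericFeature(
+    //         TNumericFeatures::shader_explicit_arithmetic_types_int16, true);
+    //     assert(intermediate.getArithemeticInt16Enabled());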
+    bool getArithemeticFloat16Enabled() const {
+        return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) ||
+               numericFeatures.contains(TNumericFeatures::gpu_shader_half_float) ||
+               numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_float16);
+    }
+    void updateNumericFeature(TNumericFeatures::feature f, bool on)
+        { on ? numericFeatures.insert(f) : numericFeatures.erase(f); }
+#endif
+
+protected:
+    TIntermSymbol* addSymbol(long long Id, const TString&, const TType&, const TConstUnionArray&, TIntermTyped* subtree, const TSourceLoc&);
+    void error(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount);
+    void warn(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount);
+    void mergeCallGraphs(TInfoSink&, TIntermediate&);
+    void mergeModes(TInfoSink&, TIntermediate&);
+    void mergeTrees(TInfoSink&, TIntermediate&);
+    void seedIdMap(TIdMaps& idMaps, long long& IdShift);
+    void remapIds(const TIdMaps& idMaps, long long idShift, TIntermediate&);
+    void mergeBodies(TInfoSink&, TIntermSequence& globals, const TIntermSequence& unitGlobals);
+    void mergeLinkerObjects(TInfoSink&, TIntermSequence& linkerObjects, const TIntermSequence& unitLinkerObjects, EShLanguage);
+    void mergeBlockDefinitions(TInfoSink&, TIntermSymbol* block, TIntermSymbol* unitBlock, TIntermediate* unitRoot);
+    void mergeImplicitArraySizes(TType&, const TType&);
+    void mergeErrorCheck(TInfoSink&, const TIntermSymbol&, const TIntermSymbol&, EShLanguage);
+    void checkCallGraphCycles(TInfoSink&);
+    void checkCallGraphBodies(TInfoSink&, bool keepUncalled);
+    void inOutLocationCheck(TInfoSink&);
+    void sharedBlockCheck(TInfoSink&);
+    bool userOutputUsed() const;
+    bool isSpecializationOperation(const TIntermOperator&) const;
+    bool isNonuniformPropagating(TOperator) const;
+    bool promoteUnary(TIntermUnary&);
+    bool promoteBinary(TIntermBinary&);
+    void addSymbolLinkageNode(TIntermAggregate*& linkage, TSymbolTable&, const TString&);
+    bool promoteAggregate(TIntermAggregate&);
+    void pushSelector(TIntermSequence&, const TVectorSelector&, const TSourceLoc&);
+    void pushSelector(TIntermSequence&, const TMatrixSelector&, const TSourceLoc&);
+    bool specConstantPropagates(const TIntermTyped&, const TIntermTyped&);
+    void performTextureUpgradeAndSamplerRemovalTransformation(TIntermNode* root);
+    bool isConversionAllowed(TOperator op, TIntermTyped* node) const;
+    std::tuple<TBasicType, TBasicType> getConversionDestinationType(TBasicType type0, TBasicType type1, TOperator op) const;
+
+    static const char* getResourceName(TResourceType);
+
+    const EShLanguage language;  // stage, known at construction time
+    std::string entryPointName;
+    std::string entryPointMangledName;
+    typedef std::list<TCall> TGraph;
+    TGraph callGraph;
+
+    EProfile profile;                           // source profile
+    int version;                                // source version
+    SpvVersion spvVersion;
+    TIntermNode* treeRoot;
+    std::set<std::string> requestedExtensions;  // cumulation of all enabled or required extensions; not connected to what subset of the shader used them
+    MustBeAssigned<TBuiltInResource> resources;
+    int numEntryPoints;
+    int numErrors;
+    int numPushConstants;
+    bool recursive;
+    bool invertY;
+    bool dxPositionW;
+    bool enhancedMsgs;
+    bool debugInfo;
+    bool useStorageBuffer;
+    bool invariantAll;
+    bool nanMinMaxClamp;  // true if desiring min/max/clamp to favor non-NaN over NaN
+    bool depthReplacing;
+    bool stencilReplacing;
+    int localSize[3];
+    bool localSizeNotDefault[3];
+    int localSizeSpecId[3];
+    unsigned long long uniqueId;
+
+    std::string globalUniformBlockName;
+    std::string atomicCounterBlockName;
+    unsigned int globalUniformBlockSet;
+    unsigned int globalUniformBlockBinding;
+    unsigned int atomicCounterBlockSet;
+
+#ifndef GLSLANG_WEB
+public:
+    const char* const implicitThisName;
+    const char* const implicitCounterName;
+protected:
+    EShSource source;          // source language, known a bit later
+    bool useVulkanMemoryModel;
+    int invocations;
+    int vertices;
+    TLayoutGeometry inputPrimitive;
+    TLayoutGeometry outputPrimitive;
+    bool pixelCenterInteger;
+    bool originUpperLeft;
+    bool texCoordBuiltinRedeclared;
+    TVertexSpacing vertexSpacing;
+    TVertexOrder vertexOrder;
+    TInterlockOrdering interlockOrdering;
+    bool pointMode;
+    bool earlyFragmentTests;
+    bool postDepthCoverage;
+    bool earlyAndLateFragmentTestsAMD;
+    TLayoutDepth depthLayout;
+    TLayoutStencil stencilLayout;
+    bool hlslFunctionality1;
+    int blendEquations;        // an 'or'ing of masks of shifts of TBlendEquationShift
+    bool xfbMode;
+    std::vector<TXfbBuffer> xfbBuffers;  // all the data we need to track per xfb buffer
+    bool multiStream;
+    bool layoutOverrideCoverage;
+    bool geoPassthroughEXT;
+    int numShaderRecordBlocks;
+    ComputeDerivativeMode computeDerivativeMode;
+    int primitives;
+    int numTaskNVBlocks;
+    bool layoutPrimitiveCulling;
+    int numTaskEXTPayloads;
+
+    // Base shift values
+    std::array<unsigned int, EResCount> shiftBinding;
+
+    // Per-descriptor-set shift values
+    std::array<std::map<unsigned int, unsigned int>, EResCount> shiftBindingForSet;
+
+    std::vector<std::string> resourceSetBinding;
+    bool autoMapBindings;
+    bool autoMapLocations;
+    bool flattenUniformArrays;
+    bool useUnknownFormat;
+    bool hlslOffsets;
+    bool hlslIoMapping;
+    bool useVariablePointers;
+
+    std::set<TString> semanticNameSet;
+
+    EShTextureSamplerTransformMode textureSamplerTransformMode;
+
+    bool needToLegalize;
+    bool binaryDoubleOutput;
+    bool subgroupUniformControlFlow;
+    bool usePhysicalStorageBuffer;
+
+    TSpirvRequirement* spirvRequirement;
+    TSpirvExecutionMode* spirvExecutionMode;
+
+    std::unordered_map<std::string, int> uniformLocationOverrides;
+    int uniformLocationBase;
+    TNumericFeatures numericFeatures;
+#endif
+    std::unordered_map<std::string, TBlockStorageClass> blockBackingOverrides;
+
+    std::unordered_set<long long> usedConstantId;  // specialization constant ids used
+    std::vector<TOffsetRange> usedAtomics;         // sets of bindings used by atomic counters
+    std::vector<TIoRange> usedIo[4];               // sets of used locations, one for each of in, out, uniform, and buffers
+    std::vector<TRange> usedIoRT[2];               // sets of used location, one for rayPayload/rayPayloadIN and the other
+                                                   // for callableData/callableDataIn
+    // set of names of statically read/written I/O that might need extra checking
+    std::set<TString> ioAccessed;
+
+    // source code of shader, useful as part of debug information
+    std::string sourceFile;
+    std::string sourceText;
+
+    // Included text. First string is a name, second is the included text
+    std::map<std::string, std::string> includeText;
+
+    // for OpModuleProcessed, or equivalent
+    TProcesses processes;
+
+private:
+    void operator=(TIntermediate&);  // prevent assignments
+};
+
+} // end namespace glslang
+
+#endif // _LOCAL_INTERMEDIATE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/parseVersions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/parseVersions.h
new file mode 100644
index 0000000..3c52ff1
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/parseVersions.h
@@ -0,0 +1,240 @@
+//
+// Copyright (C) 2015-2018 Google, Inc.
+// Copyright (C) 2017 ARM Limited.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+// This is implemented in Versions.cpp
+
+#ifndef _PARSE_VERSIONS_INCLUDED_
+#define _PARSE_VERSIONS_INCLUDED_
+
+#include "../Public/ShaderLang.h"
+#include "../Include/InfoSink.h"
+#include "Scan.h"
+
+#include <map>
+
+namespace glslang {
+
+//
+// Base class for parse helpers.
+// This just has version-related information and checking.
+// This class should be sufficient for preprocessing.
+//
+class TParseVersions {
+public:
+    TParseVersions(TIntermediate& interm, int version, EProfile profile,
+                   const SpvVersion& spvVersion, EShLanguage language, TInfoSink& infoSink,
+                   bool forwardCompatible, EShMessages messages)
+        :
+#if !defined(GLSLANG_WEB)
+        forwardCompatible(forwardCompatible),
+        profile(profile),
+#endif
+        infoSink(infoSink), version(version),
+        language(language),
+        spvVersion(spvVersion),
+        intermediate(interm), messages(messages), numErrors(0), currentScanner(0) { }
+    virtual ~TParseVersions() { }
+    void requireStage(const TSourceLoc&, EShLanguageMask, const char* featureDesc);
+    void requireStage(const TSourceLoc&, EShLanguage, const char* featureDesc);
+#ifdef GLSLANG_WEB
+    const EProfile profile = EEsProfile;
+    bool isEsProfile() const { return true; }
+    void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc)
+    {
+        if (! (EEsProfile & profileMask))
+            error(loc, "not supported with this profile:", featureDesc, ProfileName(profile));
+    }
+    void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions,
+                         const char* const extensions[], const char* featureDesc)
+    {
+        if ((EEsProfile & profileMask) && (minVersion == 0 || version < minVersion))
+            error(loc, "not supported for this version or the enabled extensions", featureDesc, "");
+    }
+    void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension,
+                         const char* featureDesc)
+    {
+        profileRequires(loc, profileMask, minVersion, extension ?
1 : 0, &extension, featureDesc); + } + void initializeExtensionBehavior() { } + void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc) { } + void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc) { } + void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + TExtensionBehavior getExtensionBehavior(const char*) { return EBhMissing; } + bool extensionTurnedOn(const char* const extension) { return false; } + bool extensionsTurnedOn(int numExtensions, const char* const extensions[]) { return false; } + void updateExtensionBehavior(int line, const char* const extension, const char* behavior) { } + void updateExtensionBehavior(const char* const extension, TExtensionBehavior) { } + void checkExtensionStage(const TSourceLoc&, const char* const extension) { } + void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior) { } + void fullIntegerCheck(const TSourceLoc&, const char* op) { } + void doubleCheck(const TSourceLoc&, const char* op) { } + bool float16Arithmetic() { return false; } + void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int16Arithmetic() { return false; } + void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int8Arithmetic() { return false; } + void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + void int64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + bool relaxedErrors() const { return false; } + bool suppressWarnings() const { return true; } + bool isForwardCompatible() const { return false; } +#else + bool forwardCompatible; // true if errors are to be given for use of deprecated features + EProfile profile; // the declared profile in the shader (core by default) + bool isEsProfile() const { return profile == EEsProfile; } + void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions, + const char* const extensions[], const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension, + const char* featureDesc); + virtual void initializeExtensionBehavior(); + virtual void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc); + virtual void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc); + virtual void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual TExtensionBehavior getExtensionBehavior(const char*); + virtual bool extensionTurnedOn(const char* const extension); + virtual bool extensionsTurnedOn(int numExtensions, const char* const extensions[]); + virtual void updateExtensionBehavior(int line, const char* const extension, const 
char* behavior); + virtual void updateExtensionBehavior(const char* const extension, TExtensionBehavior); + virtual bool checkExtensionsRequested(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void checkExtensionStage(const TSourceLoc&, const char* const extension); + virtual void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior); + virtual void fullIntegerCheck(const TSourceLoc&, const char* op); + + virtual void unimplemented(const TSourceLoc&, const char* featureDesc); + virtual void doubleCheck(const TSourceLoc&, const char* op); + virtual void float16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void float16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool float16Arithmetic(); + virtual void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int16Arithmetic(); + virtual void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int8ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int8Arithmetic(); + virtual void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void float16OpaqueCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void int64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt8Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void fcoopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void intcoopmatCheck(const TSourceLoc&, const char *op, bool builtIn = false); + bool relaxedErrors() const { return (messages & EShMsgRelaxedErrors) != 0; } + bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; } + bool isForwardCompatible() const { return forwardCompatible; } +#endif // GLSLANG_WEB + virtual void spvRemoved(const TSourceLoc&, const char* op); + virtual void vulkanRemoved(const TSourceLoc&, const char* op); + virtual void requireVulkan(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char *op, unsigned int version); + + +#if defined(GLSLANG_WEB) && !defined(GLSLANG_WEB_DEVEL) + void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } + void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } +#else + virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) 
= 0;
+    virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken,
+        const char* szExtraInfoFormat, ...) = 0;
+    virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken,
+        const char* szExtraInfoFormat, ...) = 0;
+    virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken,
+        const char* szExtraInfoFormat, ...) = 0;
+#endif
+
+    void addError() { ++numErrors; }
+    int getNumErrors() const { return numErrors; }
+
+    void setScanner(TInputScanner* scanner) { currentScanner = scanner; }
+    TInputScanner* getScanner() const { return currentScanner; }
+    const TSourceLoc& getCurrentLoc() const { return currentScanner->getSourceLoc(); }
+    void setCurrentLine(int line) { currentScanner->setLine(line); }
+    void setCurrentColumn(int col) { currentScanner->setColumn(col); }
+    void setCurrentSourceName(const char* name) { currentScanner->setFile(name); }
+    void setCurrentString(int string) { currentScanner->setString(string); }
+
+    void getPreamble(std::string&);
+#ifdef ENABLE_HLSL
+    bool isReadingHLSL() const { return (messages & EShMsgReadHlsl) == EShMsgReadHlsl; }
+    bool hlslEnable16BitTypes() const { return (messages & EShMsgHlslEnable16BitTypes) != 0; }
+    bool hlslDX9Compatible() const { return (messages & EShMsgHlslDX9Compatible) != 0; }
+#else
+    bool isReadingHLSL() const { return false; }
+#endif
+
+    TInfoSink& infoSink;
+
+    // compilation mode
+    int version;                 // version, updated by #version in the shader
+    EShLanguage language;        // really the stage
+    SpvVersion spvVersion;
+    TIntermediate& intermediate; // helper for making and hooking up pieces of the parse tree
+
+protected:
+    TMap<TString, TExtensionBehavior> extensionBehavior;  // for each extension string, what its current behavior is
+    TMap<TString, unsigned int> extensionMinSpv;          // for each extension string, store minimum spirv required
+    EShMessages messages;        // errors/warnings/rule-sets
+    int numErrors;               // number of compile-time errors encountered
+    TInputScanner* currentScanner;
+
+private:
+    explicit TParseVersions(const TParseVersions&);
+    TParseVersions& operator=(const TParseVersions&);
+};
+
+} // end namespace glslang
+
+#endif // _PARSE_VERSIONS_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/preprocessor/PpContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/preprocessor/PpContext.h
new file mode 100644
index 0000000..714b5ea
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/preprocessor/PpContext.h
@@ -0,0 +1,703 @@
+//
+// Copyright (C) 2013 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. + +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+\****************************************************************************/
+
+#ifndef PPCONTEXT_H
+#define PPCONTEXT_H
+
+#include <stack>
+#include <unordered_map>
+#include <sstream>
+
+#include "../ParseHelper.h"
+#include "PpTokens.h"
+
+/* windows only pragma */
+#ifdef _MSC_VER
+    #pragma warning(disable : 4127)
+#endif
+
+namespace glslang {
+
+class TPpToken {
+public:
+    TPpToken() { clear(); }
+    void clear()
+    {
+        space = false;
+        i64val = 0;
+        loc.init();
+        name[0] = 0;
+    }
+
+    // Used for comparing macro definitions, so checks what is relevant for that.
+    bool operator==(const TPpToken& right) const
+    {
+        return space == right.space &&
+               ival == right.ival && dval == right.dval && i64val == right.i64val &&
+               strncmp(name, right.name, MaxTokenLength) == 0;
+    }
+    bool operator!=(const TPpToken& right) const { return ! operator==(right); }
+
+    TSourceLoc loc;
+    // True if a space (for white space or a removed comment) should also be
+    // recognized, in front of the token returned:
+    bool space;
+    // Numeric value of the token:
+    union {
+        int ival;
+        double dval;
+        long long i64val;
+    };
+    // Text string of the token:
+    char name[MaxTokenLength + 1];
+};
+
+class TStringAtomMap {
+//
+// Implementation is in PpAtom.cpp
+//
+// Maintain a bi-directional mapping between relevant preprocessor strings and
+// "atoms", which are unique integers (small, contiguous, not hash-like) per string.
+//
+public:
+    TStringAtomMap();
+
+    // Map string -> atom.
+    // Return 0 if no existing string.
+    int getAtom(const char* s) const
+    {
+        auto it = atomMap.find(s);
+        return it == atomMap.end() ? 0 : it->second;
+    }
+
+    // Map a new or existing string -> atom, inventing a new atom if necessary.
+    int getAddAtom(const char* s)
+    {
+        int atom = getAtom(s);
+        if (atom == 0) {
+            atom = nextAtom++;
+            addAtomFixed(s, atom);
+        }
+        return atom;
+    }
+
+    // Map atom -> string.
+    const char* getString(int atom) const { return stringMap[atom]->c_str(); }
+
+protected:
+    TStringAtomMap(TStringAtomMap&);
+    TStringAtomMap& operator=(TStringAtomMap&);
+
+    TUnorderedMap<TString, int> atomMap;
+    TVector<const TString*> stringMap;  // these point into the TString in atomMap
+    int nextAtom;
+
+    // Bad source characters can lead to bad atoms, so gracefully handle those by
+    // pre-filling the table with them (to avoid if tests later).
+    TString badToken;
+
+    // Add bi-directional mappings:
+    //  - string -> atom
+    //  - atom -> string
+    void addAtomFixed(const char* s, int atom)
+    {
+        auto it = atomMap.insert(std::pair<const TString, int>(s, atom)).first;
+        if (stringMap.size() < (size_t)atom + 1)
+            stringMap.resize(atom + 100, &badToken);
+        stringMap[atom] = &it->first;
+    }
+};
+
+class TInputScanner;
+
+enum MacroExpandResult {
+    MacroExpandNotStarted, // macro not expanded, which might not be an error
+    MacroExpandError,      // a clear error occurred while expanding, no expansion
+    MacroExpandStarted,    // macro expansion process has started
+    MacroExpandUndef       // macro is undefined and will be expanded
+};
+
+// This class is the result of turning a huge pile of C code communicating through globals
+// into a class. This was done to allow instancing to attain thread safety.
+// Don't expect too much in terms of OO design.
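+// Illustrative usage sketch for TStringAtomMap above (hypothetical caller code):
+//     TStringAtomMap atoms;
+//     int a = atoms.getAddAtom("define");                 // invents an atom on first use
+//     assert(atoms.getAtom("define") == a);               // later lookups are stable
+//     assert(strcmp(atoms.getString(a), "define") == 0);  // and reverse-map to the string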
+class TPpContext { +public: + TPpContext(TParseContextBase&, const std::string& rootFileName, TShader::Includer&); + virtual ~TPpContext(); + + void setPreamble(const char* preamble, size_t length); + + int tokenize(TPpToken& ppToken); + int tokenPaste(int token, TPpToken&); + + class tInput { + public: + tInput(TPpContext* p) : done(false), pp(p) { } + virtual ~tInput() { } + + virtual int scan(TPpToken*) = 0; + virtual int getch() = 0; + virtual void ungetch() = 0; + virtual bool peekPasting() { return false; } // true when about to see ## + virtual bool peekContinuedPasting(int) { return false; } // true when non-spaced tokens can paste + virtual bool endOfReplacementList() { return false; } // true when at the end of a macro replacement list (RHS of #define) + virtual bool isMacroInput() { return false; } + + // Will be called when we start reading tokens from this instance + virtual void notifyActivated() {} + // Will be called when we do not read tokens from this instance anymore + virtual void notifyDeleted() {} + protected: + bool done; + TPpContext* pp; + }; + + void setInput(TInputScanner& input, bool versionWillBeError); + + void pushInput(tInput* in) + { + inputStack.push_back(in); + in->notifyActivated(); + } + void popInput() + { + inputStack.back()->notifyDeleted(); + delete inputStack.back(); + inputStack.pop_back(); + } + + // + // From PpTokens.cpp + // + + // Capture the needed parts of a token stream for macro recording/playback. + class TokenStream { + public: + // Manage a stream of these 'Token', which capture the relevant parts + // of a TPpToken, plus its atom. + class Token { + public: + Token(int atom, const TPpToken& ppToken) : + atom(atom), + space(ppToken.space), + i64val(ppToken.i64val), + name(ppToken.name) { } + int get(TPpToken& ppToken) + { + ppToken.clear(); + ppToken.space = space; + ppToken.i64val = i64val; + snprintf(ppToken.name, sizeof(ppToken.name), "%s", name.c_str()); + return atom; + } + bool isAtom(int a) const { return atom == a; } + int getAtom() const { return atom; } + bool nonSpaced() const { return !space; } + protected: + Token() {} + int atom; + bool space; // did a space precede the token? + long long i64val; + TString name; + }; + + TokenStream() : currentPos(0) { } + + void putToken(int token, TPpToken* ppToken); + bool peekToken(int atom) { return !atEnd() && stream[currentPos].isAtom(atom); } + bool peekContinuedPasting(int atom) + { + // This is basically necessary because, for example, the PP + // tokenizer only accepts valid numeric-literals plus suffixes, so + // separates numeric-literals plus bad suffix into two tokens, which + // should get both pasted together as one token when token pasting. + // + // The following code is a bit more generalized than the above example. 
+            if (!atEnd() && atom == PpAtomIdentifier && stream[currentPos].nonSpaced()) {
+                switch(stream[currentPos].getAtom()) {
+                    case PpAtomConstInt:
+                    case PpAtomConstUint:
+                    case PpAtomConstInt64:
+                    case PpAtomConstUint64:
+                    case PpAtomConstInt16:
+                    case PpAtomConstUint16:
+                    case PpAtomConstFloat:
+                    case PpAtomConstDouble:
+                    case PpAtomConstFloat16:
+                    case PpAtomConstString:
+                    case PpAtomIdentifier:
+                        return true;
+                    default:
+                        break;
+                }
+            }
+
+            return false;
+        }
+        int getToken(TParseContextBase&, TPpToken*);
+        bool atEnd() { return currentPos >= stream.size(); }
+        bool peekTokenizedPasting(bool lastTokenPastes);
+        bool peekUntokenizedPasting();
+        void reset() { currentPos = 0; }
+
+    protected:
+        TVector<Token> stream;
+        size_t currentPos;
+    };
+
+    //
+    // From Pp.cpp
+    //
+
+    struct MacroSymbol {
+        MacroSymbol() : functionLike(0), busy(0), undef(0) { }
+        TVector<int> args;
+        TokenStream body;
+        unsigned functionLike : 1;  // 0 means object-like, 1 means function-like
+        unsigned busy         : 1;
+        unsigned undef        : 1;
+    };
+
+    typedef TMap<int, MacroSymbol> TSymbolMap;
+    TSymbolMap macroDefs;  // map atoms to macro definitions
+    MacroSymbol* lookupMacroDef(int atom)
+    {
+        auto existingMacroIt = macroDefs.find(atom);
+        return (existingMacroIt == macroDefs.end()) ? nullptr : &(existingMacroIt->second);
+    }
+    void addMacroDef(int atom, MacroSymbol& macroDef) { macroDefs[atom] = macroDef; }
+
+protected:
+    TPpContext(TPpContext&);
+    TPpContext& operator=(TPpContext&);
+
+    TStringAtomMap atomStrings;
+    char*   preamble;       // string to parse, all before line 1 of string 0, it is 0 if no preamble
+    int     preambleLength;
+    char**  strings;        // official strings of shader, starting a string 0 line 1
+    size_t* lengths;
+    int     numStrings;     // how many official strings there are
+    int     currentString;  // which string we're currently parsing (-1 for preamble)
+
+    // Scanner data:
+    int previous_token;
+    TParseContextBase& parseContext;
+
+    // Get the next token from *stack* of input sources, popping input sources
+    // that are out of tokens, down until an input source is found that has a token.
+    // Return EndOfInput when there are no more tokens to be found by doing this.
+    int scanToken(TPpToken* ppToken)
+    {
+        int token = EndOfInput;
+
+        while (! inputStack.empty()) {
+            token = inputStack.back()->scan(ppToken);
+            if (token != EndOfInput || inputStack.empty())
+                break;
+            popInput();
+        }
+
+        return token;
+    }
+    int  getChar() { return inputStack.back()->getch(); }
+    void ungetChar() { inputStack.back()->ungetch(); }
+    bool peekPasting() { return !inputStack.empty() && inputStack.back()->peekPasting(); }
+    bool peekContinuedPasting(int a)
+    {
+        return !inputStack.empty() && inputStack.back()->peekContinuedPasting(a);
+    }
+    bool endOfReplacementList() { return inputStack.empty() || inputStack.back()->endOfReplacementList(); }
+    bool isMacroInput() { return inputStack.size() > 0 && inputStack.back()->isMacroInput(); }
+
+    static const int maxIfNesting = 65;
+
+    int ifdepth;                  // current #if-#else-#endif nesting in the cpp.c file (pre-processor)
+    bool elseSeen[maxIfNesting];  // Keep a track of whether an else has been seen at a particular depth
+    int elsetracker;              // #if-#else and #endif constructs...Counter.
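+    // Illustrative flow (hypothetical): expanding a macro pushes a tMacroInput
+    // onto inputStack; scanToken() then drains it and pops it once it reports
+    // EndOfInput, transparently resuming the input source underneath:
+    //     pushInput(new tMacroInput(this));   // begin reading macro replacement
+    //     int tok = scanToken(&ppToken);      // pops exhausted inputs as it goes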
+
+    class tMacroInput : public tInput {
+    public:
+        tMacroInput(TPpContext* pp) : tInput(pp), prepaste(false), postpaste(false) { }
+        virtual ~tMacroInput()
+        {
+            for (size_t i = 0; i < args.size(); ++i)
+                delete args[i];
+            for (size_t i = 0; i < expandedArgs.size(); ++i)
+                delete expandedArgs[i];
+        }
+
+        virtual int scan(TPpToken*) override;
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+        bool peekPasting() override { return prepaste; }
+        bool peekContinuedPasting(int a) override { return mac->body.peekContinuedPasting(a); }
+        bool endOfReplacementList() override { return mac->body.atEnd(); }
+        bool isMacroInput() override { return true; }
+
+        MacroSymbol *mac;
+        TVector<TokenStream*> args;
+        TVector<TokenStream*> expandedArgs;
+
+    protected:
+        bool prepaste;   // true if we are just before ##
+        bool postpaste;  // true if we are right after ##
+    };
+
+    class tMarkerInput : public tInput {
+    public:
+        tMarkerInput(TPpContext* pp) : tInput(pp) { }
+        virtual int scan(TPpToken*) override
+        {
+            if (done)
+                return EndOfInput;
+            done = true;
+
+            return marker;
+        }
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+        static const int marker = -3;
+    };
+
+    class tZeroInput : public tInput {
+    public:
+        tZeroInput(TPpContext* pp) : tInput(pp) { }
+        virtual int scan(TPpToken*) override;
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+    };
+
+    std::vector<tInput*> inputStack;
+    bool errorOnVersion;
+    bool versionSeen;
+
+    //
+    // from Pp.cpp
+    //
+
+    // Used to obtain #include content.
+    TShader::Includer& includer;
+
+    int CPPdefine(TPpToken * ppToken);
+    int CPPundef(TPpToken * ppToken);
+    int CPPelse(int matchelse, TPpToken * ppToken);
+    int extraTokenCheck(int atom, TPpToken* ppToken, int token);
+    int eval(int token, int precedence, bool shortCircuit, int& res, bool& err, TPpToken * ppToken);
+    int evalToToken(int token, bool shortCircuit, int& res, bool& err, TPpToken * ppToken);
+    int CPPif (TPpToken * ppToken);
+    int CPPifdef(int defined, TPpToken * ppToken);
+    int CPPinclude(TPpToken * ppToken);
+    int CPPline(TPpToken * ppToken);
+    int CPPerror(TPpToken * ppToken);
+    int CPPpragma(TPpToken * ppToken);
+    int CPPversion(TPpToken * ppToken);
+    int CPPextension(TPpToken * ppToken);
+    int readCPPline(TPpToken * ppToken);
+    int scanHeaderName(TPpToken* ppToken, char delimit);
+    TokenStream* PrescanMacroArg(TokenStream&, TPpToken*, bool newLineOkay);
+    MacroExpandResult MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOkay);
+
+    //
+    // From PpTokens.cpp
+    //
+    void pushTokenStreamInput(TokenStream&, bool pasting = false);
+    void UngetToken(int token, TPpToken*);
+
+    class tTokenInput : public tInput {
+    public:
+        tTokenInput(TPpContext* pp, TokenStream* t, bool prepasting) :
+            tInput(pp),
+            tokens(t),
+            lastTokenPastes(prepasting) { }
+        virtual int scan(TPpToken *ppToken) override { return tokens->getToken(pp->parseContext, ppToken); }
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+        virtual bool peekPasting() override { return tokens->peekTokenizedPasting(lastTokenPastes); }
+        bool peekContinuedPasting(int a) override { return tokens->peekContinuedPasting(a); }
+    protected:
+        TokenStream* tokens;
+        bool lastTokenPastes;  // true if the last token in the input is to be pasted, rather than consumed as a token
+    };
+
+    class tUngotTokenInput : public tInput {
+
public: + tUngotTokenInput(TPpContext* pp, int t, TPpToken* p) : tInput(pp), token(t), lval(*p) { } + virtual int scan(TPpToken *) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + protected: + int token; + TPpToken lval; + }; + + // + // From PpScanner.cpp + // + class tStringInput : public tInput { + public: + tStringInput(TPpContext* pp, TInputScanner& i) : tInput(pp), input(&i) { } + virtual int scan(TPpToken*) override; + + // Scanner used to get source stream characters. + // - Escaped newlines are handled here, invisibly to the caller. + // - All forms of newline are handled, and turned into just a '\n'. + int getch() override + { + int ch = input->get(); + + if (ch == '\\') { + // Move past escaped newlines, as many as sequentially exist + do { + if (input->peek() == '\r' || input->peek() == '\n') { + bool allowed = pp->parseContext.lineContinuationCheck(input->getSourceLoc(), pp->inComment); + if (! allowed && pp->inComment) + return '\\'; + + // escape one newline now + ch = input->get(); + int nextch = input->get(); + if (ch == '\r' && nextch == '\n') + ch = input->get(); + else + ch = nextch; + } else + return '\\'; + } while (ch == '\\'); + } + + // handle any non-escaped newline + if (ch == '\r' || ch == '\n') { + if (ch == '\r' && input->peek() == '\n') + input->get(); + return '\n'; + } + + return ch; + } + + // Scanner used to backup the source stream characters. Newlines are + // handled here, invisibly to the caller, meaning have to undo exactly + // what getch() above does (e.g., don't leave things in the middle of a + // sequence of escaped newlines). + void ungetch() override + { + input->unget(); + + do { + int ch = input->peek(); + if (ch == '\r' || ch == '\n') { + if (ch == '\n') { + // correct for two-character newline + input->unget(); + if (input->peek() != '\r') + input->get(); + } + // now in front of a complete newline, move past an escape character + input->unget(); + if (input->peek() == '\\') + input->unget(); + else { + input->get(); + break; + } + } else + break; + } while (true); + } + + protected: + TInputScanner* input; + }; + + // Holds a reference to included file data, as well as a + // prologue and an epilogue string. This can be scanned using the tInput + // interface and acts as a single source string. + class TokenizableIncludeFile : public tInput { + public: + // Copies prologue and epilogue. The includedFile must remain valid + // until this TokenizableIncludeFile is no longer used. 
+        TokenizableIncludeFile(const TSourceLoc& startLoc,
+                               const std::string& prologue,
+                               TShader::Includer::IncludeResult* includedFile,
+                               const std::string& epilogue,
+                               TPpContext* pp)
+            : tInput(pp),
+              prologue_(prologue),
+              epilogue_(epilogue),
+              includedFile_(includedFile),
+              scanner(3, strings, lengths, nullptr, 0, 0, true),
+              prevScanner(nullptr),
+              stringInput(pp, scanner)
+        {
+            strings[0] = prologue_.data();
+            strings[1] = includedFile_->headerData;
+            strings[2] = epilogue_.data();
+
+            lengths[0] = prologue_.size();
+            lengths[1] = includedFile_->headerLength;
+            lengths[2] = epilogue_.size();
+
+            scanner.setLine(startLoc.line);
+            scanner.setString(startLoc.string);
+
+            scanner.setFile(startLoc.getFilenameStr(), 0);
+            scanner.setFile(startLoc.getFilenameStr(), 1);
+            scanner.setFile(startLoc.getFilenameStr(), 2);
+        }
+
+        // tInput methods:
+        int scan(TPpToken* t) override { return stringInput.scan(t); }
+        int getch() override { return stringInput.getch(); }
+        void ungetch() override { stringInput.ungetch(); }
+
+        void notifyActivated() override
+        {
+            prevScanner = pp->parseContext.getScanner();
+            pp->parseContext.setScanner(&scanner);
+            pp->push_include(includedFile_);
+        }
+
+        void notifyDeleted() override
+        {
+            pp->parseContext.setScanner(prevScanner);
+            pp->pop_include();
+        }
+
+    private:
+        TokenizableIncludeFile& operator=(const TokenizableIncludeFile&);
+
+        // Stores the prologue for this string.
+        const std::string prologue_;
+
+        // Stores the epilogue for this string.
+        const std::string epilogue_;
+
+        // Points to the IncludeResult that this TokenizableIncludeFile represents.
+        TShader::Includer::IncludeResult* includedFile_;
+
+        // Will point to prologue_, includedFile_->headerData and epilogue_.
+        // This is passed to the scanner constructor.
+        // These do not own the storage, and it must remain valid until this
+        // object has been destroyed.
+        const char* strings[3];
+        // Lengths of the strings[] entries, passed to the scanner constructor.
+        size_t lengths[3];
+        // Scans over the strings above.
+        TInputScanner scanner;
+        // The previous effective scanner, before the scanner in this instance
+        // was activated.
+        TInputScanner* prevScanner;
+        // Delegate object implementing the tInput interface.
+        tStringInput stringInput;
+    };
+
+    int ScanFromString(char* s);
+    void missingEndifCheck();
+    int lFloatConst(int len, int ch, TPpToken* ppToken);
+    int characterLiteral(TPpToken* ppToken);
+
+    void push_include(TShader::Includer::IncludeResult* result)
+    {
+        currentSourceFile = result->headerName;
+        includeStack.push(result);
+    }
+
+    void pop_include()
+    {
+        TShader::Includer::IncludeResult* include = includeStack.top();
+        includeStack.pop();
+        includer.releaseInclude(include);
+        if (includeStack.empty()) {
+            currentSourceFile = rootFileName;
+        } else {
+            currentSourceFile = includeStack.top()->headerName;
+        }
+    }
+
+    bool inComment;
+    std::string rootFileName;
+    std::stack<TShader::Includer::IncludeResult*> includeStack;
+    std::string currentSourceFile;
+
+    std::istringstream strtodStream;
+    bool disableEscapeSequences;
+};
+
+} // end namespace glslang
+
+#endif // PPCONTEXT_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/preprocessor/PpTokens.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/preprocessor/PpTokens.h
new file mode 100644
index 0000000..7b0f815
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/preprocessor/PpTokens.h
@@ -0,0 +1,179 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+/****************************************************************************\
+Copyright (c) 2002, NVIDIA Corporation.
+
+NVIDIA Corporation("NVIDIA") supplies this software to you in
+consideration of your agreement to the following terms, and your use,
+installation, modification or redistribution of this NVIDIA software
+constitutes acceptance of these terms.  If you do not agree with these
+terms, please do not use, install, modify or redistribute this NVIDIA
+software.
+ +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\****************************************************************************/ + +#ifndef PARSER_H +#define PARSER_H + +namespace glslang { + +// Multi-character tokens +enum EFixedAtoms { + // single character tokens get their own char value as their token; start here for multi-character tokens + PpAtomMaxSingle = 127, + + // replace bad character tokens with this, to avoid accidental aliasing with the below + PpAtomBadToken, + + // Operators + + PPAtomAddAssign, + PPAtomSubAssign, + PPAtomMulAssign, + PPAtomDivAssign, + PPAtomModAssign, + + PpAtomRight, + PpAtomLeft, + + PpAtomRightAssign, + PpAtomLeftAssign, + PpAtomAndAssign, + PpAtomOrAssign, + PpAtomXorAssign, + + PpAtomAnd, + PpAtomOr, + PpAtomXor, + + PpAtomEQ, + PpAtomNE, + PpAtomGE, + PpAtomLE, + + PpAtomDecrement, + PpAtomIncrement, + + PpAtomColonColon, + + PpAtomPaste, + + // Constants + + PpAtomConstInt, + PpAtomConstUint, + PpAtomConstInt64, + PpAtomConstUint64, + PpAtomConstInt16, + PpAtomConstUint16, + PpAtomConstFloat, + PpAtomConstDouble, + PpAtomConstFloat16, + PpAtomConstString, + + // Identifiers + PpAtomIdentifier, + + // preprocessor "keywords" + + PpAtomDefine, + PpAtomUndef, + + PpAtomIf, + PpAtomIfdef, + PpAtomIfndef, + PpAtomElse, + PpAtomElif, + PpAtomEndif, + + PpAtomLine, + PpAtomPragma, + PpAtomError, + + // #version ... 
+ PpAtomVersion, + PpAtomCore, + PpAtomCompatibility, + PpAtomEs, + + // #extension + PpAtomExtension, + + // __LINE__, __FILE__, __VERSION__ + + PpAtomLineMacro, + PpAtomFileMacro, + PpAtomVersionMacro, + + // #include + PpAtomInclude, + + PpAtomLast, +}; + +} // end namespace glslang + +#endif /* not PARSER_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/propagateNoContraction.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/propagateNoContraction.h new file mode 100644 index 0000000..8521ad7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/propagateNoContraction.h @@ -0,0 +1,55 @@ +// +// Copyright (C) 2015-2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Visit the nodes in the glslang intermediate tree representation to +// propagate 'noContraction' qualifier. +// + +#pragma once + +#include "../Include/intermediate.h" + +namespace glslang { + +// Propagates the 'precise' qualifier for objects (objects marked with +// 'noContraction' qualifier) from the shader source specified 'precise' +// variables to all the involved objects, and add 'noContraction' qualifier for +// the involved arithmetic operations. +// Note that the same qualifier: 'noContraction' is used in both object nodes +// and arithmetic operation nodes, but has different meaning. For object nodes, +// 'noContraction' means the object is 'precise'; and for arithmetic operation +// nodes, it means the operation should not be contracted. 
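+//
+// A hedged GLSL illustration of the intended effect (standard 'precise'
+// semantics; not text from the original header):
+//
+//     precise float r;
+//     r = a * b + c;   // 'precise' on r propagates 'noContraction' to the
+//                      // multiply and the add, so they may not be fused
+//                      // (e.g. into a single fma) by downstream codegen.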
+void PropagateNoContraction(const glslang::TIntermediate& intermediate);
+};
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/reflection.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/reflection.h
new file mode 100644
index 0000000..bfd5452
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/MachineIndependent/reflection.h
@@ -0,0 +1,223 @@
+//
+// Copyright (C) 2013-2016 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#if !defined(GLSLANG_WEB)
+
+#ifndef _REFLECTION_INCLUDED
+#define _REFLECTION_INCLUDED
+
+#include "../Public/ShaderLang.h"
+#include "../Include/Types.h"
+
+#include <list>
+#include <set>
+
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+//
+
+namespace glslang {
+
+class TIntermediate;
+class TIntermAggregate;
+class TReflectionTraverser;
+
+// The full reflection database
+class TReflection {
+public:
+    TReflection(EShReflectionOptions opts, EShLanguage first, EShLanguage last)
+        : options(opts), firstStage(first), lastStage(last), badReflection(TObjectReflection::badReflection())
+    {
+        for (int dim=0; dim<3; ++dim)
+            localSize[dim] = 0;
+    }
+
+    virtual ~TReflection() {}
+
+    // grow the reflection stage by stage
+    bool addStage(EShLanguage, const TIntermediate&);
+
+    // for mapping a uniform index to a uniform object's description
+    int getNumUniforms() { return (int)indexToUniform.size(); }
+    const TObjectReflection& getUniform(int i) const
+    {
+        if (i >= 0 && i < (int)indexToUniform.size())
+            return indexToUniform[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a block index to the block's description
+    int getNumUniformBlocks() const { return (int)indexToUniformBlock.size(); }
+    const TObjectReflection& getUniformBlock(int i) const
+    {
+        if (i >= 0 && i < (int)indexToUniformBlock.size())
+            return indexToUniformBlock[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a pipeline input index to the input's description
+    int getNumPipeInputs() { return (int)indexToPipeInput.size(); }
+    const TObjectReflection& getPipeInput(int i) const
+    {
+        if (i >= 0 && i < (int)indexToPipeInput.size())
+            return indexToPipeInput[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a pipeline output index to the output's description
+    int getNumPipeOutputs() { return (int)indexToPipeOutput.size(); }
+    const TObjectReflection& getPipeOutput(int i) const
+    {
+        if (i >= 0 && i < (int)indexToPipeOutput.size())
+            return indexToPipeOutput[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping from an atomic counter to the uniform index
+    int getNumAtomicCounters() const { return (int)atomicCounterUniformIndices.size(); }
+    const TObjectReflection& getAtomicCounter(int i) const
+    {
+        if (i >= 0 && i < (int)atomicCounterUniformIndices.size())
+            return getUniform(atomicCounterUniformIndices[i]);
+        else
+            return badReflection;
+    }
+
+    // for mapping a buffer variable index to a buffer variable object's description
+    int getNumBufferVariables() { return (int)indexToBufferVariable.size(); }
+    const TObjectReflection& getBufferVariable(int i) const
+    {
+        if (i >= 0 && i < (int)indexToBufferVariable.size())
+            return indexToBufferVariable[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a storage block index to the storage block's description
+    int getNumStorageBuffers() const { return (int)indexToBufferBlock.size(); }
+    const TObjectReflection& getStorageBufferBlock(int i) const
+    {
+        if (i >= 0 && i < (int)indexToBufferBlock.size())
+            return indexToBufferBlock[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping any name to its index (block names, uniform names and input/output names)
+    int getIndex(const char* name) const
+    {
+        TNameToIndex::const_iterator it = nameToIndex.find(name);
+        if (it == nameToIndex.end())
+            return -1;
+        else
+            return it->second;
+    }
+
+    // see getIndex(const char*)
+    int getIndex(const TString& name) const { return getIndex(name.c_str()); }
+
+    // for mapping any name to its index (only pipe input/output names)
+    int getPipeIOIndex(const char* name, const bool inOrOut) const
+    {
+        TNameToIndex::const_iterator it = inOrOut ? pipeInNameToIndex.find(name) : pipeOutNameToIndex.find(name);
+        if (it == (inOrOut ? pipeInNameToIndex.end() : pipeOutNameToIndex.end()))
+            return -1;
+        else
+            return it->second;
+    }
+
+    // see getPipeIOIndex(const char*, const bool)
+    int getPipeIOIndex(const TString& name, const bool inOrOut) const { return getPipeIOIndex(name.c_str(), inOrOut); }
+
+    // Thread local size
+    unsigned getLocalSize(int dim) const { return dim <= 2 ? localSize[dim] : 0; }
+
+    void dump();
+
+protected:
+    friend class glslang::TReflectionTraverser;
+
+    void buildCounterIndices(const TIntermediate&);
+    void buildUniformStageMask(const TIntermediate& intermediate);
+    void buildAttributeReflection(EShLanguage, const TIntermediate&);
+
+    // Need a TString hash: typedef std::unordered_map<TString, int> TNameToIndex;
+    typedef std::map<TString, int> TNameToIndex;
+    typedef std::vector<TObjectReflection> TMapIndexToReflection;
+    typedef std::vector<int> TIndices;
+
+    TMapIndexToReflection& GetBlockMapForStorage(TStorageQualifier storage)
+    {
+        if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer)
+            return indexToBufferBlock;
+        return indexToUniformBlock;
+    }
+    TMapIndexToReflection& GetVariableMapForStorage(TStorageQualifier storage)
+    {
+        if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer)
+            return indexToBufferVariable;
+        return indexToUniform;
+    }
+
+    EShReflectionOptions options;
+
+    EShLanguage firstStage;
+    EShLanguage lastStage;
+
+    TObjectReflection badReflection;  // returned for queries of -1 or generally out of range; holds the expected descriptions for such a query
+    TNameToIndex nameToIndex;         // maps names to indexes; can hold all types of data: uniform/buffer and which function names have been processed
+    TNameToIndex pipeInNameToIndex;   // maps pipe-in names to indexes; this is a fix to separate pipe I/O from uniforms and buffers
+    TNameToIndex pipeOutNameToIndex;  // maps pipe-out names to indexes; this is a fix to separate pipe I/O from uniforms and buffers
+    TMapIndexToReflection indexToUniform;
+    TMapIndexToReflection indexToUniformBlock;
+    TMapIndexToReflection indexToBufferVariable;
+    TMapIndexToReflection indexToBufferBlock;
+    TMapIndexToReflection indexToPipeInput;
+    TMapIndexToReflection indexToPipeOutput;
+    TIndices atomicCounterUniformIndices;
+
+    unsigned int localSize[3];
+};
+
+} // end namespace glslang
+
+#endif // _REFLECTION_INCLUDED
+
+#endif // !GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/ResourceLimits.h
new file mode 100644
index 0000000..f70be81
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/ResourceLimits.h
@@ -0,0 +1,57 @@
+//
+// Copyright (C) 2016 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of Google Inc. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
+#define _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
+
+#include <string>
+
+#include "../Include/ResourceLimits.h"
+
+// Return pointer to user-writable Resource to pass through API in
+// future-proof way.
+extern TBuiltInResource* GetResources();
+
+// These are the default resources for TBuiltInResources, used for both
+//  - parsing this string for the case where the user didn't supply one,
+//  - dumping out a template for user construction of a config file.
+extern const TBuiltInResource* GetDefaultResources();
+
+// Returns the DefaultTBuiltInResource as a human-readable string.
+std::string GetDefaultTBuiltInResourceString();
+
+// Decodes the resource limits from |config| to |resources|.
+void DecodeResourceLimits(TBuiltInResource* resources, char* config);
+
+#endif // _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/ShaderLang.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/ShaderLang.h
new file mode 100644
index 0000000..90a5302
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/ShaderLang.h
@@ -0,0 +1,987 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2013-2016 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+#ifndef _COMPILER_INTERFACE_INCLUDED_
+#define _COMPILER_INTERFACE_INCLUDED_
+
+#include "../Include/ResourceLimits.h"
+#include "../MachineIndependent/Versions.h"
+
+#include <cstring>
+#include <vector>
+
+#ifdef _WIN32
+    #define C_DECL __cdecl
+#else
+    #define C_DECL
+#endif
+
+#ifdef GLSLANG_IS_SHARED_LIBRARY
+    #ifdef _WIN32
+        #ifdef GLSLANG_EXPORTING
+            #define GLSLANG_EXPORT __declspec(dllexport)
+        #else
+            #define GLSLANG_EXPORT __declspec(dllimport)
+        #endif
+    #elif __GNUC__ >= 4
+        #define GLSLANG_EXPORT __attribute__((visibility("default")))
+    #endif
+#endif // GLSLANG_IS_SHARED_LIBRARY
+
+#ifndef GLSLANG_EXPORT
+#define GLSLANG_EXPORT
+#endif
+
+//
+// This is the platform independent interface between an OGL driver
+// and the shading language compiler/linker.
+//
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+//
+// Call before doing any other compiler/linker operations.
+//
+// (Call once per process, not once per thread.)
+//
+GLSLANG_EXPORT int ShInitialize();
+
+//
+// Call this at process shutdown to clean up memory.
+//
+GLSLANG_EXPORT int ShFinalize();
+
+//
+// Types of languages the compiler can consume.
+//
+typedef enum {
+    EShLangVertex,
+    EShLangTessControl,
+    EShLangTessEvaluation,
+    EShLangGeometry,
+    EShLangFragment,
+    EShLangCompute,
+    EShLangRayGen,
+    EShLangRayGenNV = EShLangRayGen,
+    EShLangIntersect,
+    EShLangIntersectNV = EShLangIntersect,
+    EShLangAnyHit,
+    EShLangAnyHitNV = EShLangAnyHit,
+    EShLangClosestHit,
+    EShLangClosestHitNV = EShLangClosestHit,
+    EShLangMiss,
+    EShLangMissNV = EShLangMiss,
+    EShLangCallable,
+    EShLangCallableNV = EShLangCallable,
+    EShLangTask,
+    EShLangTaskNV = EShLangTask,
+    EShLangMesh,
+    EShLangMeshNV = EShLangMesh,
+    LAST_ELEMENT_MARKER(EShLangCount),
+} EShLanguage;  // would be better as stage, but this is ancient now
+
+typedef enum : unsigned {
+    EShLangVertexMask         = (1 << EShLangVertex),
+    EShLangTessControlMask    = (1 << EShLangTessControl),
+    EShLangTessEvaluationMask = (1 << EShLangTessEvaluation),
+    EShLangGeometryMask       = (1 << EShLangGeometry),
+    EShLangFragmentMask       = (1 << EShLangFragment),
+    EShLangComputeMask        = (1 << EShLangCompute),
+    EShLangRayGenMask         = (1 << EShLangRayGen),
+    EShLangRayGenNVMask       = EShLangRayGenMask,
+    EShLangIntersectMask      = (1 << EShLangIntersect),
+    EShLangIntersectNVMask    = EShLangIntersectMask,
+    EShLangAnyHitMask         = (1 << EShLangAnyHit),
+    EShLangAnyHitNVMask       = EShLangAnyHitMask,
+    EShLangClosestHitMask     = (1 << EShLangClosestHit),
+    EShLangClosestHitNVMask   = EShLangClosestHitMask,
+    EShLangMissMask           = (1 << EShLangMiss),
+    EShLangMissNVMask         = EShLangMissMask,
+    EShLangCallableMask       = (1 << EShLangCallable),
+    EShLangCallableNVMask     = EShLangCallableMask,
+    EShLangTaskMask           = (1 << EShLangTask),
+    EShLangTaskNVMask         = EShLangTaskMask,
+    EShLangMeshMask           = (1 << EShLangMesh),
+    EShLangMeshNVMask         = EShLangMeshMask,
+    LAST_ELEMENT_MARKER(EShLanguageMaskCount),
+} EShLanguageMask;
+
+namespace glslang {
+
+class TType;
+
+typedef enum {
+    EShSourceNone,
+    EShSourceGlsl,    // GLSL,
includes ESSL (OpenGL ES GLSL) + EShSourceHlsl, // HLSL + LAST_ELEMENT_MARKER(EShSourceCount), +} EShSource; // if EShLanguage were EShStage, this could be EShLanguage instead + +typedef enum { + EShClientNone, // use when there is no client, e.g. for validation + EShClientVulkan, // as GLSL dialect, specifies KHR_vulkan_glsl extension + EShClientOpenGL, // as GLSL dialect, specifies ARB_gl_spirv extension + LAST_ELEMENT_MARKER(EShClientCount), +} EShClient; + +typedef enum { + EShTargetNone, + EShTargetSpv, // SPIR-V (preferred spelling) + EshTargetSpv = EShTargetSpv, // legacy spelling + LAST_ELEMENT_MARKER(EShTargetCount), +} EShTargetLanguage; + +typedef enum { + EShTargetVulkan_1_0 = (1 << 22), // Vulkan 1.0 + EShTargetVulkan_1_1 = (1 << 22) | (1 << 12), // Vulkan 1.1 + EShTargetVulkan_1_2 = (1 << 22) | (2 << 12), // Vulkan 1.2 + EShTargetVulkan_1_3 = (1 << 22) | (3 << 12), // Vulkan 1.3 + EShTargetOpenGL_450 = 450, // OpenGL + LAST_ELEMENT_MARKER(EShTargetClientVersionCount = 5), +} EShTargetClientVersion; + +typedef EShTargetClientVersion EshTargetClientVersion; + +typedef enum { + EShTargetSpv_1_0 = (1 << 16), // SPIR-V 1.0 + EShTargetSpv_1_1 = (1 << 16) | (1 << 8), // SPIR-V 1.1 + EShTargetSpv_1_2 = (1 << 16) | (2 << 8), // SPIR-V 1.2 + EShTargetSpv_1_3 = (1 << 16) | (3 << 8), // SPIR-V 1.3 + EShTargetSpv_1_4 = (1 << 16) | (4 << 8), // SPIR-V 1.4 + EShTargetSpv_1_5 = (1 << 16) | (5 << 8), // SPIR-V 1.5 + EShTargetSpv_1_6 = (1 << 16) | (6 << 8), // SPIR-V 1.6 + LAST_ELEMENT_MARKER(EShTargetLanguageVersionCount = 7), +} EShTargetLanguageVersion; + +struct TInputLanguage { + EShSource languageFamily; // redundant information with other input, this one overrides when not EShSourceNone + EShLanguage stage; // redundant information with other input, this one overrides when not EShSourceNone + EShClient dialect; + int dialectVersion; // version of client's language definition, not the client (when not EShClientNone) + bool vulkanRulesRelaxed; +}; + +struct TClient { + EShClient client; + EShTargetClientVersion version; // version of client itself (not the client's input dialect) +}; + +struct TTarget { + EShTargetLanguage language; + EShTargetLanguageVersion version; // version to target, if SPIR-V, defined by "word 1" of the SPIR-V header + bool hlslFunctionality1; // can target hlsl_functionality1 extension(s) +}; + +// All source/client/target versions and settings. +// Can override previous methods of setting, when items are set here. +// Expected to grow, as more are added, rather than growing parameter lists. +struct TEnvironment { + TInputLanguage input; // definition of the input language + TClient client; // what client is the overall compilation being done for? + TTarget target; // what to generate +}; + +GLSLANG_EXPORT const char* StageName(EShLanguage); + +} // end namespace glslang + +// +// Types of output the linker will create. +// +typedef enum { + EShExVertexFragment, + EShExFragment +} EShExecutable; + +// +// Optimization level for the compiler. +// +typedef enum { + EShOptNoGeneration, + EShOptNone, + EShOptSimple, // Optimizations that can be done quickly + EShOptFull, // Optimizations that will take more time + LAST_ELEMENT_MARKER(EshOptLevelCount), +} EShOptimizationLevel; + +// +// Texture and Sampler transformation mode. 
+//
+typedef enum {
+    EShTexSampTransKeep,                          // keep textures and samplers as is (default)
+    EShTexSampTransUpgradeTextureRemoveSampler,   // change texture w/o embedded sampler into sampled texture and throw away all samplers
+    LAST_ELEMENT_MARKER(EShTexSampTransCount),
+} EShTextureSamplerTransformMode;
+
+//
+// Message choices for what errors and warnings are given.
+//
+enum EShMessages : unsigned {
+    EShMsgDefault          = 0,         // default is to give all required errors and extra warnings
+    EShMsgRelaxedErrors    = (1 << 0),  // be liberal in accepting input
+    EShMsgSuppressWarnings = (1 << 1),  // suppress all warnings, except those required by the specification
+    EShMsgAST              = (1 << 2),  // print the AST intermediate representation
+    EShMsgSpvRules         = (1 << 3),  // issue messages for SPIR-V generation
+    EShMsgVulkanRules      = (1 << 4),  // issue messages for Vulkan-requirements of GLSL for SPIR-V
+    EShMsgOnlyPreprocessor = (1 << 5),  // only print out errors produced by the preprocessor
+    EShMsgReadHlsl         = (1 << 6),  // use HLSL parsing rules and semantics
+    EShMsgCascadingErrors  = (1 << 7),  // get cascading errors; risks error-recovery issues, instead of an early exit
+    EShMsgKeepUncalled     = (1 << 8),  // for testing, don't eliminate uncalled functions
+    EShMsgHlslOffsets      = (1 << 9),  // allow block offsets to follow HLSL rules instead of GLSL rules
+    EShMsgDebugInfo        = (1 << 10), // save debug information
+    EShMsgHlslEnable16BitTypes = (1 << 11), // enable use of 16-bit types in SPIR-V for HLSL
+    EShMsgHlslLegalization = (1 << 12), // enable HLSL Legalization messages
+    EShMsgHlslDX9Compatible = (1 << 13), // enable HLSL DX9 compatible mode (for samplers and semantics)
+    EShMsgBuiltinSymbolTable = (1 << 14), // print the builtin symbol table
+    EShMsgEnhanced         = (1 << 15), // enhanced message readability
+    LAST_ELEMENT_MARKER(EShMsgCount),
+};
+
+//
+// Options for building reflection
+//
+typedef enum {
+    EShReflectionDefault            = 0,        // default is original behaviour before options were added
+    EShReflectionStrictArraySuffix  = (1 << 0), // reflection will follow stricter rules for array-of-structs suffixes
+    EShReflectionBasicArraySuffix   = (1 << 1), // arrays of basic types will be appended with [0] as in GL reflection
+    EShReflectionIntermediateIO     = (1 << 2), // reflect inputs and outputs to program, even with no vertex shader
+    EShReflectionSeparateBuffers    = (1 << 3), // buffer variables and buffer blocks are reflected separately
+    EShReflectionAllBlockVariables  = (1 << 4), // reflect all variables in blocks, even if they are inactive
+    EShReflectionUnwrapIOBlocks     = (1 << 5), // unwrap input/output blocks the same as with uniform blocks
+    EShReflectionAllIOVariables     = (1 << 6), // reflect all input/output variables, even if they are inactive
+    EShReflectionSharedStd140SSBO   = (1 << 7), // apply std140/shared rules for ubo to ssbo
+    EShReflectionSharedStd140UBO    = (1 << 8), // apply std140/shared rules for ubo to ubo
+    LAST_ELEMENT_MARKER(EShReflectionCount),
+} EShReflectionOptions;
+
+//
+// Build a table for bindings.  This can be used for locating
+// attributes, uniforms, globals, etc., as needed.
+//
+typedef struct {
+    const char* name;
+    int binding;
+} ShBinding;
+
+typedef struct {
+    int numBindings;
+    ShBinding* bindings;  // array of bindings
+} ShBindingTable;
+
+//
+// ShHandle held by but opaque to the driver.  It is allocated,
+// managed, and de-allocated by the compiler/linker. Its contents
+// are defined by and used by the compiler and linker.  For example,
+// symbol table information and object code passed from the compiler
+// to the linker can be stored where ShHandle points.
+//
+// If handle creation fails, 0 will be returned.
+//
+typedef void* ShHandle;
+
+//
+// Driver calls these to create and destroy compiler/linker
+// objects.
+//
+GLSLANG_EXPORT ShHandle ShConstructCompiler(const EShLanguage, int debugOptions);  // one per shader
+GLSLANG_EXPORT ShHandle ShConstructLinker(const EShExecutable, int debugOptions);  // one per shader pair
+GLSLANG_EXPORT ShHandle ShConstructUniformMap();                                   // one per uniform namespace (currently entire program object)
+GLSLANG_EXPORT void ShDestruct(ShHandle);
+
+//
+// The return value of ShCompile is boolean, non-zero indicating
+// success.
+//
+// The info-log should be written by ShCompile into
+// ShHandle, so it can answer future queries.
+//
+GLSLANG_EXPORT int ShCompile(
+    const ShHandle,
+    const char* const shaderStrings[],
+    const int numStrings,
+    const int* lengths,
+    const EShOptimizationLevel,
+    const TBuiltInResource *resources,
+    int debugOptions,
+    int defaultVersion = 110,            // use 100 for ES environment, overridden by #version in shader
+    bool forwardCompatible = false,      // give errors for use of deprecated features
+    EShMessages messages = EShMsgDefault // warnings and errors
+    );
+
+GLSLANG_EXPORT int ShLinkExt(
+    const ShHandle,               // linker object
+    const ShHandle h[],           // compiler objects to link together
+    const int numHandles);
+
+//
+// ShSetEncryptionMethod is a place-holder for specifying
+// how source code is encrypted.
+//
+GLSLANG_EXPORT void ShSetEncryptionMethod(ShHandle);
+
+//
+// All the following return 0 if the information is not
+// available in the object passed down, or the object is bad.
+//
+GLSLANG_EXPORT const char* ShGetInfoLog(const ShHandle);
+GLSLANG_EXPORT const void* ShGetExecutable(const ShHandle);
+GLSLANG_EXPORT int ShSetVirtualAttributeBindings(const ShHandle, const ShBindingTable*);   // to detect user aliasing
+GLSLANG_EXPORT int ShSetFixedAttributeBindings(const ShHandle, const ShBindingTable*);     // to force any physical mappings
+//
+// Tell the linker to never assign a vertex attribute to this list of physical attributes
+//
+GLSLANG_EXPORT int ShExcludeAttributes(const ShHandle, int *attributes, int count);
+
+//
+// Returns the location ID of the named uniform.
+// Returns -1 if error.
+//
+GLSLANG_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char* name);
+
+#ifdef __cplusplus
+    }  // end extern "C"
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Deferred-Lowering C++ Interface
+// -----------------------------------
+//
+// Below is a new alternate C++ interface, which deprecates the above
+// opaque handle-based interface.
+//
+// The below is further designed to handle multiple compilation units per stage, where
+// the intermediate results, including the parse tree, are preserved until link time,
+// rather than the above interface which is designed to have each compilation unit
+// lowered at compile time.  In the above model, linking occurs on the lowered results,
+// whereas in this model intra-stage linking can occur at the parse tree
+// (treeRoot in TIntermediate) level, and then a full stage can be lowered.
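+//
+// As a hedged sketch of the difference (the class names below are real, the
+// flow is illustrative and elides all error handling): two compilation units
+// of one stage can both be parsed first and lowered only after linking:
+//
+//     glslang::TShader a(EShLangFragment), b(EShLangFragment);
+//     // ... set strings on both, then a.parse(...) and b.parse(...) ...
+//     glslang::TProgram program;
+//     program.addShader(&a);
+//     program.addShader(&b);       // intra-stage link happens at the parse-tree level
+//     program.link(EShMsgDefault); // then the merged stage can be lowered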
+//
+
+#include <list>
+#include <string>
+#include <utility>
+
+class TCompiler;
+class TInfoSink;
+
+namespace glslang {
+
+struct Version {
+    int major;
+    int minor;
+    int patch;
+    const char* flavor;
+};
+
+GLSLANG_EXPORT Version GetVersion();
+GLSLANG_EXPORT const char* GetEsslVersionString();
+GLSLANG_EXPORT const char* GetGlslVersionString();
+GLSLANG_EXPORT int GetKhronosToolId();
+
+class TIntermediate;
+class TProgram;
+class TPoolAllocator;
+
+// Call this exactly once per process before using anything else
+GLSLANG_EXPORT bool InitializeProcess();
+
+// Call once per process to tear down everything
+GLSLANG_EXPORT void FinalizeProcess();
+
+// Resource type for IO resolver
+enum TResourceType {
+    EResSampler,
+    EResTexture,
+    EResImage,
+    EResUbo,
+    EResSsbo,
+    EResUav,
+    EResCount
+};
+
+enum TBlockStorageClass
+{
+    EbsUniform = 0,
+    EbsStorageBuffer,
+    EbsPushConstant,
+    EbsNone,    // not a uniform or buffer variable
+    EbsCount,
+};
+
+// Make one TShader per shader that you will link into a program. Then
+//  - provide the shader through setStrings() or setStringsWithLengths()
+//  - optionally call setEnv*(), see below for more detail
+//  - optionally use setPreamble() to set a special shader string that will be
+//    processed before all others but won't affect the validity of #version
+//  - optionally call addProcesses() for each setting/transform,
+//    see comment for class TProcesses
+//  - call parse(): source language and target environment must be selected
+//    either by correct setting of EShMessages sent to parse(), or by
+//    explicitly calling setEnv*()
+//  - query the info logs
+//
+// N.B.: Does not yet support having the same TShader instance being linked into
+// multiple programs.
+//
+// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
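+//
+// A minimal sketch of the sequence above (illustrative only; error handling
+// and cleanup are elided, and `glslSource` is a hypothetical input string;
+// GetDefaultResources() comes from the Public/ResourceLimits.h helper):
+//
+//     glslang::InitializeProcess();
+//     glslang::TShader shader(EShLangVertex);
+//     const char* glslSource = "...";
+//     shader.setStrings(&glslSource, 1);
+//     shader.setEnvInput(glslang::EShSourceGlsl, EShLangVertex, glslang::EShClientVulkan, 100);
+//     shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetVulkan_1_1);
+//     shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
+//     if (!shader.parse(GetDefaultResources(), 110, false, EShMsgDefault))
+//         printf("%s\n", shader.getInfoLog());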
+//
+class TShader {
+public:
+    GLSLANG_EXPORT explicit TShader(EShLanguage);
+    GLSLANG_EXPORT virtual ~TShader();
+    GLSLANG_EXPORT void setStrings(const char* const* s, int n);
+    GLSLANG_EXPORT void setStringsWithLengths(
+        const char* const* s, const int* l, int n);
+    GLSLANG_EXPORT void setStringsWithLengthsAndNames(
+        const char* const* s, const int* l, const char* const* names, int n);
+    void setPreamble(const char* s) { preamble = s; }
+    GLSLANG_EXPORT void setEntryPoint(const char* entryPoint);
+    GLSLANG_EXPORT void setSourceEntryPoint(const char* sourceEntryPointName);
+    GLSLANG_EXPORT void addProcesses(const std::vector<std::string>&);
+    GLSLANG_EXPORT void setUniqueId(unsigned long long id);
+    GLSLANG_EXPORT void setOverrideVersion(int version);
+    GLSLANG_EXPORT void setDebugInfo(bool debugInfo);
+
+    // IO resolver binding data: see comments in ShaderLang.cpp
+    GLSLANG_EXPORT void setShiftBinding(TResourceType res, unsigned int base);
+    GLSLANG_EXPORT void setShiftSamplerBinding(unsigned int base);  // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftTextureBinding(unsigned int base);  // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftImageBinding(unsigned int base);    // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftUboBinding(unsigned int base);      // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftUavBinding(unsigned int base);      // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftCbufferBinding(unsigned int base);  // synonym for setShiftUboBinding
+    GLSLANG_EXPORT void setShiftSsboBinding(unsigned int base);     // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftBindingForSet(TResourceType res, unsigned int base, unsigned int set);
+    GLSLANG_EXPORT void setResourceSetBinding(const std::vector<std::string>& base);
+    GLSLANG_EXPORT void setAutoMapBindings(bool map);
+    GLSLANG_EXPORT void setAutoMapLocations(bool map);
+    GLSLANG_EXPORT void addUniformLocationOverride(const char* name, int loc);
+    GLSLANG_EXPORT void setUniformLocationBase(int base);
+    GLSLANG_EXPORT void setInvertY(bool invert);
+    GLSLANG_EXPORT void setDxPositionW(bool dxPosW);
+    GLSLANG_EXPORT void setEnhancedMsgs();
+#ifdef ENABLE_HLSL
+    GLSLANG_EXPORT void setHlslIoMapping(bool hlslIoMap);
+    GLSLANG_EXPORT void setFlattenUniformArrays(bool flatten);
+#endif
+    GLSLANG_EXPORT void setNoStorageFormat(bool useUnknownFormat);
+    GLSLANG_EXPORT void setNanMinMaxClamp(bool nanMinMaxClamp);
+    GLSLANG_EXPORT void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode);
+    GLSLANG_EXPORT void addBlockStorageOverride(const char* nameStr, glslang::TBlockStorageClass backing);
+
+    GLSLANG_EXPORT void setGlobalUniformBlockName(const char* name);
+    GLSLANG_EXPORT void setAtomicCounterBlockName(const char* name);
+    GLSLANG_EXPORT void setGlobalUniformSet(unsigned int set);
+    GLSLANG_EXPORT void setGlobalUniformBinding(unsigned int binding);
+    GLSLANG_EXPORT void setAtomicCounterBlockSet(unsigned int set);
+    GLSLANG_EXPORT void setAtomicCounterBlockBinding(unsigned int binding);
+
+    // For setting up the environment (cleared to nothingness in the constructor).
+    // These must be called so that parsing is done for the right source language and
+    // target environment, either indirectly through TranslateEnvironment() based on
+    // EShMessages et al., or directly by the user.
+    //
+    // setEnvInput:  The input source language and stage.  If generating code for a
+    //               specific client, the input client semantics to use and the
+    //               version of that client's input semantics to use, otherwise
+    //               use EShClientNone and version of 0, e.g. for validation mode.
+    //               Note 'version' does not describe the target environment,
+    //               just the version of the source dialect to compile under.
+    //               For example, to choose the Vulkan dialect of GLSL defined by
+    //               version 100 of the KHR_vulkan_glsl extension: lang = EShSourceGlsl,
+    //               dialect = EShClientVulkan, and version = 100.
+    //
+    //               See the definitions of TEnvironment, EShSource, EShLanguage,
+    //               and EShClient for choices and more detail.
+    //
+    // setEnvClient: The client that will be hosting the execution, and its version.
+    //               Note 'version' is not the version of the languages involved, but
+    //               the version of the client environment.
+    //               Use EShClientNone and version of 0 if there is no client, e.g.
+    //               for validation mode.
+    //
+    //               See EShTargetClientVersion for choices.
+    //
+    // setEnvTarget: The language to translate to when generating code, and that
+    //               language's version.
+    //               Use EShTargetNone and version of 0 if there is no client, e.g.
+    //               for validation mode.
+    //
+    void setEnvInput(EShSource lang, EShLanguage envStage, EShClient client, int version)
+    {
+        environment.input.languageFamily = lang;
+        environment.input.stage = envStage;
+        environment.input.dialect = client;
+        environment.input.dialectVersion = version;
+    }
+    void setEnvClient(EShClient client, EShTargetClientVersion version)
+    {
+        environment.client.client = client;
+        environment.client.version = version;
+    }
+    void setEnvTarget(EShTargetLanguage lang, EShTargetLanguageVersion version)
+    {
+        environment.target.language = lang;
+        environment.target.version = version;
+    }
+
+    void getStrings(const char* const* &s, int& n) { s = strings; n = numStrings; }
+
+#ifdef ENABLE_HLSL
+    void setEnvTargetHlslFunctionality1() { environment.target.hlslFunctionality1 = true; }
+    bool getEnvTargetHlslFunctionality1() const { return environment.target.hlslFunctionality1; }
+#else
+    bool getEnvTargetHlslFunctionality1() const { return false; }
+#endif
+
+    void setEnvInputVulkanRulesRelaxed() { environment.input.vulkanRulesRelaxed = true; }
+    bool getEnvInputVulkanRulesRelaxed() const { return environment.input.vulkanRulesRelaxed; }
+
+    // Interface to #include handlers.
+    //
+    // To support #include, a client of Glslang does the following:
+    // 1. Call setStringsWithLengthsAndNames to set the source strings and associated
+    //    names.  For example, the names could be the names of the files
+    //    containing the shader sources.
+    // 2. Call parse with an Includer.
+    //
+    // When the Glslang parser encounters an #include directive, it calls
+    // the Includer's include method with the requested include name
+    // together with the current string name.  The returned IncludeResult
+    // contains the fully resolved name of the included source, together
+    // with the source text that should replace the #include directive
+    // in the source stream.  After parsing that source, Glslang will
+    // release the IncludeResult object.
+    class Includer {
+    public:
+        // An IncludeResult contains the resolved name and content of a source
+        // inclusion.
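+        //
+        // As a hedged sketch (file reading and error handling elided), a
+        // minimal includer built on this interface might look like:
+        //
+        //     class MyIncluder : public glslang::TShader::Includer {
+        //     public:
+        //         IncludeResult* includeLocal(const char* headerName, const char* /*includerName*/,
+        //                                     size_t /*depth*/) override
+        //         {
+        //             std::string* contents = loadFileSomehow(headerName); // hypothetical helper
+        //             if (contents == nullptr)
+        //                 return nullptr;
+        //             return new IncludeResult(headerName, contents->data(), contents->size(), contents);
+        //         }
+        //         void releaseInclude(IncludeResult* result) override
+        //         {
+        //             if (result != nullptr) {
+        //                 delete static_cast<std::string*>(result->userData);
+        //                 delete result;
+        //             }
+        //         }
+        //     };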
+ struct IncludeResult { + IncludeResult(const std::string& headerName, const char* const headerData, const size_t headerLength, void* userData) : + headerName(headerName), headerData(headerData), headerLength(headerLength), userData(userData) { } + // For a successful inclusion, the fully resolved name of the requested + // include. For example, in a file system-based includer, full resolution + // should convert a relative path name into an absolute path name. + // For a failed inclusion, this is an empty string. + const std::string headerName; + // The content and byte length of the requested inclusion. The + // Includer producing this IncludeResult retains ownership of the + // storage. + // For a failed inclusion, the header + // field points to a string containing error details. + const char* const headerData; + const size_t headerLength; + // Include resolver's context. + void* userData; + protected: + IncludeResult& operator=(const IncludeResult&); + IncludeResult(); + }; + + // For both include methods below: + // + // Resolves an inclusion request by name, current source name, + // and include depth. + // On success, returns an IncludeResult containing the resolved name + // and content of the include. + // On failure, returns a nullptr, or an IncludeResult + // with an empty string for the headerName and error details in the + // header field. + // The Includer retains ownership of the contents + // of the returned IncludeResult value, and those contents must + // remain valid until the releaseInclude method is called on that + // IncludeResult object. + // + // Note "local" vs. "system" is not an "either/or": "local" is an + // extra thing to do over "system". Both might get called, as per + // the C++ specification. + + // For the "system" or <>-style includes; search the "system" paths. + virtual IncludeResult* includeSystem(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // For the "local"-only aspect of a "" include. Should not search in the + // "system" paths, because on returning a failure, the parser will + // call includeSystem() to look in the "system" locations. + virtual IncludeResult* includeLocal(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // Signals that the parser will no longer use the contents of the + // specified IncludeResult. + virtual void releaseInclude(IncludeResult*) = 0; + virtual ~Includer() {} + }; + + // Fail all Includer searches + class ForbidIncluder : public Includer { + public: + virtual void releaseInclude(IncludeResult*) override { } + }; + + GLSLANG_EXPORT bool parse( + const TBuiltInResource*, int defaultVersion, EProfile defaultProfile, + bool forceDefaultVersionAndProfile, bool forwardCompatible, + EShMessages, Includer&); + + bool parse(const TBuiltInResource* res, int defaultVersion, EProfile defaultProfile, bool forceDefaultVersionAndProfile, + bool forwardCompatible, EShMessages messages) + { + TShader::ForbidIncluder includer; + return parse(res, defaultVersion, defaultProfile, forceDefaultVersionAndProfile, forwardCompatible, messages, includer); + } + + // Equivalent to parse() without a default profile and without forcing defaults. 
+    bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages)
+    {
+        return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages);
+    }
+
+    bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages,
+               Includer& includer)
+    {
+        return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages, includer);
+    }
+
+    // NOTE: Doing just preprocessing to obtain a correct preprocessed shader string
+    // is not an officially supported or fully working path.
+    GLSLANG_EXPORT bool preprocess(
+        const TBuiltInResource* builtInResources, int defaultVersion,
+        EProfile defaultProfile, bool forceDefaultVersionAndProfile,
+        bool forwardCompatible, EShMessages message, std::string* outputString,
+        Includer& includer);
+
+    GLSLANG_EXPORT const char* getInfoLog();
+    GLSLANG_EXPORT const char* getInfoDebugLog();
+    EShLanguage getStage() const { return stage; }
+    TIntermediate* getIntermediate() const { return intermediate; }
+
+protected:
+    TPoolAllocator* pool;
+    EShLanguage stage;
+    TCompiler* compiler;
+    TIntermediate* intermediate;
+    TInfoSink* infoSink;
+    // strings and lengths follow the standard for glShaderSource:
+    //     strings is an array of numStrings pointers to string data.
+    //     lengths can be null, but if not it is an array of numStrings
+    //     integers containing the length of the associated strings.
+    //     if lengths is null or lengths[n] < 0 the associated strings[n] is
+    //     assumed to be null-terminated.
+    // stringNames is the optional names for all the strings. If stringNames
+    // is null, then none of the strings has a name. If a certain element in
+    // stringNames is null, then the corresponding string does not have a name.
+    const char* const* strings;      // explicit code to compile, see previous comment
+    const int* lengths;
+    const char* const* stringNames;
+    int numStrings;                  // size of the above arrays
+    const char* preamble;            // string of implicit code to compile before the explicitly provided code
+
+    // a function in the source string can be renamed FROM this TO the name given in setEntryPoint.
+    std::string sourceEntryPointName;
+
+    // overrides #version in shader source or default version if #version isn't present
+    int overrideVersion;
+
+    TEnvironment environment;
+
+    friend class TProgram;
+
+private:
+    TShader& operator=(TShader&);
+};
+
+#if !defined(GLSLANG_WEB)
+
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
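+//
+// Illustrative use of the queries below (a hedged sketch; it assumes a
+// successfully linked glslang::TProgram and the standard TProgram
+// reflection calls, which are not part of this excerpt):
+//
+//     program.buildReflection();
+//     for (int i = 0; i < program.getNumLiveUniformVariables(); ++i)
+//         printf("%s\n", program.getUniform(i).name.c_str());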
+//
+
+// Data needed for just a single object at the granularity exchanged by the reflection API
+class TObjectReflection {
+public:
+    GLSLANG_EXPORT TObjectReflection(const std::string& pName, const TType& pType, int pOffset, int pGLDefineType, int pSize, int pIndex);
+
+    const TType* getType() const { return type; }
+    GLSLANG_EXPORT int getBinding() const;
+    GLSLANG_EXPORT void dump() const;
+    static TObjectReflection badReflection() { return TObjectReflection(); }
+
+    std::string name;
+    int offset;
+    int glDefineType;
+    int size;                // data size in bytes for a block, array size for a (non-block) object that's an array
+    int index;
+    int counterIndex;
+    int numMembers;
+    int arrayStride;         // stride of an array variable
+    int topLevelArraySize;   // size of the top-level variable in a storage buffer member
+    int topLevelArrayStride; // stride of the top-level variable in a storage buffer member
+    EShLanguageMask stages;
+
+protected:
+    TObjectReflection()
+        : offset(-1), glDefineType(-1), size(-1), index(-1), counterIndex(-1), numMembers(-1), arrayStride(0),
+          topLevelArrayStride(0), stages(EShLanguageMask(0)), type(nullptr)
+    {
+    }
+
+    const TType* type;
+};
+
+class TReflection;
+class TIoMapper;
+struct TVarEntryInfo;
+
+// Allows the application to customize the binding layout after linking.
+// All used uniform variables will invoke at least validateBinding.
+// If validateBinding returns true, then resolveBinding, resolveSet,
+// and resolveUniformLocation are invoked to resolve the binding,
+// descriptor set index, and location, respectively.
+//
+// Invocations happen in a particular order:
+// 1) all shader inputs
+// 2) all shader outputs
+// 3) all uniforms with binding and set already defined
+// 4) all uniforms with binding but no set defined
+// 5) all uniforms with set but no binding defined
+// 6) all uniforms with no binding and no set defined
+//
+// mapIO will use this resolver in two phases. The first
+// phase is a notification phase, calling the corresponding
+// notify callbacks; this phase ends with a call to endNotifications.
+// Phase two starts directly after the call to endNotifications
+// and calls all other callbacks to validate and to get the
+// bindings, sets, locations, component and color indices.
+//
+// NOTE: limit checks are still applied to bindings and sets
+// and may result in an error.
+class TIoMapResolver
+{
+public:
+    virtual ~TIoMapResolver() {}
+
+    // Should return true if the resulting/current binding would be okay.
+    // Basic idea is to do aliasing binding checks with this.
+    virtual bool validateBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current binding should be overridden.
+    // Return -1 if the current binding (including no binding) should be kept.
+    virtual int resolveBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current set should be overridden.
+    // Return -1 if the current set (including no set) should be kept.
+    virtual int resolveSet(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current location should be overridden.
+    // Return -1 if the current location (including no location) should be kept.
+    virtual int resolveUniformLocation(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return true if the resulting/current setup would be okay.
+    // Basic idea is to do aliasing checks and reject invalid semantic names.
+    virtual bool validateInOut(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current location should be overridden.
+    // Return -1 if the current location (including no location) should be kept.
+    virtual int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current component index should be overridden.
+    // Return -1 if the current component index (including no index) should be kept.
+    virtual int resolveInOutComponent(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current color index should be overridden.
+    // Return -1 if the current color index (including no index) should be kept.
+    virtual int resolveInOutIndex(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Notification of a uniform variable
+    virtual void notifyBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Notification of an in or out variable
+    virtual void notifyInOut(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Called by mapIO when it starts its notify pass for the given stage
+    virtual void beginNotifications(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the notify pass
+    virtual void endNotifications(EShLanguage stage) = 0;
+    // Called by mapIO when it starts its resolve pass for the given stage
+    virtual void beginResolve(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the resolve pass
+    virtual void endResolve(EShLanguage stage) = 0;
+    // Called by mapIO when it starts its symbol collect for the given stage
+    virtual void beginCollect(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the symbol collect
+    virtual void endCollect(EShLanguage stage) = 0;
+    // Called by TSlotCollector to resolve storage locations or bindings
+    virtual void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0;
+    // Called by TSlotCollector to resolve resource locations or bindings
+    virtual void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0;
+    // Called by mapIO.addStage to set the shader stage mask, marking a stage as added to this pipeline
+    virtual void addStage(EShLanguage stage, TIntermediate& stageIntermediate) = 0;
+};
+
+#endif // !GLSLANG_WEB
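The "-1 keeps the current value, >= 0 overrides" convention above reads most clearly as a do-nothing resolver. A minimal sketch, assuming only the TIoMapResolver interface above (and that TVarEntryInfo, TInfoSink, and TIntermediate live in the glslang namespace, as this header suggests); the KeepCurrentResolver name is illustrative. Such a resolver could be passed to TProgram::mapIO, declared below:

class KeepCurrentResolver : public glslang::TIoMapResolver {
public:
    // Validation: accept whatever bindings/locations are already present.
    bool validateBinding(EShLanguage, glslang::TVarEntryInfo&) override { return true; }
    bool validateInOut(EShLanguage, glslang::TVarEntryInfo&) override   { return true; }

    // Resolution: -1 means "keep the current value (including none)".
    int resolveBinding(EShLanguage, glslang::TVarEntryInfo&) override         { return -1; }
    int resolveSet(EShLanguage, glslang::TVarEntryInfo&) override             { return -1; }
    int resolveUniformLocation(EShLanguage, glslang::TVarEntryInfo&) override { return -1; }
    int resolveInOutLocation(EShLanguage, glslang::TVarEntryInfo&) override   { return -1; }
    int resolveInOutComponent(EShLanguage, glslang::TVarEntryInfo&) override  { return -1; }
    int resolveInOutIndex(EShLanguage, glslang::TVarEntryInfo&) override      { return -1; }

    // Notification and pass-boundary callbacks: nothing to track here.
    void notifyBinding(EShLanguage, glslang::TVarEntryInfo&) override { }
    void notifyInOut(EShLanguage, glslang::TVarEntryInfo&) override   { }
    void beginNotifications(EShLanguage) override { }
    void endNotifications(EShLanguage) override   { }
    void beginResolve(EShLanguage) override { }
    void endResolve(EShLanguage) override   { }
    void beginCollect(EShLanguage) override { }
    void endCollect(EShLanguage) override   { }
    void reserverStorageSlot(glslang::TVarEntryInfo&, glslang::TInfoSink&) override  { }
    void reserverResourceSlot(glslang::TVarEntryInfo&, glslang::TInfoSink&) override { }
    void addStage(EShLanguage, glslang::TIntermediate&) override { }
};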
+// Make one TProgram per set of shaders that will get linked together. Add all
+// the shaders that are to be linked together. After calling shader.parse()
+// for all shaders, call link().
+//
+// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
+//
+class TProgram {
+public:
+    GLSLANG_EXPORT TProgram();
+    GLSLANG_EXPORT virtual ~TProgram();
+    void addShader(TShader* shader) { stages[shader->stage].push_back(shader); }
+    std::list<TShader*>& getShaders(EShLanguage stage) { return stages[stage]; }
+    // Link Validation interface
+    GLSLANG_EXPORT bool link(EShMessages);
+    GLSLANG_EXPORT const char* getInfoLog();
+    GLSLANG_EXPORT const char* getInfoDebugLog();
+
+    TIntermediate* getIntermediate(EShLanguage stage) const { return intermediate[stage]; }
+
+#if !defined(GLSLANG_WEB)
+
+    // Reflection Interface
+
+    // call first, to do liveness analysis, index mapping, etc.; returns false on failure
+    GLSLANG_EXPORT bool buildReflection(int opts = EShReflectionDefault);
+    GLSLANG_EXPORT unsigned getLocalSize(int dim) const; // return dim'th local size
+    GLSLANG_EXPORT int getReflectionIndex(const char *name) const;
+    GLSLANG_EXPORT int getReflectionPipeIOIndex(const char* name, const bool inOrOut) const;
+    GLSLANG_EXPORT int getNumUniformVariables() const;
+    GLSLANG_EXPORT const TObjectReflection& getUniform(int index) const;
+    GLSLANG_EXPORT int getNumUniformBlocks() const;
+    GLSLANG_EXPORT const TObjectReflection& getUniformBlock(int index) const;
+    GLSLANG_EXPORT int getNumPipeInputs() const;
+    GLSLANG_EXPORT const TObjectReflection& getPipeInput(int index) const;
+    GLSLANG_EXPORT int getNumPipeOutputs() const;
+    GLSLANG_EXPORT const TObjectReflection& getPipeOutput(int index) const;
+    GLSLANG_EXPORT int getNumBufferVariables() const;
+    GLSLANG_EXPORT const TObjectReflection& getBufferVariable(int index) const;
+    GLSLANG_EXPORT int getNumBufferBlocks() const;
+    GLSLANG_EXPORT const TObjectReflection& getBufferBlock(int index) const;
+    GLSLANG_EXPORT int getNumAtomicCounters() const;
+    GLSLANG_EXPORT const TObjectReflection& getAtomicCounter(int index) const;
+
+    // Legacy Reflection Interface - expressed in terms of above interface
+
+    // can be used for glGetProgramiv(GL_ACTIVE_UNIFORMS)
+    int getNumLiveUniformVariables() const { return getNumUniformVariables(); }
+
+    // can be used for glGetProgramiv(GL_ACTIVE_UNIFORM_BLOCKS)
+    int getNumLiveUniformBlocks() const { return getNumUniformBlocks(); }
+
+    // can be used for glGetProgramiv(GL_ACTIVE_ATTRIBUTES)
+    int getNumLiveAttributes() const { return getNumPipeInputs(); }
+
+    // can be used for glGetUniformIndices()
+    int getUniformIndex(const char *name) const { return getReflectionIndex(name); }
+
+    int getPipeIOIndex(const char *name, const bool inOrOut) const
+    { return getReflectionPipeIOIndex(name, inOrOut); }
+
+    // can be used for "name" part of glGetActiveUniform()
+    const char *getUniformName(int index) const { return getUniform(index).name.c_str(); }
+
+    // returns the binding number
+    int getUniformBinding(int index) const { return getUniform(index).getBinding(); }
+
+    // returns the shader stages where a uniform is present
+    EShLanguageMask getUniformStages(int index) const { return getUniform(index).stages; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_BLOCK_INDEX)
+    int getUniformBlockIndex(int index) const { return getUniform(index).index; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_TYPE)
+    int getUniformType(int index) const { return getUniform(index).glDefineType; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_OFFSET)
+    int getUniformBufferOffset(int index) const { return getUniform(index).offset; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_SIZE)
+    int getUniformArraySize(int index) const { return getUniform(index).size; }
+    // returns a TType*
+    const TType *getUniformTType(int index) const { return getUniform(index).getType(); }
+
+    // can be used for glGetActiveUniformBlockName()
+    const char *getUniformBlockName(int index) const { return getUniformBlock(index).name.c_str(); }
+
+    // can be used for glGetActiveUniformBlockiv(UNIFORM_BLOCK_DATA_SIZE)
+    int getUniformBlockSize(int index) const { return getUniformBlock(index).size; }
+
+    // returns the block binding number
+    int getUniformBlockBinding(int index) const { return getUniformBlock(index).getBinding(); }
+
+    // returns block index of associated counter.
+    int getUniformBlockCounterIndex(int index) const { return getUniformBlock(index).counterIndex; }
+
+    // returns a TType*
+    const TType *getUniformBlockTType(int index) const { return getUniformBlock(index).getType(); }
+
+    // can be used for glGetActiveAttrib()
+    const char *getAttributeName(int index) const { return getPipeInput(index).name.c_str(); }
+
+    // can be used for glGetActiveAttrib()
+    int getAttributeType(int index) const { return getPipeInput(index).glDefineType; }
+
+    // returns a TType*
+    const TType *getAttributeTType(int index) const { return getPipeInput(index).getType(); }
+
+    GLSLANG_EXPORT void dumpReflection();
+
+    // I/O mapping: apply base offsets and map live unbound variables
+    // If resolver is not provided it uses the previous approach
+    // and respects auto assignment and offsets.
+    GLSLANG_EXPORT bool mapIO(TIoMapResolver* pResolver = nullptr, TIoMapper* pIoMapper = nullptr);
+#endif // !GLSLANG_WEB
+
+protected:
+    GLSLANG_EXPORT bool linkStage(EShLanguage, EShMessages);
+    GLSLANG_EXPORT bool crossStageCheck(EShMessages);
+
+    TPoolAllocator* pool;
+    std::list<TShader*> stages[EShLangCount];
+    TIntermediate* intermediate[EShLangCount];
+    bool newedIntermediate[EShLangCount]; // track which intermediates were "new" versus reusing a singleton unit in a stage
+    TInfoSink* infoSink;
+#if !defined(GLSLANG_WEB)
+    TReflection* reflection;
+#endif
+    bool linked;
+
+private:
+    TProgram(TProgram&);
+    TProgram& operator=(TProgram&);
+};
+
+} // end namespace glslang
+
+#endif // _COMPILER_INTERFACE_INCLUDED_
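The TShader/TProgram flow this header describes is short in practice. A minimal sketch, assuming the usual glslang entry points (InitializeProcess/FinalizeProcess, TShader's constructor and setStrings) declared elsewhere in ShaderLang.h, and a TBuiltInResource instance supplied by the application:

#include <cstdio>

bool compileCompute(const char* source, const TBuiltInResource& resources)
{
    glslang::InitializeProcess(); // once per process
    bool ok = false;
    {
        glslang::TShader shader(EShLangCompute);
        shader.setStrings(&source, 1);
        ok = shader.parse(&resources, /*defaultVersion*/ 450,
                          /*forwardCompatible*/ false, EShMsgDefault);

        glslang::TProgram program;  // declared after the shader, so it is
        program.addShader(&shader); // destroyed first, as the N.B. above requires
        ok = ok && program.link(EShMsgDefault);

        if (!ok)
            std::printf("%s\n", program.getInfoLog());
        else if (program.buildReflection()) // must precede the getNum* queries
            std::printf("live uniforms: %d\n", program.getNumUniformVariables());
    }
    glslang::FinalizeProcess();
    return ok;
}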
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/resource_limits_c.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/resource_limits_c.h
new file mode 100644
index 0000000..05aa8eb
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/Public/resource_limits_c.h
@@ -0,0 +1,57 @@
+/**
+BSD 2-Clause License
+
+Copyright (c) 2020, Travis Fort
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**/
+
+#ifndef _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
+#define _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
+
+#include "../Include/glslang_c_interface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Returns a struct that can be used to create custom resource values.
+glslang_resource_t* glslang_resource(void);
+
+// These are the default resources for TBuiltInResources, used for both
+//  - parsing this string for the case where the user didn't supply one,
+//  - dumping out a template for user construction of a config file.
+const glslang_resource_t* glslang_default_resource(void);
+
+// Returns the DefaultTBuiltInResource as a human-readable string.
+// NOTE: User is responsible for freeing this string.
+const char* glslang_default_resource_string();
+
+// Decodes the resource limits from |config| to |resources|.
+void glslang_decode_resource_limits(glslang_resource_t* resources, char* config);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.AMD.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.AMD.h
new file mode 100644
index 0000000..009d2f1
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.AMD.h
@@ -0,0 +1,108 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/ + +#ifndef GLSLextAMD_H +#define GLSLextAMD_H + +static const int GLSLextAMDVersion = 100; +static const int GLSLextAMDRevision = 7; + +// SPV_AMD_shader_ballot +static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; + +enum ShaderBallotAMD { + ShaderBallotBadAMD = 0, // Don't use + + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4, + + ShaderBallotCountAMD +}; + +// SPV_AMD_shader_trinary_minmax +static const char* const E_SPV_AMD_shader_trinary_minmax = "SPV_AMD_shader_trinary_minmax"; + +enum ShaderTrinaryMinMaxAMD { + ShaderTrinaryMinMaxBadAMD = 0, // Don't use + + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9, + + ShaderTrinaryMinMaxCountAMD +}; + +// SPV_AMD_shader_explicit_vertex_parameter +static const char* const E_SPV_AMD_shader_explicit_vertex_parameter = "SPV_AMD_shader_explicit_vertex_parameter"; + +enum ShaderExplicitVertexParameterAMD { + ShaderExplicitVertexParameterBadAMD = 0, // Don't use + + InterpolateAtVertexAMD = 1, + + ShaderExplicitVertexParameterCountAMD +}; + +// SPV_AMD_gcn_shader +static const char* const E_SPV_AMD_gcn_shader = "SPV_AMD_gcn_shader"; + +enum GcnShaderAMD { + GcnShaderBadAMD = 0, // Don't use + + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3, + + GcnShaderCountAMD +}; + +// SPV_AMD_gpu_shader_half_float +static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; + +// SPV_AMD_texture_gather_bias_lod +static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_gather_bias_lod"; + +// SPV_AMD_gpu_shader_int16 +static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"; + +// SPV_AMD_shader_image_load_store_lod +static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod"; + +// SPV_AMD_shader_fragment_mask +static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask"; + +// SPV_AMD_gpu_shader_half_float_fetch +static const char* const E_SPV_AMD_gpu_shader_half_float_fetch = "SPV_AMD_gpu_shader_half_float_fetch"; + +#endif // #ifndef GLSLextAMD_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.EXT.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.EXT.h new file mode 100644 index 0000000..a247b4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.EXT.h @@ -0,0 +1,44 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextEXT_H +#define GLSLextEXT_H + +static const int GLSLextEXTVersion = 100; +static const int GLSLextEXTRevision = 2; + +static const char* const E_SPV_EXT_shader_stencil_export = "SPV_EXT_shader_stencil_export"; +static const char* const E_SPV_EXT_shader_viewport_index_layer = "SPV_EXT_shader_viewport_index_layer"; +static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered"; +static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density"; +static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation"; +static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add"; +static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add"; +static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max"; +static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64"; +static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader"; + +#endif // #ifndef GLSLextEXT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.KHR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.KHR.h new file mode 100644 index 0000000..d5c670f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.KHR.h @@ -0,0 +1,58 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. +** Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextKHR_H +#define GLSLextKHR_H + +static const int GLSLextKHRVersion = 100; +static const int GLSLextKHRRevision = 3; + +static const char* const E_SPV_KHR_shader_ballot = "SPV_KHR_shader_ballot"; +static const char* const E_SPV_KHR_subgroup_vote = "SPV_KHR_subgroup_vote"; +static const char* const E_SPV_KHR_device_group = "SPV_KHR_device_group"; +static const char* const E_SPV_KHR_multiview = "SPV_KHR_multiview"; +static const char* const E_SPV_KHR_shader_draw_parameters = "SPV_KHR_shader_draw_parameters"; +static const char* const E_SPV_KHR_16bit_storage = "SPV_KHR_16bit_storage"; +static const char* const E_SPV_KHR_8bit_storage = "SPV_KHR_8bit_storage"; +static const char* const E_SPV_KHR_storage_buffer_storage_class = "SPV_KHR_storage_buffer_storage_class"; +static const char* const E_SPV_KHR_post_depth_coverage = "SPV_KHR_post_depth_coverage"; +static const char* const E_SPV_KHR_vulkan_memory_model = "SPV_KHR_vulkan_memory_model"; +static const char* const E_SPV_EXT_physical_storage_buffer = "SPV_EXT_physical_storage_buffer"; +static const char* const E_SPV_KHR_physical_storage_buffer = "SPV_KHR_physical_storage_buffer"; +static const char* const E_SPV_EXT_fragment_shader_interlock = "SPV_EXT_fragment_shader_interlock"; +static const char* const E_SPV_KHR_shader_clock = "SPV_KHR_shader_clock"; +static const char* const E_SPV_KHR_non_semantic_info = "SPV_KHR_non_semantic_info"; +static const char* const E_SPV_KHR_ray_tracing = "SPV_KHR_ray_tracing"; +static const char* const E_SPV_KHR_ray_query = "SPV_KHR_ray_query"; +static const char* const E_SPV_KHR_fragment_shading_rate = "SPV_KHR_fragment_shading_rate"; +static const char* const E_SPV_KHR_terminate_invocation = "SPV_KHR_terminate_invocation"; +static const char* const E_SPV_KHR_workgroup_memory_explicit_layout = "SPV_KHR_workgroup_memory_explicit_layout"; +static const char* const E_SPV_KHR_subgroup_uniform_control_flow = "SPV_KHR_subgroup_uniform_control_flow"; +static const char* const E_SPV_KHR_fragment_shader_barycentric = "SPV_KHR_fragment_shader_barycentric"; +static const char* const E_SPV_AMD_shader_early_and_late_fragment_tests = "SPV_AMD_shader_early_and_late_fragment_tests"; + +#endif // #ifndef GLSLextKHR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.NV.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.NV.h new file mode 100644 index 0000000..93c98bf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.ext.NV.h @@ -0,0 +1,84 @@ +/* +** Copyright (c) 2014-2017 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextNV_H +#define GLSLextNV_H + +enum BuiltIn; +enum Decoration; +enum Op; +enum Capability; + +static const int GLSLextNVVersion = 100; +static const int GLSLextNVRevision = 11; + +//SPV_NV_sample_mask_override_coverage +const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage"; + +//SPV_NV_geometry_shader_passthrough +const char* const E_SPV_NV_geometry_shader_passthrough = "SPV_NV_geometry_shader_passthrough"; + +//SPV_NV_viewport_array2 +const char* const E_SPV_NV_viewport_array2 = "SPV_NV_viewport_array2"; +const char* const E_ARB_shader_viewport_layer_array = "SPV_ARB_shader_viewport_layer_array"; + +//SPV_NV_stereo_view_rendering +const char* const E_SPV_NV_stereo_view_rendering = "SPV_NV_stereo_view_rendering"; + +//SPV_NVX_multiview_per_view_attributes +const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes"; + +//SPV_NV_shader_subgroup_partitioned +const char* const E_SPV_NV_shader_subgroup_partitioned = "SPV_NV_shader_subgroup_partitioned"; + +//SPV_NV_fragment_shader_barycentric +const char* const E_SPV_NV_fragment_shader_barycentric = "SPV_NV_fragment_shader_barycentric"; + +//SPV_NV_compute_shader_derivatives +const char* const E_SPV_NV_compute_shader_derivatives = "SPV_NV_compute_shader_derivatives"; + +//SPV_NV_shader_image_footprint +const char* const E_SPV_NV_shader_image_footprint = "SPV_NV_shader_image_footprint"; + +//SPV_NV_mesh_shader +const char* const E_SPV_NV_mesh_shader = "SPV_NV_mesh_shader"; + +//SPV_NV_raytracing +const char* const E_SPV_NV_ray_tracing = "SPV_NV_ray_tracing"; + +//SPV_NV_ray_tracing_motion_blur +const char* const E_SPV_NV_ray_tracing_motion_blur = "SPV_NV_ray_tracing_motion_blur"; + +//SPV_NV_shading_rate +const char* const E_SPV_NV_shading_rate = "SPV_NV_shading_rate"; + +//SPV_NV_cooperative_matrix +const char* const E_SPV_NV_cooperative_matrix = "SPV_NV_cooperative_matrix"; + +//SPV_NV_shader_sm_builtins +const char* const E_SPV_NV_shader_sm_builtins = "SPV_NV_shader_sm_builtins"; + +#endif // #ifndef GLSLextNV_H diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.std.450.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.std.450.h new file mode 100644 index 0000000..df31092 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/
+
+#ifndef GLSLstd450_H
+#define GLSLstd450_H
+
+static const int GLSLstd450Version = 100;
+static const int GLSLstd450Revision = 1;
+
+enum GLSLstd450 {
+    GLSLstd450Bad = 0,              // Don't use
+
+    GLSLstd450Round = 1,
+    GLSLstd450RoundEven = 2,
+    GLSLstd450Trunc = 3,
+    GLSLstd450FAbs = 4,
+    GLSLstd450SAbs = 5,
+    GLSLstd450FSign = 6,
+    GLSLstd450SSign = 7,
+    GLSLstd450Floor = 8,
+    GLSLstd450Ceil = 9,
+    GLSLstd450Fract = 10,
+
+    GLSLstd450Radians = 11,
+    GLSLstd450Degrees = 12,
+    GLSLstd450Sin = 13,
+    GLSLstd450Cos = 14,
+    GLSLstd450Tan = 15,
+    GLSLstd450Asin = 16,
+    GLSLstd450Acos = 17,
+    GLSLstd450Atan = 18,
+    GLSLstd450Sinh = 19,
+    GLSLstd450Cosh = 20,
+    GLSLstd450Tanh = 21,
+    GLSLstd450Asinh = 22,
+    GLSLstd450Acosh = 23,
+    GLSLstd450Atanh = 24,
+    GLSLstd450Atan2 = 25,
+
+    GLSLstd450Pow = 26,
+    GLSLstd450Exp = 27,
+    GLSLstd450Log = 28,
+    GLSLstd450Exp2 = 29,
+    GLSLstd450Log2 = 30,
+    GLSLstd450Sqrt = 31,
+    GLSLstd450InverseSqrt = 32,
+
+    GLSLstd450Determinant = 33,
+    GLSLstd450MatrixInverse = 34,
+
+    GLSLstd450Modf = 35,            // second operand needs an OpVariable to write to
+    GLSLstd450ModfStruct = 36,      // no OpVariable operand
+    GLSLstd450FMin = 37,
+    GLSLstd450UMin = 38,
+    GLSLstd450SMin = 39,
+    GLSLstd450FMax = 40,
+    GLSLstd450UMax = 41,
+    GLSLstd450SMax = 42,
+    GLSLstd450FClamp = 43,
+    GLSLstd450UClamp = 44,
+    GLSLstd450SClamp = 45,
+    GLSLstd450FMix = 46,
+    GLSLstd450IMix = 47,            // Reserved
+    GLSLstd450Step = 48,
+    GLSLstd450SmoothStep = 49,
+
+    GLSLstd450Fma = 50,
+    GLSLstd450Frexp = 51,           // second operand needs an OpVariable to write to
+    GLSLstd450FrexpStruct = 52,     // no OpVariable operand
+    GLSLstd450Ldexp = 53,
+
+    GLSLstd450PackSnorm4x8 = 54,
+    GLSLstd450PackUnorm4x8 = 55,
+    GLSLstd450PackSnorm2x16 = 56,
+    GLSLstd450PackUnorm2x16 = 57,
+    GLSLstd450PackHalf2x16 = 58,
+    GLSLstd450PackDouble2x32 = 59,
+    GLSLstd450UnpackSnorm2x16 = 60,
+    GLSLstd450UnpackUnorm2x16 = 61,
+    GLSLstd450UnpackHalf2x16 = 62,
+    GLSLstd450UnpackSnorm4x8 = 63,
+    GLSLstd450UnpackUnorm4x8 = 64,
+    GLSLstd450UnpackDouble2x32 = 65,
+
+    GLSLstd450Length = 66,
+    GLSLstd450Distance = 67,
+    GLSLstd450Cross = 68,
+    GLSLstd450Normalize = 69,
+    GLSLstd450FaceForward = 70,
+    GLSLstd450Reflect = 71,
+    GLSLstd450Refract = 72,
+
+    GLSLstd450FindILsb = 73,
+    GLSLstd450FindSMsb = 74,
+    GLSLstd450FindUMsb = 75,
+
+    GLSLstd450InterpolateAtCentroid = 76,
+    GLSLstd450InterpolateAtSample = 77,
+    GLSLstd450InterpolateAtOffset = 78,
+
+    GLSLstd450NMin = 79,
+    GLSLstd450NMax = 80,
+    GLSLstd450NClamp = 81,
+
+    GLSLstd450Count
+};
+
+#endif // #ifndef GLSLstd450_H
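These enumerants are the literal instruction numbers of OpExtInst instructions that import the "GLSL.std.450" set. A small sketch of where they appear in a raw SPIR-V word stream, assuming the standard SPIR-V instruction layout (word count in the high 16 bits, opcode in the low 16, OpExtInst = 12, operands: result type, result id, set id, instruction number); `words`, `i`, and `glslStd450SetId` are illustrative:

#include <cstdint>
#include <vector>

bool isFma(const std::vector<std::uint32_t>& words, std::size_t i, std::uint32_t glslStd450SetId)
{
    const std::uint32_t opcode    = words[i] & 0xffffu; // low 16 bits: opcode
    const std::uint32_t OpExtInst = 12;                 // per the SPIR-V spec
    return opcode == OpExtInst
        && words[i + 3] == glslStd450SetId              // the imported <set> operand
        && words[i + 4] == GLSLstd450Fma;               // literal instruction number
}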
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GlslangToSpv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GlslangToSpv.h
new file mode 100644
index 0000000..3907be4
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/GlslangToSpv.h
@@ -0,0 +1,61 @@
+//
+// Copyright (C) 2014 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+    #pragma warning(disable : 4464) // relative include path contains '..'
+#endif
+
+#include "SpvTools.h"
+#include "glslang/Include/intermediate.h"
+
+#include <string>
+#include <vector>
+
+#include "Logger.h"
+
+namespace glslang {
+
+void GetSpirvVersion(std::string&);
+int GetSpirvGeneratorVersion();
+void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                  SpvOptions* options = nullptr);
+void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                  spv::SpvBuildLogger* logger, SpvOptions* options = nullptr);
+void OutputSpvBin(const std::vector<unsigned int>& spirv, const char* baseName);
+void OutputSpvHex(const std::vector<unsigned int>& spirv, const char* baseName, const char* varName);
+
+}
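Lowering a parsed and linked program (see the TProgram example earlier) to SPIR-V is one call. A minimal sketch, assuming a successfully linked `program` for the compute stage and default SpvOptions (declared in SpvTools.h, included above); spv::SpvBuildLogger is declared in Logger.h, which follows:

#include <cstdio>
#include <string>
#include <vector>

std::vector<unsigned int> toSpirv(glslang::TProgram& program)
{
    std::vector<unsigned int> spirv;
    spv::SpvBuildLogger logger;
    glslang::GlslangToSpv(*program.getIntermediate(EShLangCompute), spirv, &logger);
    // Accumulated TBD/missing functionality notes, warnings, and errors:
    std::string messages = logger.getAllMessages();
    if (!messages.empty())
        std::printf("%s\n", messages.c_str());
    return spirv;
}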
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/Logger.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/Logger.h
new file mode 100644
index 0000000..411367c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/Logger.h
@@ -0,0 +1,83 @@
+//
+// Copyright (C) 2016 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of Google Inc. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef GLSLANG_SPIRV_LOGGER_H
+#define GLSLANG_SPIRV_LOGGER_H
+
+#include <string>
+#include <vector>
+
+namespace spv {
+
+// A class for holding all SPIR-V build status messages, including
+// missing/TBD functionalities, warnings, and errors.
+class SpvBuildLogger {
+public:
+    SpvBuildLogger() {}
+
+#ifdef GLSLANG_WEB
+    void tbdFunctionality(const std::string& f) { }
+    void missingFunctionality(const std::string& f) { }
+    void warning(const std::string& w) { }
+    void error(const std::string& e) { errors.push_back(e); }
+    std::string getAllMessages() { return ""; }
+#else
+
+    // Registers a TBD functionality.
+    void tbdFunctionality(const std::string& f);
+    // Registers a missing functionality.
+    void missingFunctionality(const std::string& f);
+
+    // Logs a warning.
+    void warning(const std::string& w) { warnings.push_back(w); }
+    // Logs an error.
+    void error(const std::string& e) { errors.push_back(e); }
+
+    // Returns all messages accumulated in the order of:
+    // TBD functionalities, missing functionalities, warnings, errors.
+    std::string getAllMessages() const;
+#endif
+
+private:
+    SpvBuildLogger(const SpvBuildLogger&);
+
+    std::vector<std::string> tbdFeatures;
+    std::vector<std::string> missingFeatures;
+    std::vector<std::string> warnings;
+    std::vector<std::string> errors;
+};
+
+} // end spv namespace
+
+#endif // GLSLANG_SPIRV_LOGGER_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/NonSemanticDebugPrintf.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/NonSemanticDebugPrintf.h
new file mode 100644
index 0000000..83796d7
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/NonSemanticDebugPrintf.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2020 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and/or associated documentation files (the
+// "Materials"), to deal in the Materials without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Materials, and to
+// permit persons to whom the Materials are furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+// https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +// + +#ifndef SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ +#define SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticDebugPrintfRevision = 1, + NonSemanticDebugPrintfRevision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticDebugPrintfInstructions { + NonSemanticDebugPrintfDebugPrintf = 1, + NonSemanticDebugPrintfInstructionsMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h new file mode 100644 index 0000000..c52f32f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h @@ -0,0 +1,171 @@ +// Copyright (c) 2018 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. 
+ +#ifndef SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ +#define SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticShaderDebugInfo100Version = 100, + NonSemanticShaderDebugInfo100Version_BitWidthPadding = 0x7fffffff +}; +enum { + NonSemanticShaderDebugInfo100Revision = 6, + NonSemanticShaderDebugInfo100Revision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100Instructions { + NonSemanticShaderDebugInfo100DebugInfoNone = 0, + NonSemanticShaderDebugInfo100DebugCompilationUnit = 1, + NonSemanticShaderDebugInfo100DebugTypeBasic = 2, + NonSemanticShaderDebugInfo100DebugTypePointer = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifier = 4, + NonSemanticShaderDebugInfo100DebugTypeArray = 5, + NonSemanticShaderDebugInfo100DebugTypeVector = 6, + NonSemanticShaderDebugInfo100DebugTypedef = 7, + NonSemanticShaderDebugInfo100DebugTypeFunction = 8, + NonSemanticShaderDebugInfo100DebugTypeEnum = 9, + NonSemanticShaderDebugInfo100DebugTypeComposite = 10, + NonSemanticShaderDebugInfo100DebugTypeMember = 11, + NonSemanticShaderDebugInfo100DebugTypeInheritance = 12, + NonSemanticShaderDebugInfo100DebugTypePtrToMember = 13, + NonSemanticShaderDebugInfo100DebugTypeTemplate = 14, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameter = 15, + NonSemanticShaderDebugInfo100DebugTypeTemplateTemplateParameter = 16, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameterPack = 17, + NonSemanticShaderDebugInfo100DebugGlobalVariable = 18, + NonSemanticShaderDebugInfo100DebugFunctionDeclaration = 19, + NonSemanticShaderDebugInfo100DebugFunction = 20, + NonSemanticShaderDebugInfo100DebugLexicalBlock = 21, + NonSemanticShaderDebugInfo100DebugLexicalBlockDiscriminator = 22, + NonSemanticShaderDebugInfo100DebugScope = 23, + NonSemanticShaderDebugInfo100DebugNoScope = 24, + NonSemanticShaderDebugInfo100DebugInlinedAt = 25, + NonSemanticShaderDebugInfo100DebugLocalVariable = 26, + NonSemanticShaderDebugInfo100DebugInlinedVariable = 27, + NonSemanticShaderDebugInfo100DebugDeclare = 28, + NonSemanticShaderDebugInfo100DebugValue = 29, + NonSemanticShaderDebugInfo100DebugOperation = 30, + NonSemanticShaderDebugInfo100DebugExpression = 31, + NonSemanticShaderDebugInfo100DebugMacroDef = 32, + NonSemanticShaderDebugInfo100DebugMacroUndef = 33, + NonSemanticShaderDebugInfo100DebugImportedEntity = 34, + NonSemanticShaderDebugInfo100DebugSource = 35, + NonSemanticShaderDebugInfo100DebugFunctionDefinition = 101, + NonSemanticShaderDebugInfo100DebugSourceContinued = 102, + NonSemanticShaderDebugInfo100DebugLine = 103, + NonSemanticShaderDebugInfo100DebugNoLine = 104, + NonSemanticShaderDebugInfo100DebugBuildIdentifier = 105, + NonSemanticShaderDebugInfo100DebugStoragePath = 106, + NonSemanticShaderDebugInfo100DebugEntryPoint = 107, + NonSemanticShaderDebugInfo100DebugTypeMatrix = 108, + NonSemanticShaderDebugInfo100InstructionsMax = 0x7fffffff +}; + + +enum NonSemanticShaderDebugInfo100DebugInfoFlags { + NonSemanticShaderDebugInfo100None = 0x0000, + NonSemanticShaderDebugInfo100FlagIsProtected = 0x01, + NonSemanticShaderDebugInfo100FlagIsPrivate = 0x02, + NonSemanticShaderDebugInfo100FlagIsPublic = 0x03, + NonSemanticShaderDebugInfo100FlagIsLocal = 0x04, + NonSemanticShaderDebugInfo100FlagIsDefinition = 0x08, + NonSemanticShaderDebugInfo100FlagFwdDecl = 0x10, + NonSemanticShaderDebugInfo100FlagArtificial = 0x20, + NonSemanticShaderDebugInfo100FlagExplicit = 0x40, + NonSemanticShaderDebugInfo100FlagPrototyped = 0x80, + 
NonSemanticShaderDebugInfo100FlagObjectPointer = 0x100, + NonSemanticShaderDebugInfo100FlagStaticMember = 0x200, + NonSemanticShaderDebugInfo100FlagIndirectVariable = 0x400, + NonSemanticShaderDebugInfo100FlagLValueReference = 0x800, + NonSemanticShaderDebugInfo100FlagRValueReference = 0x1000, + NonSemanticShaderDebugInfo100FlagIsOptimized = 0x2000, + NonSemanticShaderDebugInfo100FlagIsEnumClass = 0x4000, + NonSemanticShaderDebugInfo100FlagTypePassByValue = 0x8000, + NonSemanticShaderDebugInfo100FlagTypePassByReference = 0x10000, + NonSemanticShaderDebugInfo100FlagUnknownPhysicalLayout = 0x20000, + NonSemanticShaderDebugInfo100DebugInfoFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100BuildIdentifierFlags { + NonSemanticShaderDebugInfo100IdentifierPossibleDuplicates = 0x01, + NonSemanticShaderDebugInfo100BuildIdentifierFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncoding { + NonSemanticShaderDebugInfo100Unspecified = 0, + NonSemanticShaderDebugInfo100Address = 1, + NonSemanticShaderDebugInfo100Boolean = 2, + NonSemanticShaderDebugInfo100Float = 3, + NonSemanticShaderDebugInfo100Signed = 4, + NonSemanticShaderDebugInfo100SignedChar = 5, + NonSemanticShaderDebugInfo100Unsigned = 6, + NonSemanticShaderDebugInfo100UnsignedChar = 7, + NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncodingMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugCompositeType { + NonSemanticShaderDebugInfo100Class = 0, + NonSemanticShaderDebugInfo100Structure = 1, + NonSemanticShaderDebugInfo100Union = 2, + NonSemanticShaderDebugInfo100DebugCompositeTypeMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugTypeQualifier { + NonSemanticShaderDebugInfo100ConstType = 0, + NonSemanticShaderDebugInfo100VolatileType = 1, + NonSemanticShaderDebugInfo100RestrictType = 2, + NonSemanticShaderDebugInfo100AtomicType = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifierMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugOperation { + NonSemanticShaderDebugInfo100Deref = 0, + NonSemanticShaderDebugInfo100Plus = 1, + NonSemanticShaderDebugInfo100Minus = 2, + NonSemanticShaderDebugInfo100PlusUconst = 3, + NonSemanticShaderDebugInfo100BitPiece = 4, + NonSemanticShaderDebugInfo100Swap = 5, + NonSemanticShaderDebugInfo100Xderef = 6, + NonSemanticShaderDebugInfo100StackValue = 7, + NonSemanticShaderDebugInfo100Constu = 8, + NonSemanticShaderDebugInfo100Fragment = 9, + NonSemanticShaderDebugInfo100DebugOperationMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugImportedEntity { + NonSemanticShaderDebugInfo100ImportedModule = 0, + NonSemanticShaderDebugInfo100ImportedDeclaration = 1, + NonSemanticShaderDebugInfo100DebugImportedEntityMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SPVRemapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SPVRemapper.h new file mode 100644 index 0000000..d216946 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SPVRemapper.h @@ -0,0 +1,312 @@ +// +// Copyright (C) 2015 LunarG, Inc. +// +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef SPIRVREMAPPER_H
+#define SPIRVREMAPPER_H
+
+#include <string>
+#include <vector>
+#include <cstdlib>
+#include <exception>
+
+namespace spv {
+
+// MSVC defines __cplusplus as an older value, even when it supports almost all of 11.
+// We handle that here by making our own symbol.
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1700)
+#   define use_cpp11 1
+#endif
+
+class spirvbin_base_t
+{
+public:
+   enum Options {
+      NONE          = 0,
+      STRIP         = (1<<0),
+      MAP_TYPES     = (1<<1),
+      MAP_NAMES     = (1<<2),
+      MAP_FUNCS     = (1<<3),
+      DCE_FUNCS     = (1<<4),
+      DCE_VARS      = (1<<5),
+      DCE_TYPES     = (1<<6),
+      OPT_LOADSTORE = (1<<7),
+      OPT_FWD_LS    = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV
+      MAP_ALL       = (MAP_TYPES | MAP_NAMES | MAP_FUNCS),
+      DCE_ALL       = (DCE_FUNCS | DCE_VARS | DCE_TYPES),
+      OPT_ALL       = (OPT_LOADSTORE),
+
+      ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL),
+      DO_EVERYTHING = (STRIP | ALL_BUT_STRIP)
+   };
+};
+
+} // namespace SPV
+
+#if !defined (use_cpp11)
+#include <cstdio>
+#include <cstdint>
+
+namespace spv {
+class spirvbin_t : public spirvbin_base_t
+{
+public:
+    spirvbin_t(int /*verbose = 0*/) { }
+
+    void remap(std::vector<std::uint32_t>& /*spv*/, unsigned int /*opts = 0*/)
+    {
+        printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n");
+        exit(5);
+    }
+};
+
+} // namespace SPV
+
+#else // defined (use_cpp11)
+
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+#include <functional>
+#include <cassert>
+#include <cstdint>
+
+#include "spirv.hpp"
+#include "spvIR.h"
+
+namespace spv {
+
+// class to hold SPIR-V binary data for remapping, DCE, and debug stripping
+class spirvbin_t : public spirvbin_base_t
+{
+public:
+    spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose), errorLatch(false)
+    { }
+
+    virtual ~spirvbin_t() { }
+
+    // remap on an existing binary in memory
+    void remap(std::vector<std::uint32_t>& spv, const std::vector<std::string>& whiteListStrings,
+               std::uint32_t opts = DO_EVERYTHING);
+
+    // remap on an existing binary in memory - legacy interface without white list
+    void remap(std::vector<std::uint32_t>& spv, std::uint32_t opts = DO_EVERYTHING);
+
+    // Type for error/log handler functions
+    typedef std::function<void(const std::string&)> errorfn_t;
+    typedef std::function<void(const std::string&)> logfn_t;
+
+    // Register error/log handling functions (can be lambda fn / functor / etc)
+    static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; }
+    static void registerLogHandler(logfn_t handler)     { logHandler = handler; }
+
+protected:
+    // This can be overridden to provide other message behavior if needed
+    virtual void msg(int minVerbosity, int indent, const std::string& txt) const;
+
+private:
+    // Local to global, or global to local ID map
+    typedef std::unordered_map<spv::Id, spv::Id> idmap_t;
+    typedef std::unordered_set<spv::Id>          idset_t;
+    typedef std::unordered_map<spv::Id, int>     blockmap_t;
+
+    void remap(std::uint32_t opts = DO_EVERYTHING);
+
+    // Map of names to IDs
+    typedef std::unordered_map<std::string, spv::Id> namemap_t;
+
+    typedef std::uint32_t spirword_t;
+
+    typedef std::pair<unsigned, unsigned> range_t;
+    typedef std::function<void(spv::Id&)>                idfn_t;
+    typedef std::function<bool(spv::Op, unsigned start)> instfn_t;
+
+    // Special Values for ID map:
+    static const spv::Id unmapped;    // unchanged from default value
+    static const spv::Id unused;      // unused ID
+    static const int     header_size; // SPIR header = 5 words
+
+    class id_iterator_t;
+
+    // For mapping type entries between different shaders
+    typedef std::vector<spirword_t>        typeentry_t;
+    typedef std::map<spv::Id, typeentry_t> globaltypes_t;
+
+    // A set that preserves position order, and a reverse map
+    typedef std::set<int>                    posmap_t;
+    typedef std::unordered_map<spv::Id, int> posmap_rev_t;
+
+    // Maps an ID to the size of its base type, if known.
+    typedef std::unordered_map<spv::Id, unsigned> typesize_map_t;
+
+    // handle error
+    void error(const std::string& txt) const { errorLatch = true; errorHandler(txt); }
+
+    bool    isConstOp(spv::Op opCode)  const;
+    bool    isTypeOp(spv::Op opCode)   const;
+    bool    isStripOp(spv::Op opCode)  const;
+    bool    isFlowCtrl(spv::Op opCode) const;
+    range_t literalRange(spv::Op opCode) const;
+    range_t typeRange(spv::Op opCode)    const;
+    range_t constRange(spv::Op opCode)   const;
+    unsigned typeSizeInWords(spv::Id id)   const;
+    unsigned idTypeSizeInWords(spv::Id id) const;
+
+    bool isStripOp(spv::Op opCode, unsigned start) const;
+
+    spv::Id&        asId(unsigned word)                { return spv[word]; }
+    const spv::Id&  asId(unsigned word)          const { return spv[word]; }
+    spv::Op         asOpCode(unsigned word)      const { return opOpCode(spv[word]); }
+    std::uint32_t   asOpCodeHash(unsigned word);
+    spv::Decoration asDecoration(unsigned word)  const { return spv::Decoration(spv[word]); }
+    unsigned        asWordCount(unsigned word)   const { return opWordCount(spv[word]); }
+    spv::Id         asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 1 : 2)); }
+    unsigned        idPos(spv::Id id)            const;
+
+    static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; }
+    static spv::Op  opOpCode(spirword_t data)    { return spv::Op(data & spv::OpCodeMask); }
+
+    // Header access & set methods
+    spirword_t magic()    const { return spv[0]; } // return magic number
+    spirword_t bound()    const { return spv[3]; } // return Id bound from header
+    spirword_t bound(spirword_t b) { return spv[3] = b; }
+    spirword_t genmagic() const { return spv[2]; } // generator magic
+    spirword_t genmagic(spirword_t m) { return spv[2] = m; }
+    spirword_t schemaNum() const { return spv[4]; } // schema number from header
+
+    // Mapping fns: get
+    spv::Id localId(spv::Id id) const { return idMapL[id]; }
+
+    // Mapping fns: set
+    inline spv::Id localId(spv::Id id, spv::Id newId);
+    void countIds(spv::Id id);
+
+    // Return next unused new local ID.
+    // NOTE: boost::dynamic_bitset would be more efficient due to find_next(),
+    // which std::vector<bool> doesn't have.
+   inline spv::Id nextUnusedId(spv::Id id);
+
+   void buildLocalMaps();
+   std::string literalString(unsigned word) const; // Return literal as a std::string
+   int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; }
+
+   bool isNewIdMapped(spv::Id newId)   const { return isMapped(newId); }
+   bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; }
+   bool isOldIdUnused(spv::Id oldId)   const { return localId(oldId) == unused; }
+   bool isOldIdMapped(spv::Id oldId)   const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); }
+   bool isFunction(spv::Id oldId)      const { return fnPos.find(oldId) != fnPos.end(); }
+
+   // bool    matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const;
+   // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const;
+   std::uint32_t hashType(unsigned typeStart) const;
+
+   spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0);
+   int         processInstruction(unsigned word, instfn_t, idfn_t);
+
+   void        validate() const;
+   void        mapTypeConst();
+   void        mapFnBodies();
+   void        optLoadStore();
+   void        dceFuncs();
+   void        dceVars();
+   void        dceTypes();
+   void        mapNames();
+   void        foldIds();           // fold IDs to smallest space
+   void        forwardLoadStores(); // load store forwarding (EXPERIMENTAL)
+   void        offsetIds();         // create relative offset IDs
+
+   void        applyMap();          // remap per local name map
+   void        mapRemainder();      // map any IDs we haven't touched yet
+   void        stripDebug();        // strip all debug info
+   void        stripDeadRefs();     // strips debug info for now-dead references after DCE
+   void        strip();             // remove debug symbols
+
+   std::vector<spirword_t> spv;     // SPIR words
+
+   std::vector<std::string> stripWhiteList;
+
+   namemap_t nameMap;               // ID names from OpName
+
+   // Since we want to also do binary ops, we can't use std::vector<bool>. We could use
+   // boost::dynamic_bitset, but we're trying to avoid a boost dependency.
+   typedef std::uint64_t bits_t;
+   std::vector<bits_t> mapped;      // which new IDs have been mapped
+   static const int mBits = sizeof(bits_t) * 4;
+
+   bool isMapped(spv::Id id) const    { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); }
+   void setMapped(spv::Id id)         { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); }
+   void resizeMapped(spv::Id id)      { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); }
+   size_t maxMappedId() const         { return mapped.size() * mBits; }
+
+   // Add a strip range for a given instruction starting at 'start'
+   // Note: avoiding brace initializers to please older versions of MSVC.
+   void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); }
+
+   // Function start and end. Use unordered_map because we'll have
+   // many fewer functions than IDs.
+   std::unordered_map<spv::Id, range_t> fnPos;
+
+   // Which functions are called, anywhere in the module, with a call count
+   std::unordered_map<spv::Id, int> fnCalls;
+
+   posmap_t       typeConstPos;  // word positions that define types & consts (ordered)
+   posmap_rev_t   idPosR;        // reverse map from IDs to positions
+   typesize_map_t idTypeSizeMap; // maps each ID to its type size, if known.
+
+   std::vector<spv::Id> idMapL;  // ID map from local to global IDs
+
+   spv::Id entryPoint;           // module entry point
+   spv::Id largestNewId;         // biggest new ID we have mapped anything to
+
+   // Sections of the binary to strip, given as [begin,end)
+   std::vector<range_t> stripRange;
+
+   // processing options:
+   std::uint32_t options;
+   int           verbose;        // verbosity level
+
+   // Error latch: this is set if the error handler is ever executed.
It would be better to + // use a try/catch block and throw, but that's not desired for certain environments, so + // this is the alternative. + mutable bool errorLatch; + + static errorfn_t errorHandler; + static logfn_t logHandler; +}; + +} // namespace SPV + +#endif // defined (use_cpp11) +#endif // SPIRVREMAPPER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SpvBuilder.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SpvBuilder.h new file mode 100644 index 0000000..f7fdc6a --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SpvBuilder.h @@ -0,0 +1,959 @@ +// +// Copyright (C) 2014-2015 LunarG, Inc. +// Copyright (C) 2015-2020 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// "Builder" is an interface to fully build SPIR-V IR. Allocate one of +// these to build (a thread safe) internal SPIR-V representation (IR), +// and then dump it as a binary stream according to the SPIR-V specification. +// +// A Builder has a 1:1 relationship with a SPIR-V module. 
+// + +#pragma once +#ifndef SpvBuilder_H +#define SpvBuilder_H + +#include "Logger.h" +#include "spirv.hpp" +#include "spvIR.h" +namespace spv { + #include "GLSL.ext.KHR.h" + #include "NonSemanticShaderDebugInfo100.h" +} + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace spv { + +typedef enum { + Spv_1_0 = (1 << 16), + Spv_1_1 = (1 << 16) | (1 << 8), + Spv_1_2 = (1 << 16) | (2 << 8), + Spv_1_3 = (1 << 16) | (3 << 8), + Spv_1_4 = (1 << 16) | (4 << 8), + Spv_1_5 = (1 << 16) | (5 << 8), +} SpvVersion; + +class Builder { +public: + Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger); + virtual ~Builder(); + + static const int maxMatrixSize = 4; + + unsigned int getSpvVersion() const { return spvVersion; } + + void setSource(spv::SourceLanguage lang, int version) + { + sourceLang = lang; + sourceVersion = version; + } + spv::Id getStringId(const std::string& str) + { + auto sItr = stringIds.find(str); + if (sItr != stringIds.end()) + return sItr->second; + spv::Id strId = getUniqueId(); + Instruction* fileString = new Instruction(strId, NoType, OpString); + const char* file_c_str = str.c_str(); + fileString->addStringOperand(file_c_str); + strings.push_back(std::unique_ptr(fileString)); + module.mapInstruction(fileString); + stringIds[file_c_str] = strId; + return strId; + } + spv::Id getSourceFile() const + { + return sourceFileStringId; + } + void setSourceFile(const std::string& file) + { + sourceFileStringId = getStringId(file); + currentFileId = sourceFileStringId; + } + void setSourceText(const std::string& text) { sourceText = text; } + void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); } + void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); } + void setEmitOpLines() { emitOpLines = true; } + void setEmitNonSemanticShaderDebugInfo(bool const emit) + { + emitNonSemanticShaderDebugInfo = emit; + + if(emit) + { + importNonSemanticShaderDebugInfoInstructions(); + } + } + void setEmitNonSemanticShaderDebugSource(bool const src) + { + emitNonSemanticShaderDebugSource = src; + } + void addExtension(const char* ext) { extensions.insert(ext); } + void removeExtension(const char* ext) + { + extensions.erase(ext); + } + void addIncorporatedExtension(const char* ext, SpvVersion incorporatedVersion) + { + if (getSpvVersion() < static_cast(incorporatedVersion)) + addExtension(ext); + } + void promoteIncorporatedExtension(const char* baseExt, const char* promoExt, SpvVersion incorporatedVersion) + { + removeExtension(baseExt); + addIncorporatedExtension(promoExt, incorporatedVersion); + } + void addInclude(const std::string& name, const std::string& text) + { + spv::Id incId = getStringId(name); + includeFiles[incId] = &text; + } + Id import(const char*); + void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem) + { + addressModel = addr; + memoryModel = mem; + } + + void addCapability(spv::Capability cap) { capabilities.insert(cap); } + + // To get a new for anything needing a new one. + Id getUniqueId() { return ++uniqueId; } + + // To get a set of new s, e.g., for a set of function parameters + Id getUniqueIds(int numIds) + { + Id id = uniqueId + 1; + uniqueId += numIds; + return id; + } + + // Generate OpLine for non-filename-based #line directives (ie no filename + // seen yet): Log the current line, and if different than the last one, + // issue a new OpLine using the new line and current source file name. 
+ void setLine(int line); + + // If filename null, generate OpLine for non-filename-based line directives, + // else do filename-based: Log the current line and file, and if different + // than the last one, issue a new OpLine using the new line and file + // name. + void setLine(int line, const char* filename); + // Low-level OpLine. See setLine() for a layered helper. + void addLine(Id fileName, int line, int column); + void addDebugScopeAndLine(Id fileName, int line, int column); + + // For creating new types (will return old type if the requested one was already made). + Id makeVoidType(); + Id makeBoolType(bool const compilerGenerated = true); + Id makePointer(StorageClass, Id pointee); + Id makeForwardPointer(StorageClass); + Id makePointerFromForwardPointer(StorageClass, Id forwardPointerType, Id pointee); + Id makeIntegerType(int width, bool hasSign); // generic + Id makeIntType(int width) { return makeIntegerType(width, true); } + Id makeUintType(int width) { return makeIntegerType(width, false); } + Id makeFloatType(int width); + Id makeStructType(const std::vector& members, const char* name, bool const compilerGenerated = true); + Id makeStructResultType(Id type0, Id type1); + Id makeVectorType(Id component, int size); + Id makeMatrixType(Id component, int cols, int rows); + Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration + Id makeRuntimeArray(Id element); + Id makeFunctionType(Id returnType, const std::vector& paramTypes); + Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format); + Id makeSamplerType(); + Id makeSampledImageType(Id imageType); + Id makeCooperativeMatrixType(Id component, Id scope, Id rows, Id cols); + Id makeGenericType(spv::Op opcode, std::vector& operands); + + // SPIR-V NonSemantic Shader DebugInfo Instructions + struct DebugTypeLoc { + std::string name {}; + int line {0}; + int column {0}; + }; + std::unordered_map debugTypeLocs; + Id makeDebugInfoNone(); + Id makeBoolDebugType(int const size); + Id makeIntegerDebugType(int const width, bool const hasSign); + Id makeFloatDebugType(int const width); + Id makeSequentialDebugType(Id const baseType, Id const componentCount, NonSemanticShaderDebugInfo100Instructions const sequenceType); + Id makeArrayDebugType(Id const baseType, Id const componentCount); + Id makeVectorDebugType(Id const baseType, int const componentCount); + Id makeMatrixDebugType(Id const vectorType, int const vectorCount, bool columnMajor = true); + Id makeMemberDebugType(Id const memberType, DebugTypeLoc const& debugTypeLoc); + Id makeCompositeDebugType(std::vector const& memberTypes, char const*const name, + NonSemanticShaderDebugInfo100DebugCompositeType const tag, bool const isOpaqueType = false); + Id makeDebugSource(const Id fileName); + Id makeDebugCompilationUnit(); + Id createDebugGlobalVariable(Id const type, char const*const name, Id const variable); + Id createDebugLocalVariable(Id type, char const*const name, size_t const argNumber = 0); + Id makeDebugExpression(); + Id makeDebugDeclare(Id const debugLocalVariable, Id const localVariable); + Id makeDebugValue(Id const debugLocalVariable, Id const value); + Id makeDebugFunctionType(Id returnType, const std::vector& paramTypes); + Id makeDebugFunction(Function* function, Id nameId, Id funcTypeId); + Id makeDebugLexicalBlock(uint32_t line); + std::string unmangleFunctionName(std::string const& name) const; + + // accelerationStructureNV type + Id makeAccelerationStructureType(); + // 
rayQueryEXT type + Id makeRayQueryType(); + + // For querying about types. + Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } + Id getDerefTypeId(Id resultId) const; + Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } + Op getTypeClass(Id typeId) const { return getOpCode(typeId); } + Op getMostBasicTypeClass(Id typeId) const; + int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } + int getNumTypeConstituents(Id typeId) const; + int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } + Id getScalarTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId, int) const; + StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } + ImageFormat getImageTypeFormat(Id typeId) const + { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } + Id getResultingAccessChainType() const; + + bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } + bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } + bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } + bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } + bool isCooperativeMatrix(Id resultId)const { return isCooperativeMatrixType(getTypeId(resultId)); } + bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } + bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } + + bool isBoolType(Id typeId) + { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } + bool isIntType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) != 0; } + bool isUintType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) == 0; } + bool isFloatType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat; } + bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } + bool isScalarType(Id typeId) const + { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || + getTypeClass(typeId) == OpTypeBool; } + bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } + bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } + bool isStructType(Id typeId) const { return getTypeClass(typeId) == OpTypeStruct; } + bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } +#ifdef GLSLANG_WEB + bool isCooperativeMatrixType(Id typeId)const { return false; } +#else + bool isCooperativeMatrixType(Id typeId)const { return getTypeClass(typeId) == OpTypeCooperativeMatrixNV; } +#endif + bool isAggregateType(Id typeId) const + { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); } + bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } + bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } + bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } + bool containsType(Id typeId, Op typeOp, unsigned int width) const; + bool containsPhysicalStorageBufferOrArray(Id typeId) const; + + bool isConstantOpCode(Op opcode) const; + bool isSpecConstantOpCode(Op opcode) const; 
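The query helpers above compose naturally: getTypeId() fetches a value's type Id, and the is*Type predicates classify that type by opcode. A small hedged sketch of how a caller might test for a floating-point vector, assuming a Builder `b` and a value Id `v` produced elsewhere:

    // Is v a vector whose components are floats? (usage sketch, not part of the header)
    bool isFloatVector(spv::Builder& b, spv::Id v)
    {
        spv::Id type = b.getTypeId(v);                       // Id of v's type
        return b.isVectorType(type) &&                       // OpTypeVector...
               b.isFloatType(b.getContainedTypeId(type));    // ...of OpTypeFloat components
    }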
+ bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); } + bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; } + bool isSpecConstant(Id resultId) const { return isSpecConstantOpCode(getOpCode(resultId)); } + unsigned int getConstantScalar(Id resultId) const + { return module.getInstruction(resultId)->getImmediateOperand(0); } + StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); } + + bool isVariableOpCode(Op opcode) const { return opcode == OpVariable; } + bool isVariable(Id resultId) const { return isVariableOpCode(getOpCode(resultId)); } + bool isGlobalStorage(Id resultId) const { return getStorageClass(resultId) != StorageClassFunction; } + bool isGlobalVariable(Id resultId) const { return isVariable(resultId) && isGlobalStorage(resultId); } + // See if a resultId is valid for use as an initializer. + bool isValidInitializer(Id resultId) const { return isConstant(resultId) || isGlobalVariable(resultId); } + + bool isRayTracingOpCode(Op opcode) const; + + int getScalarTypeWidth(Id typeId) const + { + Id scalarTypeId = getScalarTypeId(typeId); + assert(getTypeClass(scalarTypeId) == OpTypeInt || getTypeClass(scalarTypeId) == OpTypeFloat); + return module.getInstruction(scalarTypeId)->getImmediateOperand(0); + } + + int getTypeNumColumns(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeConstituents(typeId); + } + int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); } + int getTypeNumRows(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeComponents(getContainedTypeId(typeId)); + } + int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); } + + Dim getTypeDimensionality(Id typeId) const + { + assert(isImageType(typeId)); + return (Dim)module.getInstruction(typeId)->getImmediateOperand(1); + } + Id getImageType(Id resultId) const + { + Id typeId = getTypeId(resultId); + assert(isImageType(typeId) || isSampledImageType(typeId)); + return isSampledImageType(typeId) ? module.getInstruction(typeId)->getIdOperand(0) : typeId; + } + bool isArrayedImageType(Id typeId) const + { + assert(isImageType(typeId)); + return module.getInstruction(typeId)->getImmediateOperand(3) != 0; + } + + // For making new constants (will return old constant if the requested one was already made). 
+ Id makeNullConstant(Id typeId); + Id makeBoolConstant(bool b, bool specConstant = false); + Id makeInt8Constant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(8), (unsigned)i, specConstant); } + Id makeUint8Constant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(8), u, specConstant); } + Id makeInt16Constant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(16), (unsigned)i, specConstant); } + Id makeUint16Constant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(16), u, specConstant); } + Id makeIntConstant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); } + Id makeUintConstant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(32), u, specConstant); } + Id makeInt64Constant(long long i, bool specConstant = false) + { return makeInt64Constant(makeIntType(64), (unsigned long long)i, specConstant); } + Id makeUint64Constant(unsigned long long u, bool specConstant = false) + { return makeInt64Constant(makeUintType(64), u, specConstant); } + Id makeFloatConstant(float f, bool specConstant = false); + Id makeDoubleConstant(double d, bool specConstant = false); + Id makeFloat16Constant(float f16, bool specConstant = false); + Id makeFpConstant(Id type, double d, bool specConstant = false); + + Id importNonSemanticShaderDebugInfoInstructions(); + + // Turn the array of constants into a proper spv constant of the requested type. + Id makeCompositeConstant(Id type, const std::vector& comps, bool specConst = false); + + // Methods for adding information outside the CFG. + Instruction* addEntryPoint(ExecutionModel, Function*, const char* name); + void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1); + void addExecutionMode(Function*, ExecutionMode mode, const std::vector& literals); + void addExecutionModeId(Function*, ExecutionMode mode, const std::vector& operandIds); + void addName(Id, const char* name); + void addMemberName(Id, int member, const char* name); + void addDecoration(Id, Decoration, int num = -1); + void addDecoration(Id, Decoration, const char*); + void addDecoration(Id, Decoration, const std::vector& literals); + void addDecoration(Id, Decoration, const std::vector& strings); + void addDecorationId(Id id, Decoration, Id idDecoration); + void addDecorationId(Id id, Decoration, const std::vector& operandIds); + void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1); + void addMemberDecoration(Id, unsigned int member, Decoration, const char*); + void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector& literals); + void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector& strings); + + // At the end of what block do the next create*() instructions go? + // Also reset current last DebugScope and current source line to unknown + void setBuildPoint(Block* bp) { + buildPoint = bp; + lastDebugScopeId = NoResult; + currentLine = 0; + } + Block* getBuildPoint() const { return buildPoint; } + + // Make the entry-point function. The returned pointer is only valid + // for the lifetime of this builder. + Function* makeEntryPoint(const char*); + + // Make a shader-style function, and create its entry block if entry is non-zero. + // Return the function, pass back the entry. + // The returned pointer is only valid for the lifetime of this builder. 
+ Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name, + const std::vector& paramTypes, const std::vector& paramNames, + const std::vector>& precisions, Block **entry = 0); + + // Create a return. An 'implicit' return is one not appearing in the source + // code. In the case of an implicit return, no post-return block is inserted. + void makeReturn(bool implicit, Id retVal = 0); + + // Initialize state and generate instructions for new lexical scope + void enterScope(uint32_t line); + + // Set state and generate instructions to exit current lexical scope + void leaveScope(); + + // Prepare builder for generation of instructions for a function. + void enterFunction(Function const* function); + + // Generate all the code needed to finish up a function. + void leaveFunction(); + + // Create block terminator instruction for certain statements like + // discard, terminate-invocation, terminateRayEXT, or ignoreIntersectionEXT + void makeStatementTerminator(spv::Op opcode, const char *name); + + // Create block terminator instruction for statements that have input operands + // such as OpEmitMeshTasksEXT + void makeStatementTerminator(spv::Op opcode, const std::vector& operands, const char* name); + + // Create a global or function local or IO variable. + Id createVariable(Decoration precision, StorageClass storageClass, Id type, const char* name = nullptr, + Id initializer = NoResult, bool const compilerGenerated = true); + + // Create an intermediate with an undefined value. + Id createUndefined(Id type); + + // Store into an Id and return the l-value + void createStore(Id rValue, Id lValue, spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // Load from an Id and return it + Id createLoad(Id lValue, spv::Decoration precision, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // Create an OpAccessChain instruction + Id createAccessChain(StorageClass, Id base, const std::vector& offsets); + + // Create an OpArrayLength instruction + Id createArrayLength(Id base, unsigned int member); + + // Create an OpCooperativeMatrixLengthNV instruction + Id createCooperativeMatrixLength(Id type); + + // Create an OpCompositeExtract instruction + Id createCompositeExtract(Id composite, Id typeId, unsigned index); + Id createCompositeExtract(Id composite, Id typeId, const std::vector& indexes); + Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index); + Id createCompositeInsert(Id object, Id composite, Id typeId, const std::vector& indexes); + + Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex); + Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex); + + void createNoResultOp(Op); + void createNoResultOp(Op, Id operand); + void createNoResultOp(Op, const std::vector& operands); + void createNoResultOp(Op, const std::vector& operands); + void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask); + void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics); + Id createUnaryOp(Op, Id typeId, Id operand); + Id createBinOp(Op, Id typeId, Id operand1, Id operand2); + Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3); + Id createOp(Op, Id typeId, const std::vector& operands); + Id createOp(Op, Id typeId, const std::vector& operands); + Id createFunctionCall(spv::Function*, const std::vector&); + 
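Taken together, the creation methods above are enough to sketch a trivial function body. The following assumes a constructed Builder `b`, and elides the bookkeeping a real module needs (addEntryPoint(), capabilities, memory model); spv::NoPrecision comes from spvIR.h:

    spv::Function* entry = b.makeEntryPoint("main");           // creates the function and its entry block
    spv::Id f32 = b.makeFloatType(32);
    spv::Id x   = b.createVariable(spv::NoPrecision,
                                   spv::StorageClassFunction,
                                   f32, "x");                  // function-local: float x;
    b.createStore(b.makeFloatConstant(1.0f), x);               // x = 1.0;
    b.makeReturn(true);                                        // implicit return, no post-return block
    b.leaveFunction();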
Id createSpecConstantOp(Op, Id typeId, const std::vector& operands, const std::vector& literals); + + // Take an rvalue (source) and a set of channels to extract from it to + // make a new rvalue, which is returned. + Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector& channels); + + // Take a copy of an lvalue (target) and a source of components, and set the + // source components into the lvalue where the 'channels' say to put them. + // An updated version of the target is returned. + // (No true lvalue or stores are used.) + Id createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector& channels); + + // If both the id and precision are valid, the id + // gets tagged with the requested precision. + // The passed in id is always the returned id, to simplify use patterns. + Id setPrecision(Id id, Decoration precision) + { + if (precision != NoPrecision && id != NoResult) + addDecoration(id, precision); + + return id; + } + + // Can smear a scalar to a vector for the following forms: + // - promoteScalar(scalar, vector) // smear scalar to width of vector + // - promoteScalar(vector, scalar) // smear scalar to width of vector + // - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to + // - promoteScalar(scalar, scalar) // do nothing + // Other forms are not allowed. + // + // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'. + // The type of the created vector is a vector of components of the same type as the scalar. + // + // Note: One of the arguments will change, with the result coming back that way rather than + // through the return value. + void promoteScalar(Decoration precision, Id& left, Id& right); + + // Make a value by smearing the scalar to fill the type. + // vectorType should be the correct type for making a vector of scalarVal. + // (No conversions are done.) + Id smearScalar(Decoration precision, Id scalarVal, Id vectorType); + + // Create a call to a built-in function. + Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector& args); + + // List of parameters used to create a texture operation + struct TextureParameters { + Id sampler; + Id coords; + Id bias; + Id lod; + Id Dref; + Id offset; + Id offsets; + Id gradX; + Id gradY; + Id sample; + Id component; + Id texelOut; + Id lodClamp; + Id granularity; + Id coarse; + bool nonprivate; + bool volatil; + }; + + // Select the correct texture operation based on all inputs, and emit the correct instruction + Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, + bool noImplicit, const TextureParameters&, ImageOperandsMask); + + // Emit the OpTextureQuery* instruction that was passed in. + // Figure out the right return value and type, and return it. + Id createTextureQueryCall(Op, const TextureParameters&, bool isUnsignedResult); + + Id createSamplePositionCall(Decoration precision, Id, Id); + + Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned); + Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id); + + // Reduction comparison for composites: For equal and not-equal resulting in a scalar. 
+ Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */); + + // OpCompositeConstruct + Id createCompositeConstruct(Id typeId, const std::vector& constituents); + + // vector or scalar constructor + Id createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId); + + // matrix constructor + Id createMatrixConstructor(Decoration precision, const std::vector& sources, Id constructee); + + // Helper to use for building nested control flow with if-then-else. + class If { + public: + If(Id condition, unsigned int ctrl, Builder& builder); + ~If() {} + + void makeBeginElse(); + void makeEndIf(); + + private: + If(const If&); + If& operator=(If&); + + Builder& builder; + Id condition; + unsigned int control; + Function* function; + Block* headerBlock; + Block* thenBlock; + Block* elseBlock; + Block* mergeBlock; + }; + + // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing + // any case/default labels, all separated by one or more case/default labels. Each possible + // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this + // number space. How to compute the value is given by 'condition', as in switch(condition). + // + // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches. + // + // Use a defaultSegment < 0 if there is no default segment (to branch to post switch). + // + // Returns the right set of basic blocks to start each code segment with, so that the caller's + // recursion stack can hold the memory for it. + // + void makeSwitch(Id condition, unsigned int control, int numSegments, const std::vector& caseValues, + const std::vector& valueToSegment, int defaultSegment, std::vector& segmentBB); + + // Add a branch to the innermost switch's merge block. + void addSwitchBreak(); + + // Move to the next code segment, passing in the return argument in makeSwitch() + void nextSwitchSegment(std::vector& segmentBB, int segment); + + // Finish off the innermost switch. + void endSwitch(std::vector& segmentBB); + + struct LoopBlocks { + LoopBlocks(Block& head, Block& body, Block& merge, Block& continue_target) : + head(head), body(body), merge(merge), continue_target(continue_target) { } + Block &head, &body, &merge, &continue_target; + private: + LoopBlocks(); + LoopBlocks& operator=(const LoopBlocks&) = delete; + }; + + // Start a new loop and prepare the builder to generate code for it. Until + // closeLoop() is called for this loop, createLoopContinue() and + // createLoopExit() will target its corresponding blocks. + LoopBlocks& makeNewLoop(); + + // Create a new block in the function containing the build point. Memory is + // owned by the function object. + Block& makeNewBlock(); + + // Add a branch to the continue_target of the current (innermost) loop. + void createLoopContinue(); + + // Add an exit (e.g. "break") from the innermost loop that we're currently + // in. + void createLoopExit(); + + // Close the innermost loop that you're in + void closeLoop(); + + // + // Access chain design for an R-Value vs. L-Value: + // + // There is a single access chain the builder is building at + // any particular time. Such a chain can be used to either to a load or + // a store, when desired. + // + // Expressions can be r-values, l-values, or both, or only r-values: + // a[b.c].d = .... // l-value + // ... 
= a[b.c].d; // r-value, that also looks like an l-value + // ++a[b.c].d; // r-value and l-value + // (x + y)[2]; // r-value only, can't possibly be l-value + // + // Computing an r-value means generating code. Hence, + // r-values should only be computed when they are needed, not speculatively. + // + // Computing an l-value means saving away information for later use in the compiler, + // no code is generated until the l-value is later dereferenced. It is okay + // to speculatively generate an l-value, just not okay to speculatively dereference it. + // + // The base of the access chain (the left-most variable or expression + // from which everything is based) can be set either as an l-value + // or as an r-value. Most efficient would be to set an l-value if one + // is available. If an expression was evaluated, the resulting r-value + // can be set as the chain base. + // + // The users of this single access chain can save and restore if they + // want to nest or manage multiple chains. + // + + struct AccessChain { + Id base; // for l-values, pointer to the base object, for r-values, the base object + std::vector indexChain; + Id instr; // cache the instruction that generates this access chain + std::vector swizzle; // each std::vector element selects the next GLSL component number + Id component; // a dynamic component index, can coexist with a swizzle, + // done after the swizzle, NoResult if not present + Id preSwizzleBaseType; // dereferenced type, before swizzle or component is applied; + // NoType unless a swizzle or component is present + bool isRValue; // true if 'base' is an r-value, otherwise, base is an l-value + unsigned int alignment; // bitwise OR of alignment values passed in. Accumulates worst alignment. + // Only tracks base and (optional) component selection alignment. + + // Accumulate whether anything in the chain of structures has coherent decorations. 
+ struct CoherentFlags { + CoherentFlags() { clear(); } +#ifdef GLSLANG_WEB + void clear() { } + bool isVolatile() const { return false; } + CoherentFlags operator |=(const CoherentFlags &other) { return *this; } +#else + bool isVolatile() const { return volatil; } + bool isNonUniform() const { return nonUniform; } + bool anyCoherent() const { + return coherent || devicecoherent || queuefamilycoherent || workgroupcoherent || + subgroupcoherent || shadercallcoherent; + } + + unsigned coherent : 1; + unsigned devicecoherent : 1; + unsigned queuefamilycoherent : 1; + unsigned workgroupcoherent : 1; + unsigned subgroupcoherent : 1; + unsigned shadercallcoherent : 1; + unsigned nonprivate : 1; + unsigned volatil : 1; + unsigned isImage : 1; + unsigned nonUniform : 1; + + void clear() { + coherent = 0; + devicecoherent = 0; + queuefamilycoherent = 0; + workgroupcoherent = 0; + subgroupcoherent = 0; + shadercallcoherent = 0; + nonprivate = 0; + volatil = 0; + isImage = 0; + nonUniform = 0; + } + + CoherentFlags operator |=(const CoherentFlags &other) { + coherent |= other.coherent; + devicecoherent |= other.devicecoherent; + queuefamilycoherent |= other.queuefamilycoherent; + workgroupcoherent |= other.workgroupcoherent; + subgroupcoherent |= other.subgroupcoherent; + shadercallcoherent |= other.shadercallcoherent; + nonprivate |= other.nonprivate; + volatil |= other.volatil; + isImage |= other.isImage; + nonUniform |= other.nonUniform; + return *this; + } +#endif + }; + CoherentFlags coherentFlags; + }; + + // + // the SPIR-V builder maintains a single active chain that + // the following methods operate on + // + + // for external save and restore + AccessChain getAccessChain() { return accessChain; } + void setAccessChain(AccessChain newChain) { accessChain = newChain; } + + // clear accessChain + void clearAccessChain(); + + // set new base as an l-value base + void setAccessChainLValue(Id lValue) + { + assert(isPointer(lValue)); + accessChain.base = lValue; + } + + // set new base value as an r-value + void setAccessChainRValue(Id rValue) + { + accessChain.isRValue = true; + accessChain.base = rValue; + } + + // push offset onto the end of the chain + void accessChainPush(Id offset, AccessChain::CoherentFlags coherentFlags, unsigned int alignment) + { + accessChain.indexChain.push_back(offset); + accessChain.coherentFlags |= coherentFlags; + accessChain.alignment |= alignment; + } + + // push new swizzle onto the end of any existing swizzle, merging into a single swizzle + void accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType, + AccessChain::CoherentFlags coherentFlags, unsigned int alignment); + + // push a dynamic component selection onto the access chain, only applicable with a + // non-trivial swizzle or no swizzle + void accessChainPushComponent(Id component, Id preSwizzleBaseType, AccessChain::CoherentFlags coherentFlags, + unsigned int alignment) + { + if (accessChain.swizzle.size() != 1) { + accessChain.component = component; + if (accessChain.preSwizzleBaseType == NoType) + accessChain.preSwizzleBaseType = preSwizzleBaseType; + } + accessChain.coherentFlags |= coherentFlags; + accessChain.alignment |= alignment; + } + + // use accessChain and swizzle to store value + void accessChainStore(Id rvalue, Decoration nonUniform, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // use accessChain and swizzle to load an r-value + Id accessChainLoad(Decoration precision, Decoration 
l_nonUniform, Decoration r_nonUniform, Id ResultType, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, spv::Scope scope = spv::ScopeMax, + unsigned int alignment = 0); + + // Return whether or not the access chain can be represented in SPIR-V + // as an l-value. + // E.g., a[3].yx cannot be, while a[3].y and a[3].y[x] can be. + bool isSpvLvalue() const { return accessChain.swizzle.size() <= 1; } + + // get the direct pointer for an l-value + Id accessChainGetLValue(); + + // Get the inferred SPIR-V type of the result of the current access chain, + // based on the type of the base and the chain of dereferences. + Id accessChainGetInferredType(); + + // Add capabilities, extensions, remove unneeded decorations, etc., + // based on the resulting SPIR-V. + void postProcess(); + + // Prune unreachable blocks in the CFG and remove unneeded decorations. + void postProcessCFG(); + +#ifndef GLSLANG_WEB + // Add capabilities, extensions based on instructions in the module. + void postProcessFeatures(); + // Hook to visit each instruction in a block in a function + void postProcess(Instruction&); + // Hook to visit each non-32-bit sized float/int operation in a block. + void postProcessType(const Instruction&, spv::Id typeId); +#endif + + void dump(std::vector&) const; + + void createBranch(Block* block); + void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); + void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, + const std::vector& operands); + + // Sets to generate opcode for specialization constants. + void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; } + // Sets to generate opcode for non-specialization constants (normal mode). + void setToNormalCodeGenMode() { generatingOpCodeForSpecConst = false; } + // Check if the builder is generating code for spec constants. + bool isInSpecConstCodeGenMode() { return generatingOpCodeForSpecConst; } + + protected: + Id makeIntConstant(Id typeId, unsigned value, bool specConstant); + Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2); + Id findCompositeConstant(Op typeClass, Id typeId, const std::vector& comps); + Id findStructConstant(Id typeId, const std::vector& comps); + Id collapseAccessChain(); + void remapDynamicSwizzle(); + void transferAccessChainSwizzle(bool dynamic); + void simplifyAccessChainSwizzle(); + void createAndSetNoPredecessorBlock(const char*); + void createSelectionMerge(Block* mergeBlock, unsigned int control); + void dumpSourceInstructions(std::vector&) const; + void dumpSourceInstructions(const spv::Id fileId, const std::string& text, std::vector&) const; + void dumpInstructions(std::vector&, const std::vector >&) const; + void dumpModuleProcesses(std::vector&) const; + spv::MemoryAccessMask sanitizeMemoryAccessForStorageClass(spv::MemoryAccessMask memoryAccess, StorageClass sc) + const; + + unsigned int spvVersion; // the version of SPIR-V to emit in the header + SourceLanguage sourceLang; + int sourceVersion; + spv::Id sourceFileStringId; + spv::Id nonSemanticShaderCompilationUnitId {0}; + spv::Id nonSemanticShaderDebugInfo {0}; + spv::Id debugInfoNone {0}; + spv::Id debugExpression {0}; // Debug expression with zero operations. 
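Once the IR is complete, postProcess() fixes up capabilities and extensions implied by the instructions (and prunes the CFG), and dump() serializes the module. Continuing the sketch above, under the same assumptions:

    b.postProcess();                 // prune unreachable blocks, add implied capabilities/extensions
    std::vector<unsigned int> words;
    b.dump(words);                   // one 32-bit SPIR-V word per element
    // `words` is now a complete binary, suitable for spv::Disassemble or writing to a .spv file.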
+ std::string sourceText; + int currentLine; + const char* currentFile; + spv::Id currentFileId; + std::stack currentDebugScopeId; + spv::Id lastDebugScopeId; + bool emitOpLines; + bool emitNonSemanticShaderDebugInfo; + bool restoreNonSemanticShaderDebugInfo; + bool emitNonSemanticShaderDebugSource; + std::set extensions; + std::vector sourceExtensions; + std::vector moduleProcesses; + AddressingModel addressModel; + MemoryModel memoryModel; + std::set capabilities; + int builderNumber; + Module module; + Block* buildPoint; + Id uniqueId; + Function* entryPointFunction; + bool generatingOpCodeForSpecConst; + AccessChain accessChain; + + // special blocks of instructions for output + std::vector > strings; + std::vector > imports; + std::vector > entryPoints; + std::vector > executionModes; + std::vector > names; + std::vector > decorations; + std::vector > constantsTypesGlobals; + std::vector > externals; + std::vector > functions; + + // not output, internally used for quick & dirty canonical (unique) creation + + // map type opcodes to constant inst. + std::unordered_map> groupedConstants; + // map struct-id to constant instructions + std::unordered_map> groupedStructConstants; + // map type opcodes to type instructions + std::unordered_map> groupedTypes; + // map type opcodes to debug type instructions + std::unordered_map> groupedDebugTypes; + // list of OpConstantNull instructions + std::vector nullConstants; + + // stack of switches + std::stack switchMerges; + + // Our loop stack. + std::stack loops; + + // map from strings to their string ids + std::unordered_map stringIds; + + // map from include file name ids to their contents + std::map includeFiles; + + // map from core id to debug id + std::map debugId; + + // map from file name string id to DebugSource id + std::unordered_map debugSourceId; + + // The stream for outputting warnings and errors. + SpvBuildLogger* logger; +}; // end Builder class + +}; // end spv namespace + +#endif // SpvBuilder_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SpvTools.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SpvTools.h new file mode 100644 index 0000000..5386048 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/SpvTools.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2014-2016 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Call into SPIRV-Tools to disassemble, validate, and optimize.
+//
+
+#pragma once
+#ifndef GLSLANG_SPV_TOOLS_H
+#define GLSLANG_SPV_TOOLS_H
+
+#if ENABLE_OPT
+#include <vector>
+#include <ostream>
+#include "spirv-tools/libspirv.h"
+#endif
+
+#include "glslang/MachineIndependent/localintermediate.h"
+#include "Logger.h"
+
+namespace glslang {
+
+struct SpvOptions {
+    bool generateDebugInfo {false};
+    bool stripDebugInfo {false};
+    bool disableOptimizer {true};
+    bool optimizeSize {false};
+    bool disassemble {false};
+    bool validate {false};
+    bool emitNonSemanticShaderDebugInfo {false};
+    bool emitNonSemanticShaderDebugSource {false};
+};
+
+#if ENABLE_OPT
+
+// Use the SPIRV-Tools disassembler to print SPIR-V using a SPV_ENV_UNIVERSAL_1_3 environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv);
+
+// Use the SPIRV-Tools disassembler to print SPIR-V with a provided SPIR-V environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv,
+                           spv_target_env requested_context);
+
+// Apply the SPIRV-Tools validator to generated SPIR-V.
+void SpirvToolsValidate(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                        spv::SpvBuildLogger*, bool prelegalization);
+
+// Apply the SPIRV-Tools optimizer to generated SPIR-V. HLSL SPIR-V is legalized in the process.
+void SpirvToolsTransform(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                         spv::SpvBuildLogger*, const SpvOptions*);
+
+// Apply the SPIRV-Tools optimizer to strip debug info from SPIR-V. This is implicitly done by
+// SpirvToolsTransform if spvOptions->stripDebugInfo is set, but can be called separately if
+// optimization is disabled.
+void SpirvToolsStripDebugInfo(const glslang::TIntermediate& intermediate,
+                              std::vector<unsigned int>& spirv, spv::SpvBuildLogger*);
+
+#endif
+
+} // end namespace glslang
+
+#endif // GLSLANG_SPV_TOOLS_H
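These entry points are thin wrappers over SPIRV-Tools, compiled in only when ENABLE_OPT is set. A hedged sketch of how a caller might drive them; GlslangToSpv() is declared in GlslangToSpv.h (not shown in this diff) and is assumed to have produced the word stream:

    #include <iostream>
    #include <vector>

    glslang::SpvOptions options;
    options.disableOptimizer = false;   // let SpirvToolsTransform run the optimizer
    options.validate         = true;

    std::vector<unsigned int> spirv;    // assumed filled by glslang::GlslangToSpv(...)
    #if ENABLE_OPT
    glslang::SpirvToolsDisassemble(std::cout, spirv);  // SPV_ENV_UNIVERSAL_1_3 environment
    #endif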
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/bitutils.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/bitutils.h
new file mode 100644
index 0000000..22e44ce
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/bitutils.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_UTIL_BITUTILS_H_
+#define LIBSPIRV_UTIL_BITUTILS_H_
+
+#include <cstdint>
+#include <cstring>
+
+namespace spvutils {
+
+// Performs a bitwise copy of source to the destination type Dest.
+template <typename Dest, typename Src>
+Dest BitwiseCast(Src source) {
+  Dest dest;
+  static_assert(sizeof(source) == sizeof(dest),
+                "BitwiseCast: Source and destination must have the same size");
+  std::memcpy(static_cast<void*>(&dest), &source, sizeof(dest));
+  return dest;
+}
+
+// SetBits<T, First, Num> returns an integer of type T with bits set
+// for position <First> through <First + Num - 1>, counting from the least
+// significant bit. In particular when Num == 0, no positions are set to 1.
+// A static assert will be triggered if First + Num > sizeof(T) * 8, that is,
+// a bit that will not fit in the underlying type is set.
+template <typename T, size_t First = 0, size_t Num = 0>
+struct SetBits {
+  static_assert(First < sizeof(T) * 8,
+                "Tried to set a bit that is shifted too far.");
+  const static T get = (T(1) << First) | SetBits<T, First + 1, Num - 1>::get;
+};
+
+template <typename T, size_t Last>
+struct SetBits<T, Last, 0> {
+  const static T get = T(0);
+};
+
+// This is all compile-time so we can put our tests right here.
+static_assert(SetBits<uint32_t, 0, 0>::get == uint32_t(0x00000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 1>::get == uint32_t(0x00000001),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 31, 1>::get == uint32_t(0x80000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 1, 2>::get == uint32_t(0x00000006),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 30, 2>::get == uint32_t(0xc0000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 31>::get == uint32_t(0x7FFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 32>::get == uint32_t(0xFFFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 16, 16>::get == uint32_t(0xFFFF0000),
+              "SetBits failed");
+
+static_assert(SetBits<uint64_t, 0, 1>::get == uint64_t(0x0000000000000001LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 63, 1>::get == uint64_t(0x8000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 62, 2>::get == uint64_t(0xc000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 31, 1>::get == uint64_t(0x0000000080000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 16, 16>::get == uint64_t(0x00000000FFFF0000LL),
+              "SetBits failed");
+
+} // namespace spvutils
+
+#endif // LIBSPIRV_UTIL_BITUTILS_H_
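BitwiseCast is the memcpy-based bit reinterpretation (well-defined where union or pointer-cast type punning is not), and SetBits is evaluated entirely at compile time, as the static_asserts above demonstrate. A short usage sketch:

    #include "bitutils.h"
    #include <cstdint>

    uint32_t bits = spvutils::BitwiseCast<uint32_t>(1.0f); // 0x3F800000, the IEEE-754 bits of 1.0f
    float back    = spvutils::BitwiseCast<float>(bits);    // 1.0f again

    constexpr uint32_t lowByte = spvutils::SetBits<uint32_t, 0, 8>::get; // 0x000000FF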
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/disassemble.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/disassemble.h
new file mode 100644
index 0000000..b6a4635
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/disassemble.h
@@ -0,0 +1,53 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Disassembler for SPIR-V.
+//
+
+#pragma once
+#ifndef disassembler_H
+#define disassembler_H
+
+#include <iostream>
+#include <vector>
+
+namespace spv {
+
+    // disassemble with glslang custom disassembler
+    void Disassemble(std::ostream& out, const std::vector<unsigned int>&);
+
+} // end namespace spv
+
+#endif // disassembler_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/doc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/doc.h
new file mode 100644
index 0000000..2a0b28c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/doc.h
@@ -0,0 +1,259 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Parameterize the SPIR-V enumerants.
+//
+
+#pragma once
+
+#include "spirv.hpp"
+
+#include <vector>
+
+namespace spv {
+
+// Fill in all the parameters
+void Parameterize();
+
+// Return the English names of all the enums.
+const char* SourceString(int); +const char* AddressingString(int); +const char* MemoryString(int); +const char* ExecutionModelString(int); +const char* ExecutionModeString(int); +const char* StorageClassString(int); +const char* DecorationString(int); +const char* BuiltInString(int); +const char* DimensionString(int); +const char* SelectControlString(int); +const char* LoopControlString(int); +const char* FunctionControlString(int); +const char* SamplerAddressingModeString(int); +const char* SamplerFilterModeString(int); +const char* ImageFormatString(int); +const char* ImageChannelOrderString(int); +const char* ImageChannelTypeString(int); +const char* ImageChannelDataTypeString(int type); +const char* ImageOperandsString(int format); +const char* ImageOperands(int); +const char* FPFastMathString(int); +const char* FPRoundingModeString(int); +const char* LinkageTypeString(int); +const char* FuncParamAttrString(int); +const char* AccessQualifierString(int); +const char* MemorySemanticsString(int); +const char* MemoryAccessString(int); +const char* ExecutionScopeString(int); +const char* GroupOperationString(int); +const char* KernelEnqueueFlagsString(int); +const char* KernelProfilingInfoString(int); +const char* CapabilityString(int); +const char* OpcodeString(int); +const char* ScopeString(int mem); + +// For grouping opcodes into subsections +enum OpcodeClass { + OpClassMisc, + OpClassDebug, + OpClassAnnotate, + OpClassExtension, + OpClassMode, + OpClassType, + OpClassConstant, + OpClassMemory, + OpClassFunction, + OpClassImage, + OpClassConvert, + OpClassComposite, + OpClassArithmetic, + OpClassBit, + OpClassRelationalLogical, + OpClassDerivative, + OpClassFlowControl, + OpClassAtomic, + OpClassPrimitive, + OpClassBarrier, + OpClassGroup, + OpClassDeviceSideEnqueue, + OpClassPipe, + + OpClassCount, + OpClassMissing // all instructions start out as missing +}; + +// For parameterizing operands. +enum OperandClass { + OperandNone, + OperandId, + OperandVariableIds, + OperandOptionalLiteral, + OperandOptionalLiteralString, + OperandVariableLiterals, + OperandVariableIdLiteral, + OperandVariableLiteralId, + OperandLiteralNumber, + OperandLiteralString, + OperandVariableLiteralStrings, + OperandSource, + OperandExecutionModel, + OperandAddressing, + OperandMemory, + OperandExecutionMode, + OperandStorage, + OperandDimensionality, + OperandSamplerAddressingMode, + OperandSamplerFilterMode, + OperandSamplerImageFormat, + OperandImageChannelOrder, + OperandImageChannelDataType, + OperandImageOperands, + OperandFPFastMath, + OperandFPRoundingMode, + OperandLinkageType, + OperandAccessQualifier, + OperandFuncParamAttr, + OperandDecoration, + OperandBuiltIn, + OperandSelect, + OperandLoop, + OperandFunction, + OperandMemorySemantics, + OperandMemoryAccess, + OperandScope, + OperandGroupOperation, + OperandKernelEnqueueFlags, + OperandKernelProfilingInfo, + OperandCapability, + + OperandOpcode, + + OperandCount +}; + +// Any specific enum can have a set of capabilities that allow it: +typedef std::vector EnumCaps; + +// Parameterize a set of operands with their OperandClass(es) and descriptions. 
+class OperandParameters {
+public:
+    OperandParameters() { }
+    void push(OperandClass oc, const char* d, bool opt = false)
+    {
+        opClass.push_back(oc);
+        desc.push_back(d);
+        optional.push_back(opt);
+    }
+    void setOptional();
+    OperandClass getClass(int op) const { return opClass[op]; }
+    const char* getDesc(int op) const { return desc[op]; }
+    bool isOptional(int op) const { return optional[op]; }
+    int getNum() const { return (int)opClass.size(); }
+
+protected:
+    std::vector<OperandClass> opClass;
+    std::vector<const char*> desc;
+    std::vector<bool> optional;
+};
+
+// Parameterize an enumerant
+class EnumParameters {
+public:
+    EnumParameters() : desc(0) { }
+    const char* desc;
+};
+
+// Parameterize a set of enumerants that form an enum
+class EnumDefinition : public EnumParameters {
+public:
+    EnumDefinition() :
+        ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { }
+    void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false)
+    {
+        ceiling = ceil;
+        getName = name;
+        bitmask = mask;
+        enumParams = ep;
+    }
+    void setOperands(OperandParameters* op) { operandParams = op; }
+    int ceiling;   // ceiling of enumerants
+    bool bitmask;  // true if these enumerants combine into a bitmask
+    const char* (*getName)(int);       // a function that returns the name for each enumerant value (or shift)
+    EnumParameters* enumParams;        // parameters for each individual enumerant
+    OperandParameters* operandParams;  // sets of operands
+};
+
+// Parameterize an instruction's logical format, including its known set of operands,
+// per OperandParameters above.
+class InstructionParameters {
+public:
+    InstructionParameters() :
+        opDesc("TBD"),
+        opClass(OpClassMissing),
+        typePresent(true),   // most normal, only exceptions have to be spelled out
+        resultPresent(true)  // most normal, only exceptions have to be spelled out
+    { }
+
+    void setResultAndType(bool r, bool t)
+    {
+        resultPresent = r;
+        typePresent = t;
+    }
+
+    bool hasResult() const { return resultPresent != 0; }
+    bool hasType() const { return typePresent != 0; }
+
+    const char* opDesc;
+    OpcodeClass opClass;
+    OperandParameters operands;
+
+protected:
+    int typePresent : 1;
+    int resultPresent : 1;
+};
+
+// The set of objects that hold all the instruction/operand
+// parameterization information.
+extern InstructionParameters InstructionDesc[];
+
+// These hold definitions of the enumerants used for operands
+extern EnumDefinition OperandClassParams[];
+
+const char* GetOperandDesc(OperandClass operand);
+void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false);
+const char* AccessQualifierString(int attr);
+
+void PrintOperands(const OperandParameters& operands, int reservedOperands);
+
+} // end namespace spv
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/hex_float.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/hex_float.h
new file mode 100644
index 0000000..8be8e9f
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/hex_float.h
@@ -0,0 +1,1078 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_
+#define LIBSPIRV_UTIL_HEX_FLOAT_H_
+
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+namespace std {
+bool isnan(double f)
+{
+  return ::_isnan(f) != 0;
+}
+bool isinf(double f)
+{
+  return ::_finite(f) == 0;
+}
+}
+#endif
+
+#include "bitutils.h"
+
+namespace spvutils {
+
+class Float16 {
+ public:
+  Float16(uint16_t v) : val(v) {}
+  Float16() {}
+  static bool isNan(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0);
+  }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0);
+  }
+  Float16(const Float16& other) { val = other.val; }
+  uint16_t get_value() const { return val; }
+
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16(0x7bff); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16(0xfbff); }
+
+ private:
+  uint16_t val;
+};
+
+// To specialize this type, you must override uint_type to define
+// an unsigned integer that can fit your floating point type.
+// You must also add a isNan function that returns true if
+// a value is Nan.
+template <typename T>
+struct FloatProxyTraits {
+  typedef void uint_type;
+};
+
+template <>
+struct FloatProxyTraits<float> {
+  typedef uint32_t uint_type;
+  static bool isNan(float f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(float f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static float max() { return std::numeric_limits<float>::max(); }
+  // Returns the lowest normal value.
+  static float lowest() { return std::numeric_limits<float>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<double> {
+  typedef uint64_t uint_type;
+  static bool isNan(double f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(double f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static double max() { return std::numeric_limits<double>::max(); }
+  // Returns the lowest normal value.
+  static double lowest() { return std::numeric_limits<double>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<Float16> {
+  typedef uint16_t uint_type;
+  static bool isNan(Float16 f) { return Float16::isNan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(Float16 f) { return Float16::isInfinity(f); }
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16::max(); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16::lowest(); }
+};
+
+// Since copying a floating point number (especially if it is NaN)
+// does not guarantee that bits are preserved, this class lets us
+// store the type and use it as a float when necessary.
+template <typename T>
+class FloatProxy {
+ public:
+  typedef typename FloatProxyTraits<T>::uint_type uint_type;
+
+  // Since this is to act similar to the normal floats,
+  // do not initialize the data by default.
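// (Illustrative aside, not part of the upstream header: because the value is
// stored as raw bits, even a signaling-NaN pattern survives a round-trip, e.g.
//     spvutils::FloatProxy<float> p(uint32_t(0x7f800001));
//     assert(p.data() == 0x7f800001u);   // bits preserved exactly
// whereas copying it through a plain float may quieten the NaN.)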
+ FloatProxy() {} + + // Intentionally non-explicit. This is a proxy type so + // implicit conversions allow us to use it more transparently. + FloatProxy(T val) { data_ = BitwiseCast(val); } + + // Intentionally non-explicit. This is a proxy type so + // implicit conversions allow us to use it more transparently. + FloatProxy(uint_type val) { data_ = val; } + + // This is helpful to have and is guaranteed not to stomp bits. + FloatProxy operator-() const { + return static_cast(data_ ^ + (uint_type(0x1) << (sizeof(T) * 8 - 1))); + } + + // Returns the data as a floating point value. + T getAsFloat() const { return BitwiseCast(data_); } + + // Returns the raw data. + uint_type data() const { return data_; } + + // Returns true if the value represents any type of NaN. + bool isNan() { return FloatProxyTraits::isNan(getAsFloat()); } + // Returns true if the value represents any type of infinity. + bool isInfinity() { return FloatProxyTraits::isInfinity(getAsFloat()); } + + // Returns the maximum normal value. + static FloatProxy max() { + return FloatProxy(FloatProxyTraits::max()); + } + // Returns the lowest normal value. + static FloatProxy lowest() { + return FloatProxy(FloatProxyTraits::lowest()); + } + + private: + uint_type data_; +}; + +template +bool operator==(const FloatProxy& first, const FloatProxy& second) { + return first.data() == second.data(); +} + +// Reads a FloatProxy value as a normal float from a stream. +template +std::istream& operator>>(std::istream& is, FloatProxy& value) { + T float_val; + is >> float_val; + value = FloatProxy(float_val); + return is; +} + +// This is an example traits. It is not meant to be used in practice, but will +// be the default for any non-specialized type. +template +struct HexFloatTraits { + // Integer type that can store this hex-float. + typedef void uint_type; + // Signed integer type that can store this hex-float. + typedef void int_type; + // The numerical type that this HexFloat represents. + typedef void underlying_type; + // The type needed to construct the underlying type. + typedef void native_type; + // The number of bits that are actually relevant in the uint_type. + // This allows us to deal with, for example, 24-bit values in a 32-bit + // integer. + static const uint32_t num_used_bits = 0; + // Number of bits that represent the exponent. + static const uint32_t num_exponent_bits = 0; + // Number of bits that represent the fractional part. + static const uint32_t num_fraction_bits = 0; + // The bias of the exponent. (How much we need to subtract from the stored + // value to get the correct value.) + static const uint32_t exponent_bias = 0; +}; + +// Traits for IEEE float. +// 1 sign bit, 8 exponent bits, 23 fractional bits. +template <> +struct HexFloatTraits> { + typedef uint32_t uint_type; + typedef int32_t int_type; + typedef FloatProxy underlying_type; + typedef float native_type; + static const uint_type num_used_bits = 32; + static const uint_type num_exponent_bits = 8; + static const uint_type num_fraction_bits = 23; + static const uint_type exponent_bias = 127; +}; + +// Traits for IEEE double. +// 1 sign bit, 11 exponent bits, 52 fractional bits. 
+template <> +struct HexFloatTraits> { + typedef uint64_t uint_type; + typedef int64_t int_type; + typedef FloatProxy underlying_type; + typedef double native_type; + static const uint_type num_used_bits = 64; + static const uint_type num_exponent_bits = 11; + static const uint_type num_fraction_bits = 52; + static const uint_type exponent_bias = 1023; +}; + +// Traits for IEEE half. +// 1 sign bit, 5 exponent bits, 10 fractional bits. +template <> +struct HexFloatTraits> { + typedef uint16_t uint_type; + typedef int16_t int_type; + typedef uint16_t underlying_type; + typedef uint16_t native_type; + static const uint_type num_used_bits = 16; + static const uint_type num_exponent_bits = 5; + static const uint_type num_fraction_bits = 10; + static const uint_type exponent_bias = 15; +}; + +enum round_direction { + kRoundToZero, + kRoundToNearestEven, + kRoundToPositiveInfinity, + kRoundToNegativeInfinity +}; + +// Template class that houses a floating pointer number. +// It exposes a number of constants based on the provided traits to +// assist in interpreting the bits of the value. +template > +class HexFloat { + public: + typedef typename Traits::uint_type uint_type; + typedef typename Traits::int_type int_type; + typedef typename Traits::underlying_type underlying_type; + typedef typename Traits::native_type native_type; + + explicit HexFloat(T f) : value_(f) {} + + T value() const { return value_; } + void set_value(T f) { value_ = f; } + + // These are all written like this because it is convenient to have + // compile-time constants for all of these values. + + // Pass-through values to save typing. + static const uint32_t num_used_bits = Traits::num_used_bits; + static const uint32_t exponent_bias = Traits::exponent_bias; + static const uint32_t num_exponent_bits = Traits::num_exponent_bits; + static const uint32_t num_fraction_bits = Traits::num_fraction_bits; + + // Number of bits to shift left to set the highest relevant bit. + static const uint32_t top_bit_left_shift = num_used_bits - 1; + // How many nibbles (hex characters) the fractional part takes up. + static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4; + // If the fractional part does not fit evenly into a hex character (4-bits) + // then we have to left-shift to get rid of leading 0s. This is the amount + // we have to shift (might be 0). + static const uint32_t num_overflow_bits = + fraction_nibbles * 4 - num_fraction_bits; + + // The representation of the fraction, not the actual bits. This + // includes the leading bit that is usually implicit. + static const uint_type fraction_represent_mask = + spvutils::SetBits::get; + + // The topmost bit in the nibble-aligned fraction. + static const uint_type fraction_top_bit = + uint_type(1) << (num_fraction_bits + num_overflow_bits - 1); + + // The least significant bit in the exponent, which is also the bit + // immediately to the left of the significand. + static const uint_type first_exponent_bit = uint_type(1) + << (num_fraction_bits); + + // The mask for the encoded fraction. It does not include the + // implicit bit. + static const uint_type fraction_encode_mask = + spvutils::SetBits::get; + + // The bit that is used as a sign. + static const uint_type sign_mask = uint_type(1) << top_bit_left_shift; + + // The bits that represent the exponent. + static const uint_type exponent_mask = + spvutils::SetBits::get; + + // How far left the exponent is shifted. 
+ static const uint32_t exponent_left_shift = num_fraction_bits; + + // How far from the right edge the fraction is shifted. + static const uint32_t fraction_right_shift = + static_cast(sizeof(uint_type) * 8) - num_fraction_bits; + + // The maximum representable unbiased exponent. + static const int_type max_exponent = + (exponent_mask >> num_fraction_bits) - exponent_bias; + // The minimum representable exponent for normalized numbers. + static const int_type min_exponent = -static_cast(exponent_bias); + + // Returns the bits associated with the value. + uint_type getBits() const { return spvutils::BitwiseCast(value_); } + + // Returns the bits associated with the value, without the leading sign bit. + uint_type getUnsignedBits() const { + return static_cast(spvutils::BitwiseCast(value_) & + ~sign_mask); + } + + // Returns the bits associated with the exponent, shifted to start at the + // lsb of the type. + const uint_type getExponentBits() const { + return static_cast((getBits() & exponent_mask) >> + num_fraction_bits); + } + + // Returns the exponent in unbiased form. This is the exponent in the + // human-friendly form. + const int_type getUnbiasedExponent() const { + return static_cast(getExponentBits() - exponent_bias); + } + + // Returns just the significand bits from the value. + const uint_type getSignificandBits() const { + return getBits() & fraction_encode_mask; + } + + // If the number was normalized, returns the unbiased exponent. + // If the number was denormal, normalize the exponent first. + const int_type getUnbiasedNormalizedExponent() const { + if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0 + return 0; + } + int_type exp = getUnbiasedExponent(); + if (exp == min_exponent) { // We are in denorm land. + uint_type significand_bits = getSignificandBits(); + while ((significand_bits & (first_exponent_bit >> 1)) == 0) { + significand_bits = static_cast(significand_bits << 1); + exp = static_cast(exp - 1); + } + significand_bits &= fraction_encode_mask; + } + return exp; + } + + // Returns the signficand after it has been normalized. + const uint_type getNormalizedSignificand() const { + int_type unbiased_exponent = getUnbiasedNormalizedExponent(); + uint_type significand = getSignificandBits(); + for (int_type i = unbiased_exponent; i <= min_exponent; ++i) { + significand = static_cast(significand << 1); + } + significand &= fraction_encode_mask; + return significand; + } + + // Returns true if this number represents a negative value. + bool isNegative() const { return (getBits() & sign_mask) != 0; } + + // Sets this HexFloat from the individual components. + // Note this assumes EVERY significand is normalized, and has an implicit + // leading one. This means that the only way that this method will set 0, + // is if you set a number so denormalized that it underflows. + // Do not use this method with raw bits extracted from a subnormal number, + // since subnormals do not have an implicit leading 1 in the significand. + // The significand is also expected to be in the + // lowest-most num_fraction_bits of the uint_type. + // The exponent is expected to be unbiased, meaning an exponent of + // 0 actually means 0. + // If underflow_round_up is set, then on underflow, if a number is non-0 + // and would underflow, we round up to the smallest denorm. 
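// (Worked example for IEEE float, added as an illustrative note: negative=false,
// exponent=0, significand=0 encodes bits 0x3f800000, i.e. 1.0f, because the
// re-biased exponent (0 + 127) is shifted left of the 23 fraction bits and the
// implicit leading one supplies the integer part.)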
+ void setFromSignUnbiasedExponentAndNormalizedSignificand( + bool negative, int_type exponent, uint_type significand, + bool round_denorm_up) { + bool significand_is_zero = significand == 0; + + if (exponent <= min_exponent) { + // If this was denormalized, then we have to shift the bit on, meaning + // the significand is not zero. + significand_is_zero = false; + significand |= first_exponent_bit; + significand = static_cast(significand >> 1); + } + + while (exponent < min_exponent) { + significand = static_cast(significand >> 1); + ++exponent; + } + + if (exponent == min_exponent) { + if (significand == 0 && !significand_is_zero && round_denorm_up) { + significand = static_cast(0x1); + } + } + + uint_type new_value = 0; + if (negative) { + new_value = static_cast(new_value | sign_mask); + } + exponent = static_cast(exponent + exponent_bias); + assert(exponent >= 0); + + // put it all together + exponent = static_cast((exponent << exponent_left_shift) & + exponent_mask); + significand = static_cast(significand & fraction_encode_mask); + new_value = static_cast(new_value | (exponent | significand)); + value_ = BitwiseCast(new_value); + } + + // Increments the significand of this number by the given amount. + // If this would spill the significand into the implicit bit, + // carry is set to true and the significand is shifted to fit into + // the correct location, otherwise carry is set to false. + // All significands and to_increment are assumed to be within the bounds + // for a valid significand. + static uint_type incrementSignificand(uint_type significand, + uint_type to_increment, bool* carry) { + significand = static_cast(significand + to_increment); + *carry = false; + if (significand & first_exponent_bit) { + *carry = true; + // The implicit 1-bit will have carried, so we should zero-out the + // top bit and shift back. + significand = static_cast(significand & ~first_exponent_bit); + significand = static_cast(significand >> 1); + } + return significand; + } + + // These exist because MSVC throws warnings on negative right-shifts + // even if they are not going to be executed. Eg: + // constant_number < 0? 0: constant_number + // These convert the negative left-shifts into right shifts. + + template + uint_type negatable_left_shift(int_type N, uint_type val) + { + if(N >= 0) + return val << N; + + return val >> -N; + } + + template + uint_type negatable_right_shift(int_type N, uint_type val) + { + if(N >= 0) + return val >> N; + + return val << -N; + } + + // Returns the significand, rounded to fit in a significand in + // other_T. This is shifted so that the most significant + // bit of the rounded number lines up with the most significant bit + // of the returned significand. + template + typename other_T::uint_type getRoundedNormalizedSignificand( + round_direction dir, bool* carry_bit) { + typedef typename other_T::uint_type other_uint_type; + static const int_type num_throwaway_bits = + static_cast(num_fraction_bits) - + static_cast(other_T::num_fraction_bits); + + static const uint_type last_significant_bit = + (num_throwaway_bits < 0) + ? 0 + : negatable_left_shift(num_throwaway_bits, 1u); + static const uint_type first_rounded_bit = + (num_throwaway_bits < 1) + ? 0 + : negatable_left_shift(num_throwaway_bits - 1, 1u); + + static const uint_type throwaway_mask_bits = + num_throwaway_bits > 0 ? 
num_throwaway_bits : 0; + static const uint_type throwaway_mask = + spvutils::SetBits::get; + + *carry_bit = false; + other_uint_type out_val = 0; + uint_type significand = getNormalizedSignificand(); + // If we are up-casting, then we just have to shift to the right location. + if (num_throwaway_bits <= 0) { + out_val = static_cast(significand); + uint_type shift_amount = static_cast(-num_throwaway_bits); + out_val = static_cast(out_val << shift_amount); + return out_val; + } + + // If every non-representable bit is 0, then we don't have any casting to + // do. + if ((significand & throwaway_mask) == 0) { + return static_cast( + negatable_right_shift(num_throwaway_bits, significand)); + } + + bool round_away_from_zero = false; + // We actually have to narrow the significand here, so we have to follow the + // rounding rules. + switch (dir) { + case kRoundToZero: + break; + case kRoundToPositiveInfinity: + round_away_from_zero = !isNegative(); + break; + case kRoundToNegativeInfinity: + round_away_from_zero = isNegative(); + break; + case kRoundToNearestEven: + // Have to round down, round bit is 0 + if ((first_rounded_bit & significand) == 0) { + break; + } + if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) { + // If any subsequent bit of the rounded portion is non-0 then we round + // up. + round_away_from_zero = true; + break; + } + // We are exactly half-way between 2 numbers, pick even. + if ((significand & last_significant_bit) != 0) { + // 1 for our last bit, round up. + round_away_from_zero = true; + break; + } + break; + } + + if (round_away_from_zero) { + return static_cast( + negatable_right_shift(num_throwaway_bits, incrementSignificand( + significand, last_significant_bit, carry_bit))); + } else { + return static_cast( + negatable_right_shift(num_throwaway_bits, significand)); + } + } + + // Casts this value to another HexFloat. If the cast is widening, + // then round_dir is ignored. If the cast is narrowing, then + // the result is rounded in the direction specified. + // This number will retain Nan and Inf values. + // It will also saturate to Inf if the number overflows, and + // underflow to (0 or min depending on rounding) if the number underflows. + template + void castTo(other_T& other, round_direction round_dir) { + other = other_T(static_cast(0)); + bool negate = isNegative(); + if (getUnsignedBits() == 0) { + if (negate) { + other.set_value(-other.value()); + } + return; + } + uint_type significand = getSignificandBits(); + bool carried = false; + typename other_T::uint_type rounded_significand = + getRoundedNormalizedSignificand(round_dir, &carried); + + int_type exponent = getUnbiasedExponent(); + if (exponent == min_exponent) { + // If we are denormal, normalize the exponent, so that we can encode + // easily. + exponent = static_cast(exponent + 1); + for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0; + check_bit = static_cast(check_bit >> 1)) { + exponent = static_cast(exponent - 1); + if (check_bit & significand) break; + } + } + + bool is_nan = + (getBits() & exponent_mask) == exponent_mask && significand != 0; + bool is_inf = + !is_nan && + ((exponent + carried) > static_cast(other_T::exponent_bias) || + (significand == 0 && (getBits() & exponent_mask) == exponent_mask)); + + // If we are Nan or Inf we should pass that through. + if (is_inf) { + other.set_value(BitwiseCast( + static_cast( + (negate ? 
other_T::sign_mask : 0) | other_T::exponent_mask))); + return; + } + if (is_nan) { + typename other_T::uint_type shifted_significand; + shifted_significand = static_cast( + negatable_left_shift( + static_cast(other_T::num_fraction_bits) - + static_cast(num_fraction_bits), significand)); + + // We are some sort of Nan. We try to keep the bit-pattern of the Nan + // as close as possible. If we had to shift off bits so we are 0, then we + // just set the last bit. + other.set_value(BitwiseCast( + static_cast( + (negate ? other_T::sign_mask : 0) | other_T::exponent_mask | + (shifted_significand == 0 ? 0x1 : shifted_significand)))); + return; + } + + bool round_underflow_up = + isNegative() ? round_dir == kRoundToNegativeInfinity + : round_dir == kRoundToPositiveInfinity; + typedef typename other_T::int_type other_int_type; + // setFromSignUnbiasedExponentAndNormalizedSignificand will + // zero out any underflowing value (but retain the sign). + other.setFromSignUnbiasedExponentAndNormalizedSignificand( + negate, static_cast(exponent), rounded_significand, + round_underflow_up); + return; + } + + private: + T value_; + + static_assert(num_used_bits == + Traits::num_exponent_bits + Traits::num_fraction_bits + 1, + "The number of bits do not fit"); + static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match"); +}; + +// Returns 4 bits represented by the hex character. +inline uint8_t get_nibble_from_character(int character) { + const char* dec = "0123456789"; + const char* lower = "abcdef"; + const char* upper = "ABCDEF"; + const char* p = nullptr; + if ((p = strchr(dec, character))) { + return static_cast(p - dec); + } else if ((p = strchr(lower, character))) { + return static_cast(p - lower + 0xa); + } else if ((p = strchr(upper, character))) { + return static_cast(p - upper + 0xa); + } + + assert(false && "This was called with a non-hex character"); + return 0; +} + +// Outputs the given HexFloat to the stream. +template +std::ostream& operator<<(std::ostream& os, const HexFloat& value) { + typedef HexFloat HF; + typedef typename HF::uint_type uint_type; + typedef typename HF::int_type int_type; + + static_assert(HF::num_used_bits != 0, + "num_used_bits must be non-zero for a valid float"); + static_assert(HF::num_exponent_bits != 0, + "num_exponent_bits must be non-zero for a valid float"); + static_assert(HF::num_fraction_bits != 0, + "num_fractin_bits must be non-zero for a valid float"); + + const uint_type bits = spvutils::BitwiseCast(value.value()); + const char* const sign = (bits & HF::sign_mask) ? "-" : ""; + const uint_type exponent = static_cast( + (bits & HF::exponent_mask) >> HF::num_fraction_bits); + + uint_type fraction = static_cast((bits & HF::fraction_encode_mask) + << HF::num_overflow_bits); + + const bool is_zero = exponent == 0 && fraction == 0; + const bool is_denorm = exponent == 0 && !is_zero; + + // exponent contains the biased exponent we have to convert it back into + // the normal range. + int_type int_exponent = static_cast(exponent - HF::exponent_bias); + // If the number is all zeros, then we actually have to NOT shift the + // exponent. + int_exponent = is_zero ? 0 : int_exponent; + + // If we are denorm, then start shifting, and decreasing the exponent until + // our leading bit is 1. 
+ + if (is_denorm) { + while ((fraction & HF::fraction_top_bit) == 0) { + fraction = static_cast(fraction << 1); + int_exponent = static_cast(int_exponent - 1); + } + // Since this is denormalized, we have to consume the leading 1 since it + // will end up being implicit. + fraction = static_cast(fraction << 1); // eat the leading 1 + fraction &= HF::fraction_represent_mask; + } + + uint_type fraction_nibbles = HF::fraction_nibbles; + // We do not have to display any trailing 0s, since this represents the + // fractional part. + while (fraction_nibbles > 0 && (fraction & 0xF) == 0) { + // Shift off any trailing values; + fraction = static_cast(fraction >> 4); + --fraction_nibbles; + } + + const auto saved_flags = os.flags(); + const auto saved_fill = os.fill(); + + os << sign << "0x" << (is_zero ? '0' : '1'); + if (fraction_nibbles) { + // Make sure to keep the leading 0s in place, since this is the fractional + // part. + os << "." << std::setw(static_cast(fraction_nibbles)) + << std::setfill('0') << std::hex << fraction; + } + os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent; + + os.flags(saved_flags); + os.fill(saved_fill); + + return os; +} + +// Returns true if negate_value is true and the next character on the +// input stream is a plus or minus sign. In that case we also set the fail bit +// on the stream and set the value to the zero value for its type. +template +inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value, + HexFloat& value) { + if (negate_value) { + auto next_char = is.peek(); + if (next_char == '-' || next_char == '+') { + // Fail the parse. Emulate standard behaviour by setting the value to + // the zero value, and set the fail bit on the stream. + value = HexFloat(typename HexFloat::uint_type(0)); + is.setstate(std::ios_base::failbit); + return true; + } + } + return false; +} + +// Parses a floating point number from the given stream and stores it into the +// value parameter. +// If negate_value is true then the number may not have a leading minus or +// plus, and if it successfully parses, then the number is negated before +// being stored into the value parameter. +// If the value cannot be correctly parsed or overflows the target floating +// point type, then set the fail bit on the stream. +// TODO(dneto): Promise C++11 standard behavior in how the value is set in +// the error case, but only after all target platforms implement it correctly. +// In particular, the Microsoft C++ runtime appears to be out of spec. +template +inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value, + HexFloat& value) { + if (RejectParseDueToLeadingSign(is, negate_value, value)) { + return is; + } + T val; + is >> val; + if (negate_value) { + val = -val; + } + value.set_value(val); + // In the failure case, map -0.0 to 0.0. + if (is.fail() && value.getUnsignedBits() == 0u) { + value = HexFloat(typename HexFloat::uint_type(0)); + } + if (val.isInfinity()) { + // Fail the parse. Emulate standard behaviour by setting the value to + // the closest normal value, and set the fail bit on the stream. + value.set_value((value.isNegative() || negate_value) ? T::lowest() + : T::max()); + is.setstate(std::ios_base::failbit); + } + return is; +} + +// Specialization of ParseNormalFloat for FloatProxy values. +// This will parse the float as it were a 32-bit floating point number, +// and then round it down to fit into a Float16 value. +// The number is rounded towards zero. 
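// (Illustrative example of the overflow path handled below: reading "1e10"
// parses to a finite 32-bit float, the cast to Float16 saturates to infinity,
// so the failbit is set and Float16::max(), 0x7bff = 65504, is stored instead.)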
+// If negate_value is true then the number may not have a leading minus or +// plus, and if it successfully parses, then the number is negated before +// being stored into the value parameter. +// If the value cannot be correctly parsed or overflows the target floating +// point type, then set the fail bit on the stream. +// TODO(dneto): Promise C++11 standard behavior in how the value is set in +// the error case, but only after all target platforms implement it correctly. +// In particular, the Microsoft C++ runtime appears to be out of spec. +template <> +inline std::istream& +ParseNormalFloat, HexFloatTraits>>( + std::istream& is, bool negate_value, + HexFloat, HexFloatTraits>>& value) { + // First parse as a 32-bit float. + HexFloat> float_val(0.0f); + ParseNormalFloat(is, negate_value, float_val); + + // Then convert to 16-bit float, saturating at infinities, and + // rounding toward zero. + float_val.castTo(value, kRoundToZero); + + // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the + // fail bit and set the lowest or highest value. + if (Float16::isInfinity(value.value().getAsFloat())) { + value.set_value(value.isNegative() ? Float16::lowest() : Float16::max()); + is.setstate(std::ios_base::failbit); + } + return is; +} + +// Reads a HexFloat from the given stream. +// If the float is not encoded as a hex-float then it will be parsed +// as a regular float. +// This may fail if your stream does not support at least one unget. +// Nan values can be encoded with "0x1.p+exponent_bias". +// This would normally overflow a float and round to +// infinity but this special pattern is the exact representation for a NaN, +// and therefore is actually encoded as the correct NaN. To encode inf, +// either 0x0p+exponent_bias can be specified or any exponent greater than +// exponent_bias. +// Examples using IEEE 32-bit float encoding. +// 0x1.0p+128 (+inf) +// -0x1.0p-128 (-inf) +// +// 0x1.1p+128 (+Nan) +// -0x1.1p+128 (-Nan) +// +// 0x1p+129 (+inf) +// -0x1p+129 (-inf) +template +std::istream& operator>>(std::istream& is, HexFloat& value) { + using HF = HexFloat; + using uint_type = typename HF::uint_type; + using int_type = typename HF::int_type; + + value.set_value(static_cast(0.f)); + + if (is.flags() & std::ios::skipws) { + // If the user wants to skip whitespace , then we should obey that. + while (std::isspace(is.peek())) { + is.get(); + } + } + + auto next_char = is.peek(); + bool negate_value = false; + + if (next_char != '-' && next_char != '0') { + return ParseNormalFloat(is, negate_value, value); + } + + if (next_char == '-') { + negate_value = true; + is.get(); + next_char = is.peek(); + } + + if (next_char == '0') { + is.get(); // We may have to unget this. + auto maybe_hex_start = is.peek(); + if (maybe_hex_start != 'x' && maybe_hex_start != 'X') { + is.unget(); + return ParseNormalFloat(is, negate_value, value); + } else { + is.get(); // Throw away the 'x'; + } + } else { + return ParseNormalFloat(is, negate_value, value); + } + + // This "looks" like a hex-float so treat it as one. + bool seen_p = false; + bool seen_dot = false; + uint_type fraction_index = 0; + + uint_type fraction = 0; + int_type exponent = HF::exponent_bias; + + // Strip off leading zeros so we don't have to special-case them later. + while ((next_char = is.peek()) == '0') { + is.get(); + } + + bool is_denorm = + true; // Assume denorm "representation" until we hear otherwise. + // NB: This does not mean the value is actually denorm, + // it just means that it was written 0. 
+ bool bits_written = false; // Stays false until we write a bit. + while (!seen_p && !seen_dot) { + // Handle characters that are left of the fractional part. + if (next_char == '.') { + seen_dot = true; + } else if (next_char == 'p') { + seen_p = true; + } else if (::isxdigit(next_char)) { + // We know this is not denormalized since we have stripped all leading + // zeroes and we are not a ".". + is_denorm = false; + int number = get_nibble_from_character(next_char); + for (int i = 0; i < 4; ++i, number <<= 1) { + uint_type write_bit = (number & 0x8) ? 0x1 : 0x0; + if (bits_written) { + // If we are here the bits represented belong in the fractional + // part of the float, and we have to adjust the exponent accordingly. + fraction = static_cast( + fraction | + static_cast( + write_bit << (HF::top_bit_left_shift - fraction_index++))); + exponent = static_cast(exponent + 1); + } + bits_written |= write_bit != 0; + } + } else { + // We have not found our exponent yet, so we have to fail. + is.setstate(std::ios::failbit); + return is; + } + is.get(); + next_char = is.peek(); + } + bits_written = false; + while (seen_dot && !seen_p) { + // Handle only fractional parts now. + if (next_char == 'p') { + seen_p = true; + } else if (::isxdigit(next_char)) { + int number = get_nibble_from_character(next_char); + for (int i = 0; i < 4; ++i, number <<= 1) { + uint_type write_bit = (number & 0x8) ? 0x01 : 0x00; + bits_written |= write_bit != 0; + if (is_denorm && !bits_written) { + // Handle modifying the exponent here this way we can handle + // an arbitrary number of hex values without overflowing our + // integer. + exponent = static_cast(exponent - 1); + } else { + fraction = static_cast( + fraction | + static_cast( + write_bit << (HF::top_bit_left_shift - fraction_index++))); + } + } + } else { + // We still have not found our 'p' exponent yet, so this is not a valid + // hex-float. + is.setstate(std::ios::failbit); + return is; + } + is.get(); + next_char = is.peek(); + } + + bool seen_sign = false; + int8_t exponent_sign = 1; + int_type written_exponent = 0; + while (true) { + if ((next_char == '-' || next_char == '+')) { + if (seen_sign) { + is.setstate(std::ios::failbit); + return is; + } + seen_sign = true; + exponent_sign = (next_char == '-') ? -1 : 1; + } else if (::isdigit(next_char)) { + // Hex-floats express their exponent as decimal. + written_exponent = static_cast(written_exponent * 10); + written_exponent = + static_cast(written_exponent + (next_char - '0')); + } else { + break; + } + is.get(); + next_char = is.peek(); + } + + written_exponent = static_cast(written_exponent * exponent_sign); + exponent = static_cast(exponent + written_exponent); + + bool is_zero = is_denorm && (fraction == 0); + if (is_denorm && !is_zero) { + fraction = static_cast(fraction << 1); + exponent = static_cast(exponent - 1); + } else if (is_zero) { + exponent = 0; + } + + if (exponent <= 0 && !is_zero) { + fraction = static_cast(fraction >> 1); + fraction |= static_cast(1) << HF::top_bit_left_shift; + } + + fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask; + + const int_type max_exponent = + SetBits::get; + + // Handle actual denorm numbers + while (exponent < 0 && !is_zero) { + fraction = static_cast(fraction >> 1); + exponent = static_cast(exponent + 1); + + fraction &= HF::fraction_encode_mask; + if (fraction == 0) { + // We have underflowed our fraction. We should clamp to zero. + is_zero = true; + exponent = 0; + } + } + + // We have overflowed so we should be inf/-inf. 
+  if (exponent > max_exponent) {
+    exponent = max_exponent;
+    fraction = 0;
+  }
+
+  uint_type output_bits = static_cast<uint_type>(
+      static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift);
+  output_bits |= fraction;
+
+  uint_type shifted_exponent = static_cast<uint_type>(
+      static_cast<uint_type>(exponent << HF::exponent_left_shift) &
+      HF::exponent_mask);
+  output_bits |= shifted_exponent;
+
+  T output_float = spvutils::BitwiseCast<T>(output_bits);
+  value.set_value(output_float);
+
+  return is;
+}
+
+// Writes a FloatProxy value to a stream.
+// Zero and normal numbers are printed in the usual notation, but with
+// enough digits to fully reproduce the value. Other values (subnormal,
+// NaN, and infinity) are printed as a hex float.
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) {
+  auto float_val = value.getAsFloat();
+  switch (std::fpclassify(float_val)) {
+    case FP_ZERO:
+    case FP_NORMAL: {
+      auto saved_precision = os.precision();
+      os.precision(std::numeric_limits<T>::digits10);
+      os << float_val;
+      os.precision(saved_precision);
+    } break;
+    default:
+      os << HexFloat<FloatProxy<T>>(value);
+      break;
+  }
+  return os;
+}
+
+template <>
+inline std::ostream& operator<<(std::ostream& os,
+                                const FloatProxy<Float16>& value) {
+  os << HexFloat<FloatProxy<Float16>>(value);
+  return os;
+}
+}
+
+#endif  // LIBSPIRV_UTIL_HEX_FLOAT_H_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/spirv.hpp b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/spirv.hpp
new file mode 100644
index 0000000..f85469d
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/spirv.hpp
@@ -0,0 +1,2533 @@
+// Copyright (c) 2014-2020 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and/or associated documentation files (the "Materials"),
+// to deal in the Materials without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Materials, and to permit persons to whom the
+// Materials are furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+// IN THE MATERIALS.
+
+// This header is automatically generated by the same tool that creates
+// the Binary Section of the SPIR-V specification.
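// (Illustrative note, not upstream text: the OpCodeMask and WordCountShift
// constants defined below decode the leading 32-bit word of any SPIR-V
// instruction, e.g.
//     unsigned wordCount = firstWord >> spv::WordCountShift;
//     unsigned opcode    = firstWord & spv::OpCodeMask;
// where firstWord is the first word of the instruction's encoding.)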
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageMax = 0x7fffffff, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, + ExecutionModelTaskNV = 5267, + ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, + ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 5314, + ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, + ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, + ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, + ExecutionModelMissNV = 5317, + ExecutionModelCallableKHR = 5318, + ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, + ExecutionModelMax = 0x7fffffff, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, + AddressingModelPhysicalStorageBuffer64EXT = 5348, + AddressingModelMax = 0x7fffffff, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, + MemoryModelVulkanKHR = 3, + MemoryModelMax = 0x7fffffff, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + 
ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, + ExecutionModeInitializer = 33, + ExecutionModeFinalizer = 34, + ExecutionModeSubgroupSize = 35, + ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeSubgroupsPerWorkgroupId = 37, + ExecutionModeLocalSizeId = 38, + ExecutionModeLocalSizeHintId = 39, + ExecutionModeSubgroupUniformControlFlowKHR = 4421, + ExecutionModePostDepthCoverage = 4446, + ExecutionModeDenormPreserve = 4459, + ExecutionModeDenormFlushToZero = 4460, + ExecutionModeSignedZeroInfNanPreserve = 4461, + ExecutionModeRoundingModeRTE = 4462, + ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, + ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, + ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, + ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, + ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeSharedLocalMemorySizeINTEL = 5618, + ExecutionModeRoundingModeRTPINTEL = 5620, + ExecutionModeRoundingModeRTNINTEL = 5621, + ExecutionModeFloatingPointModeALTINTEL = 5622, + ExecutionModeFloatingPointModeIEEEINTEL = 5623, + ExecutionModeMaxWorkgroupSizeINTEL = 5893, + ExecutionModeMaxWorkDimINTEL = 5894, + ExecutionModeNoGlobalOffsetINTEL = 5895, + ExecutionModeNumSIMDWorkitemsINTEL = 5896, + ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeMax = 0x7fffffff, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, + StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, + StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, + StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, + StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, + StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, + StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, + StorageClassPhysicalStorageBufferEXT = 5349, + 
StorageClassTaskPayloadWorkgroupEXT = 5402, + StorageClassCodeSectionINTEL = 5605, + StorageClassDeviceOnlyINTEL = 5936, + StorageClassHostOnlyINTEL = 5937, + StorageClassMax = 0x7fffffff, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, + DimMax = 0x7fffffff, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, + ImageFormatR64ui = 40, + ImageFormatR64i = 41, + ImageFormatMax = 0x7fffffff, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + 
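// (Illustrative note, not upstream text: each *Shift enumerant pairs with the
// *Mask enumerant in the following enum as mask == 1u << shift, and the masks
// combine by OR, e.g.
//     unsigned ops    = spv::ImageOperandsBiasMask | spv::ImageOperandsLodMask; // 0x3
//     bool     hasLod = ((ops >> spv::ImageOperandsLodShift) & 1u) != 0;        // true
// )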
ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, + ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, + ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, + ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, + ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsNontemporalShift = 14, + ImageOperandsOffsetsShift = 16, + ImageOperandsMax = 0x7fffffff, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, + ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, + ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, + ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, + ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, + ImageOperandsNontemporalMask = 0x00004000, + ImageOperandsOffsetsMask = 0x00010000, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocINTELShift = 17, + FPFastMathModeMax = 0x7fffffff, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocINTELMask = 0x00020000, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, + LinkageTypeMax = 0x7fffffff, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 
16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationUniformId = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, + DecorationMaxByteOffset = 45, + DecorationAlignmentId = 46, + DecorationMaxByteOffsetId = 47, + DecorationNoSignedWrap = 4469, + DecorationNoUnsignedWrap = 4470, + DecorationExplicitInterpAMD = 4999, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, + DecorationPerPrimitiveNV = 5271, + DecorationPerViewNV = 5272, + DecorationPerTaskNV = 5273, + DecorationPerVertexKHR = 5285, + DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, + DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, + DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, + DecorationAliasedPointerEXT = 5356, + DecorationBindlessSamplerNV = 5398, + DecorationBindlessImageNV = 5399, + DecorationBoundSamplerNV = 5400, + DecorationBoundImageNV = 5401, + DecorationSIMTCallINTEL = 5599, + DecorationReferencedIndirectlyINTEL = 5602, + DecorationClobberINTEL = 5607, + DecorationSideEffectsINTEL = 5608, + DecorationVectorComputeVariableINTEL = 5624, + DecorationFuncParamIOKindINTEL = 5625, + DecorationVectorComputeFunctionINTEL = 5626, + DecorationStackCallINTEL = 5627, + DecorationGlobalVariableOffsetINTEL = 5628, + DecorationCounterBuffer = 5634, + DecorationHlslCounterBufferGOOGLE = 5634, + DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationFunctionRoundingModeINTEL = 5822, + DecorationFunctionDenormModeINTEL = 5823, + DecorationRegisterINTEL = 5825, + DecorationMemoryINTEL = 5826, + DecorationNumbanksINTEL = 5827, + DecorationBankwidthINTEL = 5828, + DecorationMaxPrivateCopiesINTEL = 5829, + DecorationSinglepumpINTEL = 5830, + DecorationDoublepumpINTEL = 5831, + DecorationMaxReplicatesINTEL = 5832, + DecorationSimpleDualPortINTEL = 5833, + DecorationMergeINTEL = 5834, + DecorationBankBitsINTEL = 5835, + DecorationForcePow2DepthINTEL = 5836, + DecorationBurstCoalesceINTEL = 5899, + DecorationCacheSizeINTEL = 5900, + DecorationDontStaticallyCoalesceINTEL = 5901, + DecorationPrefetchINTEL = 5902, + DecorationStallEnableINTEL = 5905, + DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationBufferLocationINTEL = 5921, + DecorationIOPipeStorageINTEL = 5944, + DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationSingleElementVectorINTEL = 6085, + DecorationVectorComputeCallableFunctionINTEL = 6087, + DecorationMediaBlockIOINTEL = 6140, + DecorationMax = 0x7fffffff, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + 
+    BuiltInInvocationId = 8,
+    BuiltInLayer = 9,
+    BuiltInViewportIndex = 10,
+    BuiltInTessLevelOuter = 11,
+    BuiltInTessLevelInner = 12,
+    BuiltInTessCoord = 13,
+    BuiltInPatchVertices = 14,
+    BuiltInFragCoord = 15,
+    BuiltInPointCoord = 16,
+    BuiltInFrontFacing = 17,
+    BuiltInSampleId = 18,
+    BuiltInSamplePosition = 19,
+    BuiltInSampleMask = 20,
+    BuiltInFragDepth = 22,
+    BuiltInHelperInvocation = 23,
+    BuiltInNumWorkgroups = 24,
+    BuiltInWorkgroupSize = 25,
+    BuiltInWorkgroupId = 26,
+    BuiltInLocalInvocationId = 27,
+    BuiltInGlobalInvocationId = 28,
+    BuiltInLocalInvocationIndex = 29,
+    BuiltInWorkDim = 30,
+    BuiltInGlobalSize = 31,
+    BuiltInEnqueuedWorkgroupSize = 32,
+    BuiltInGlobalOffset = 33,
+    BuiltInGlobalLinearId = 34,
+    BuiltInSubgroupSize = 36,
+    BuiltInSubgroupMaxSize = 37,
+    BuiltInNumSubgroups = 38,
+    BuiltInNumEnqueuedSubgroups = 39,
+    BuiltInSubgroupId = 40,
+    BuiltInSubgroupLocalInvocationId = 41,
+    BuiltInVertexIndex = 42,
+    BuiltInInstanceIndex = 43,
+    BuiltInSubgroupEqMask = 4416,
+    BuiltInSubgroupEqMaskKHR = 4416,
+    BuiltInSubgroupGeMask = 4417,
+    BuiltInSubgroupGeMaskKHR = 4417,
+    BuiltInSubgroupGtMask = 4418,
+    BuiltInSubgroupGtMaskKHR = 4418,
+    BuiltInSubgroupLeMask = 4419,
+    BuiltInSubgroupLeMaskKHR = 4419,
+    BuiltInSubgroupLtMask = 4420,
+    BuiltInSubgroupLtMaskKHR = 4420,
+    BuiltInBaseVertex = 4424,
+    BuiltInBaseInstance = 4425,
+    BuiltInDrawIndex = 4426,
+    BuiltInPrimitiveShadingRateKHR = 4432,
+    BuiltInDeviceIndex = 4438,
+    BuiltInViewIndex = 4440,
+    BuiltInShadingRateKHR = 4444,
+    BuiltInBaryCoordNoPerspAMD = 4992,
+    BuiltInBaryCoordNoPerspCentroidAMD = 4993,
+    BuiltInBaryCoordNoPerspSampleAMD = 4994,
+    BuiltInBaryCoordSmoothAMD = 4995,
+    BuiltInBaryCoordSmoothCentroidAMD = 4996,
+    BuiltInBaryCoordSmoothSampleAMD = 4997,
+    BuiltInBaryCoordPullModelAMD = 4998,
+    BuiltInFragStencilRefEXT = 5014,
+    BuiltInViewportMaskNV = 5253,
+    BuiltInSecondaryPositionNV = 5257,
+    BuiltInSecondaryViewportMaskNV = 5258,
+    BuiltInPositionPerViewNV = 5261,
+    BuiltInViewportMaskPerViewNV = 5262,
+    BuiltInFullyCoveredEXT = 5264,
+    BuiltInTaskCountNV = 5274,
+    BuiltInPrimitiveCountNV = 5275,
+    BuiltInPrimitiveIndicesNV = 5276,
+    BuiltInClipDistancePerViewNV = 5277,
+    BuiltInCullDistancePerViewNV = 5278,
+    BuiltInLayerPerViewNV = 5279,
+    BuiltInMeshViewCountNV = 5280,
+    BuiltInMeshViewIndicesNV = 5281,
+    BuiltInBaryCoordKHR = 5286,
+    BuiltInBaryCoordNV = 5286,
+    BuiltInBaryCoordNoPerspKHR = 5287,
+    BuiltInBaryCoordNoPerspNV = 5287,
+    BuiltInFragSizeEXT = 5292,
+    BuiltInFragmentSizeNV = 5292,
+    BuiltInFragInvocationCountEXT = 5293,
+    BuiltInInvocationsPerPixelNV = 5293,
+    BuiltInPrimitivePointIndicesEXT = 5294,
+    BuiltInPrimitiveLineIndicesEXT = 5295,
+    BuiltInPrimitiveTriangleIndicesEXT = 5296,
+    BuiltInCullPrimitiveEXT = 5299,
+    BuiltInLaunchIdKHR = 5319,
+    BuiltInLaunchIdNV = 5319,
+    BuiltInLaunchSizeKHR = 5320,
+    BuiltInLaunchSizeNV = 5320,
+    BuiltInWorldRayOriginKHR = 5321,
+    BuiltInWorldRayOriginNV = 5321,
+    BuiltInWorldRayDirectionKHR = 5322,
+    BuiltInWorldRayDirectionNV = 5322,
+    BuiltInObjectRayOriginKHR = 5323,
+    BuiltInObjectRayOriginNV = 5323,
+    BuiltInObjectRayDirectionKHR = 5324,
+    BuiltInObjectRayDirectionNV = 5324,
+    BuiltInRayTminKHR = 5325,
+    BuiltInRayTminNV = 5325,
+    BuiltInRayTmaxKHR = 5326,
+    BuiltInRayTmaxNV = 5326,
+    BuiltInInstanceCustomIndexKHR = 5327,
+    BuiltInInstanceCustomIndexNV = 5327,
+    BuiltInObjectToWorldKHR = 5330,
+    BuiltInObjectToWorldNV = 5330,
+    BuiltInWorldToObjectKHR = 5331,
+    BuiltInWorldToObjectNV = 5331,
+    BuiltInHitTNV = 5332,
+    BuiltInHitKindKHR = 5333,
+    BuiltInHitKindNV = 5333,
+    BuiltInCurrentRayTimeNV = 5334,
+    BuiltInIncomingRayFlagsKHR = 5351,
+    BuiltInIncomingRayFlagsNV = 5351,
+    BuiltInRayGeometryIndexKHR = 5352,
+    BuiltInWarpsPerSMNV = 5374,
+    BuiltInSMCountNV = 5375,
+    BuiltInWarpIDNV = 5376,
+    BuiltInSMIDNV = 5377,
+    BuiltInCullMaskKHR = 6021,
+    BuiltInMax = 0x7fffffff,
+};
+
+enum SelectionControlShift {
+    SelectionControlFlattenShift = 0,
+    SelectionControlDontFlattenShift = 1,
+    SelectionControlMax = 0x7fffffff,
+};
+
+enum SelectionControlMask {
+    SelectionControlMaskNone = 0,
+    SelectionControlFlattenMask = 0x00000001,
+    SelectionControlDontFlattenMask = 0x00000002,
+};
+
+enum LoopControlShift {
+    LoopControlUnrollShift = 0,
+    LoopControlDontUnrollShift = 1,
+    LoopControlDependencyInfiniteShift = 2,
+    LoopControlDependencyLengthShift = 3,
+    LoopControlMinIterationsShift = 4,
+    LoopControlMaxIterationsShift = 5,
+    LoopControlIterationMultipleShift = 6,
+    LoopControlPeelCountShift = 7,
+    LoopControlPartialCountShift = 8,
+    LoopControlInitiationIntervalINTELShift = 16,
+    LoopControlMaxConcurrencyINTELShift = 17,
+    LoopControlDependencyArrayINTELShift = 18,
+    LoopControlPipelineEnableINTELShift = 19,
+    LoopControlLoopCoalesceINTELShift = 20,
+    LoopControlMaxInterleavingINTELShift = 21,
+    LoopControlSpeculatedIterationsINTELShift = 22,
+    LoopControlNoFusionINTELShift = 23,
+    LoopControlMax = 0x7fffffff,
+};
+
+enum LoopControlMask {
+    LoopControlMaskNone = 0,
+    LoopControlUnrollMask = 0x00000001,
+    LoopControlDontUnrollMask = 0x00000002,
+    LoopControlDependencyInfiniteMask = 0x00000004,
+    LoopControlDependencyLengthMask = 0x00000008,
+    LoopControlMinIterationsMask = 0x00000010,
+    LoopControlMaxIterationsMask = 0x00000020,
+    LoopControlIterationMultipleMask = 0x00000040,
+    LoopControlPeelCountMask = 0x00000080,
+    LoopControlPartialCountMask = 0x00000100,
+    LoopControlInitiationIntervalINTELMask = 0x00010000,
+    LoopControlMaxConcurrencyINTELMask = 0x00020000,
+    LoopControlDependencyArrayINTELMask = 0x00040000,
+    LoopControlPipelineEnableINTELMask = 0x00080000,
+    LoopControlLoopCoalesceINTELMask = 0x00100000,
+    LoopControlMaxInterleavingINTELMask = 0x00200000,
+    LoopControlSpeculatedIterationsINTELMask = 0x00400000,
+    LoopControlNoFusionINTELMask = 0x00800000,
+};
+
+enum FunctionControlShift {
+    FunctionControlInlineShift = 0,
+    FunctionControlDontInlineShift = 1,
+    FunctionControlPureShift = 2,
+    FunctionControlConstShift = 3,
+    FunctionControlOptNoneINTELShift = 16,
+    FunctionControlMax = 0x7fffffff,
+};
+
+enum FunctionControlMask {
+    FunctionControlMaskNone = 0,
+    FunctionControlInlineMask = 0x00000001,
+    FunctionControlDontInlineMask = 0x00000002,
+    FunctionControlPureMask = 0x00000004,
+    FunctionControlConstMask = 0x00000008,
+    FunctionControlOptNoneINTELMask = 0x00010000,
+};
+
+enum MemorySemanticsShift {
+    MemorySemanticsAcquireShift = 1,
+    MemorySemanticsReleaseShift = 2,
+    MemorySemanticsAcquireReleaseShift = 3,
+    MemorySemanticsSequentiallyConsistentShift = 4,
+    MemorySemanticsUniformMemoryShift = 6,
+    MemorySemanticsSubgroupMemoryShift = 7,
+    MemorySemanticsWorkgroupMemoryShift = 8,
+    MemorySemanticsCrossWorkgroupMemoryShift = 9,
+    MemorySemanticsAtomicCounterMemoryShift = 10,
+    MemorySemanticsImageMemoryShift = 11,
+    MemorySemanticsOutputMemoryShift = 12,
+    MemorySemanticsOutputMemoryKHRShift = 12,
+    MemorySemanticsMakeAvailableShift = 13,
+    MemorySemanticsMakeAvailableKHRShift = 13,
+    MemorySemanticsMakeVisibleShift = 14,
+    MemorySemanticsMakeVisibleKHRShift = 14,
+    MemorySemanticsVolatileShift = 15,
+    MemorySemanticsMax = 0x7fffffff,
+};
+
+enum MemorySemanticsMask {
+    MemorySemanticsMaskNone = 0,
+    MemorySemanticsAcquireMask = 0x00000002,
+    MemorySemanticsReleaseMask = 0x00000004,
+    MemorySemanticsAcquireReleaseMask = 0x00000008,
+    MemorySemanticsSequentiallyConsistentMask = 0x00000010,
+    MemorySemanticsUniformMemoryMask = 0x00000040,
+    MemorySemanticsSubgroupMemoryMask = 0x00000080,
+    MemorySemanticsWorkgroupMemoryMask = 0x00000100,
+    MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
+    MemorySemanticsAtomicCounterMemoryMask = 0x00000400,
+    MemorySemanticsImageMemoryMask = 0x00000800,
+    MemorySemanticsOutputMemoryMask = 0x00001000,
+    MemorySemanticsOutputMemoryKHRMask = 0x00001000,
+    MemorySemanticsMakeAvailableMask = 0x00002000,
+    MemorySemanticsMakeAvailableKHRMask = 0x00002000,
+    MemorySemanticsMakeVisibleMask = 0x00004000,
+    MemorySemanticsMakeVisibleKHRMask = 0x00004000,
+    MemorySemanticsVolatileMask = 0x00008000,
+};
+
+enum MemoryAccessShift {
+    MemoryAccessVolatileShift = 0,
+    MemoryAccessAlignedShift = 1,
+    MemoryAccessNontemporalShift = 2,
+    MemoryAccessMakePointerAvailableShift = 3,
+    MemoryAccessMakePointerAvailableKHRShift = 3,
+    MemoryAccessMakePointerVisibleShift = 4,
+    MemoryAccessMakePointerVisibleKHRShift = 4,
+    MemoryAccessNonPrivatePointerShift = 5,
+    MemoryAccessNonPrivatePointerKHRShift = 5,
+    MemoryAccessMax = 0x7fffffff,
+};
+
+enum MemoryAccessMask {
+    MemoryAccessMaskNone = 0,
+    MemoryAccessVolatileMask = 0x00000001,
+    MemoryAccessAlignedMask = 0x00000002,
+    MemoryAccessNontemporalMask = 0x00000004,
+    MemoryAccessMakePointerAvailableMask = 0x00000008,
+    MemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+    MemoryAccessMakePointerVisibleMask = 0x00000010,
+    MemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+    MemoryAccessNonPrivatePointerMask = 0x00000020,
+    MemoryAccessNonPrivatePointerKHRMask = 0x00000020,
+};
+
+enum Scope {
+    ScopeCrossDevice = 0,
+    ScopeDevice = 1,
+    ScopeWorkgroup = 2,
+    ScopeSubgroup = 3,
+    ScopeInvocation = 4,
+    ScopeQueueFamily = 5,
+    ScopeQueueFamilyKHR = 5,
+    ScopeShaderCallKHR = 6,
+    ScopeMax = 0x7fffffff,
+};
+
+enum GroupOperation {
+    GroupOperationReduce = 0,
+    GroupOperationInclusiveScan = 1,
+    GroupOperationExclusiveScan = 2,
+    GroupOperationClusteredReduce = 3,
+    GroupOperationPartitionedReduceNV = 6,
+    GroupOperationPartitionedInclusiveScanNV = 7,
+    GroupOperationPartitionedExclusiveScanNV = 8,
+    GroupOperationMax = 0x7fffffff,
+};
+
+enum KernelEnqueueFlags {
+    KernelEnqueueFlagsNoWait = 0,
+    KernelEnqueueFlagsWaitKernel = 1,
+    KernelEnqueueFlagsWaitWorkGroup = 2,
+    KernelEnqueueFlagsMax = 0x7fffffff,
+};
+
+enum KernelProfilingInfoShift {
+    KernelProfilingInfoCmdExecTimeShift = 0,
+    KernelProfilingInfoMax = 0x7fffffff,
+};
+
+enum KernelProfilingInfoMask {
+    KernelProfilingInfoMaskNone = 0,
+    KernelProfilingInfoCmdExecTimeMask = 0x00000001,
+};
+
+enum Capability {
+    CapabilityMatrix = 0,
+    CapabilityShader = 1,
+    CapabilityGeometry = 2,
+    CapabilityTessellation = 3,
+    CapabilityAddresses = 4,
+    CapabilityLinkage = 5,
+    CapabilityKernel = 6,
+    CapabilityVector16 = 7,
+    CapabilityFloat16Buffer = 8,
+    CapabilityFloat16 = 9,
+    CapabilityFloat64 = 10,
+    CapabilityInt64 = 11,
+    CapabilityInt64Atomics = 12,
+    CapabilityImageBasic = 13,
+    CapabilityImageReadWrite = 14,
+    CapabilityImageMipmap = 15,
+    CapabilityPipes = 17,
+    CapabilityGroups = 18,
+    CapabilityDeviceEnqueue = 19,
+    CapabilityLiteralSampler = 20,
+    CapabilityAtomicStorage = 21,
+    CapabilityInt16 = 22,
+    CapabilityTessellationPointSize = 23,
+    CapabilityGeometryPointSize = 24,
+    CapabilityImageGatherExtended = 25,
+    CapabilityStorageImageMultisample = 27,
+    CapabilityUniformBufferArrayDynamicIndexing = 28,
+    CapabilitySampledImageArrayDynamicIndexing = 29,
+    CapabilityStorageBufferArrayDynamicIndexing = 30,
+    CapabilityStorageImageArrayDynamicIndexing = 31,
+    CapabilityClipDistance = 32,
+    CapabilityCullDistance = 33,
+    CapabilityImageCubeArray = 34,
+    CapabilitySampleRateShading = 35,
+    CapabilityImageRect = 36,
+    CapabilitySampledRect = 37,
+    CapabilityGenericPointer = 38,
+    CapabilityInt8 = 39,
+    CapabilityInputAttachment = 40,
+    CapabilitySparseResidency = 41,
+    CapabilityMinLod = 42,
+    CapabilitySampled1D = 43,
+    CapabilityImage1D = 44,
+    CapabilitySampledCubeArray = 45,
+    CapabilitySampledBuffer = 46,
+    CapabilityImageBuffer = 47,
+    CapabilityImageMSArray = 48,
+    CapabilityStorageImageExtendedFormats = 49,
+    CapabilityImageQuery = 50,
+    CapabilityDerivativeControl = 51,
+    CapabilityInterpolationFunction = 52,
+    CapabilityTransformFeedback = 53,
+    CapabilityGeometryStreams = 54,
+    CapabilityStorageImageReadWithoutFormat = 55,
+    CapabilityStorageImageWriteWithoutFormat = 56,
+    CapabilityMultiViewport = 57,
+    CapabilitySubgroupDispatch = 58,
+    CapabilityNamedBarrier = 59,
+    CapabilityPipeStorage = 60,
+    CapabilityGroupNonUniform = 61,
+    CapabilityGroupNonUniformVote = 62,
+    CapabilityGroupNonUniformArithmetic = 63,
+    CapabilityGroupNonUniformBallot = 64,
+    CapabilityGroupNonUniformShuffle = 65,
+    CapabilityGroupNonUniformShuffleRelative = 66,
+    CapabilityGroupNonUniformClustered = 67,
+    CapabilityGroupNonUniformQuad = 68,
+    CapabilityShaderLayer = 69,
+    CapabilityShaderViewportIndex = 70,
+    CapabilityUniformDecoration = 71,
+    CapabilityFragmentShadingRateKHR = 4422,
+    CapabilitySubgroupBallotKHR = 4423,
+    CapabilityDrawParameters = 4427,
+    CapabilityWorkgroupMemoryExplicitLayoutKHR = 4428,
+    CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429,
+    CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430,
+    CapabilitySubgroupVoteKHR = 4431,
+    CapabilityStorageBuffer16BitAccess = 4433,
+    CapabilityStorageUniformBufferBlock16 = 4433,
+    CapabilityStorageUniform16 = 4434,
+    CapabilityUniformAndStorageBuffer16BitAccess = 4434,
+    CapabilityStoragePushConstant16 = 4435,
+    CapabilityStorageInputOutput16 = 4436,
+    CapabilityDeviceGroup = 4437,
+    CapabilityMultiView = 4439,
+    CapabilityVariablePointersStorageBuffer = 4441,
+    CapabilityVariablePointers = 4442,
+    CapabilityAtomicStorageOps = 4445,
+    CapabilitySampleMaskPostDepthCoverage = 4447,
+    CapabilityStorageBuffer8BitAccess = 4448,
+    CapabilityUniformAndStorageBuffer8BitAccess = 4449,
+    CapabilityStoragePushConstant8 = 4450,
+    CapabilityDenormPreserve = 4464,
+    CapabilityDenormFlushToZero = 4465,
+    CapabilitySignedZeroInfNanPreserve = 4466,
+    CapabilityRoundingModeRTE = 4467,
+    CapabilityRoundingModeRTZ = 4468,
+    CapabilityRayQueryProvisionalKHR = 4471,
+    CapabilityRayQueryKHR = 4472,
+    CapabilityRayTraversalPrimitiveCullingKHR = 4478,
+    CapabilityRayTracingKHR = 4479,
+    CapabilityFloat16ImageAMD = 5008,
+    CapabilityImageGatherBiasLodAMD = 5009,
+    CapabilityFragmentMaskAMD = 5010,
+    CapabilityStencilExportEXT = 5013,
+    CapabilityImageReadWriteLodAMD = 5015,
+    CapabilityInt64ImageEXT = 5016,
+    CapabilityShaderClockKHR = 5055,
+    CapabilitySampleMaskOverrideCoverageNV = 5249,
+    CapabilityGeometryShaderPassthroughNV = 5251,
+    CapabilityShaderViewportIndexLayerEXT = 5254,
+    CapabilityShaderViewportIndexLayerNV = 5254,
+    CapabilityShaderViewportMaskNV = 5255,
+    CapabilityShaderStereoViewNV = 5259,
+    CapabilityPerViewAttributesNV = 5260,
+    CapabilityFragmentFullyCoveredEXT = 5265,
+    CapabilityMeshShadingNV = 5266,
+    CapabilityImageFootprintNV = 5282,
+    CapabilityMeshShadingEXT = 5283,
+    CapabilityFragmentBarycentricKHR = 5284,
+    CapabilityFragmentBarycentricNV = 5284,
+    CapabilityComputeDerivativeGroupQuadsNV = 5288,
+    CapabilityFragmentDensityEXT = 5291,
+    CapabilityShadingRateNV = 5291,
+    CapabilityGroupNonUniformPartitionedNV = 5297,
+    CapabilityShaderNonUniform = 5301,
+    CapabilityShaderNonUniformEXT = 5301,
+    CapabilityRuntimeDescriptorArray = 5302,
+    CapabilityRuntimeDescriptorArrayEXT = 5302,
+    CapabilityInputAttachmentArrayDynamicIndexing = 5303,
+    CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303,
+    CapabilityUniformTexelBufferArrayDynamicIndexing = 5304,
+    CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304,
+    CapabilityStorageTexelBufferArrayDynamicIndexing = 5305,
+    CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305,
+    CapabilityUniformBufferArrayNonUniformIndexing = 5306,
+    CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306,
+    CapabilitySampledImageArrayNonUniformIndexing = 5307,
+    CapabilitySampledImageArrayNonUniformIndexingEXT = 5307,
+    CapabilityStorageBufferArrayNonUniformIndexing = 5308,
+    CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308,
+    CapabilityStorageImageArrayNonUniformIndexing = 5309,
+    CapabilityStorageImageArrayNonUniformIndexingEXT = 5309,
+    CapabilityInputAttachmentArrayNonUniformIndexing = 5310,
+    CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310,
+    CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311,
+    CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+    CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
+    CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
+    CapabilityRayTracingNV = 5340,
+    CapabilityRayTracingMotionBlurNV = 5341,
+    CapabilityVulkanMemoryModel = 5345,
+    CapabilityVulkanMemoryModelKHR = 5345,
+    CapabilityVulkanMemoryModelDeviceScope = 5346,
+    CapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
+    CapabilityPhysicalStorageBufferAddresses = 5347,
+    CapabilityPhysicalStorageBufferAddressesEXT = 5347,
+    CapabilityComputeDerivativeGroupLinearNV = 5350,
+    CapabilityRayTracingProvisionalKHR = 5353,
+    CapabilityCooperativeMatrixNV = 5357,
+    CapabilityFragmentShaderSampleInterlockEXT = 5363,
+    CapabilityFragmentShaderShadingRateInterlockEXT = 5372,
+    CapabilityShaderSMBuiltinsNV = 5373,
+    CapabilityFragmentShaderPixelInterlockEXT = 5378,
+    CapabilityDemoteToHelperInvocation = 5379,
+    CapabilityDemoteToHelperInvocationEXT = 5379,
+    CapabilityBindlessTextureNV = 5390,
+    CapabilitySubgroupShuffleINTEL = 5568,
+    CapabilitySubgroupBufferBlockIOINTEL = 5569,
+    CapabilitySubgroupImageBlockIOINTEL = 5570,
+    CapabilitySubgroupImageMediaBlockIOINTEL = 5579,
+    CapabilityRoundToInfinityINTEL = 5582,
+    CapabilityFloatingPointModeINTEL = 5583,
+    CapabilityIntegerFunctions2INTEL = 5584,
+    CapabilityFunctionPointersINTEL = 5603,
+    CapabilityIndirectReferencesINTEL = 5604,
+    CapabilityAsmINTEL = 5606,
+    CapabilityAtomicFloat32MinMaxEXT = 5612,
+    CapabilityAtomicFloat64MinMaxEXT = 5613,
+    CapabilityAtomicFloat16MinMaxEXT = 5616,
+    CapabilityVectorComputeINTEL = 5617,
+    CapabilityVectorAnyINTEL = 5619,
+    CapabilityExpectAssumeKHR = 5629,
+    CapabilitySubgroupAvcMotionEstimationINTEL = 5696,
+    CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
+    CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
+    CapabilityVariableLengthArrayINTEL = 5817,
+    CapabilityFunctionFloatControlINTEL = 5821,
+    CapabilityFPGAMemoryAttributesINTEL = 5824,
+    CapabilityFPFastMathModeINTEL = 5837,
+    CapabilityArbitraryPrecisionIntegersINTEL = 5844,
+    CapabilityArbitraryPrecisionFloatingPointINTEL = 5845,
+    CapabilityUnstructuredLoopControlsINTEL = 5886,
+    CapabilityFPGALoopControlsINTEL = 5888,
+    CapabilityKernelAttributesINTEL = 5892,
+    CapabilityFPGAKernelAttributesINTEL = 5897,
+    CapabilityFPGAMemoryAccessesINTEL = 5898,
+    CapabilityFPGAClusterAttributesINTEL = 5904,
+    CapabilityLoopFuseINTEL = 5906,
+    CapabilityFPGABufferLocationINTEL = 5920,
+    CapabilityArbitraryPrecisionFixedPointINTEL = 5922,
+    CapabilityUSMStorageClassesINTEL = 5935,
+    CapabilityIOPipesINTEL = 5943,
+    CapabilityBlockingPipesINTEL = 5945,
+    CapabilityFPGARegINTEL = 5948,
+    CapabilityDotProductInputAll = 6016,
+    CapabilityDotProductInputAllKHR = 6016,
+    CapabilityDotProductInput4x8Bit = 6017,
+    CapabilityDotProductInput4x8BitKHR = 6017,
+    CapabilityDotProductInput4x8BitPacked = 6018,
+    CapabilityDotProductInput4x8BitPackedKHR = 6018,
+    CapabilityDotProduct = 6019,
+    CapabilityDotProductKHR = 6019,
+    CapabilityRayCullMaskKHR = 6020,
+    CapabilityBitInstructions = 6025,
+    CapabilityAtomicFloat32AddEXT = 6033,
+    CapabilityAtomicFloat64AddEXT = 6034,
+    CapabilityLongConstantCompositeINTEL = 6089,
+    CapabilityOptNoneINTEL = 6094,
+    CapabilityAtomicFloat16AddEXT = 6095,
+    CapabilityDebugInfoModuleINTEL = 6114,
+    CapabilityMax = 0x7fffffff,
+};
+
+enum RayFlagsShift {
+    RayFlagsOpaqueKHRShift = 0,
+    RayFlagsNoOpaqueKHRShift = 1,
+    RayFlagsTerminateOnFirstHitKHRShift = 2,
+    RayFlagsSkipClosestHitShaderKHRShift = 3,
+    RayFlagsCullBackFacingTrianglesKHRShift = 4,
+    RayFlagsCullFrontFacingTrianglesKHRShift = 5,
+    RayFlagsCullOpaqueKHRShift = 6,
+    RayFlagsCullNoOpaqueKHRShift = 7,
+    RayFlagsSkipTrianglesKHRShift = 8,
+    RayFlagsSkipAABBsKHRShift = 9,
+    RayFlagsMax = 0x7fffffff,
+};
+
+enum RayFlagsMask {
+    RayFlagsMaskNone = 0,
+    RayFlagsOpaqueKHRMask = 0x00000001,
+    RayFlagsNoOpaqueKHRMask = 0x00000002,
+    RayFlagsTerminateOnFirstHitKHRMask = 0x00000004,
+    RayFlagsSkipClosestHitShaderKHRMask = 0x00000008,
+    RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010,
+    RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020,
+    RayFlagsCullOpaqueKHRMask = 0x00000040,
+    RayFlagsCullNoOpaqueKHRMask = 0x00000080,
+    RayFlagsSkipTrianglesKHRMask = 0x00000100,
+    RayFlagsSkipAABBsKHRMask = 0x00000200,
+};
+
+enum RayQueryIntersection {
+    RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0,
+    RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1,
+    RayQueryIntersectionMax = 0x7fffffff,
+};
+
+enum RayQueryCommittedIntersectionType {
+    RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0,
+    RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1,
+    RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2,
+    RayQueryCommittedIntersectionTypeMax = 0x7fffffff,
+};
+
+enum RayQueryCandidateIntersectionType {
+    RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0,
+    RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1,
+    RayQueryCandidateIntersectionTypeMax = 0x7fffffff,
+};
+
+enum FragmentShadingRateShift {
+    FragmentShadingRateVertical2PixelsShift = 0,
+    FragmentShadingRateVertical4PixelsShift = 1,
+    FragmentShadingRateHorizontal2PixelsShift = 2,
+    FragmentShadingRateHorizontal4PixelsShift = 3,
+    FragmentShadingRateMax = 0x7fffffff,
+};
+
+enum FragmentShadingRateMask {
+    FragmentShadingRateMaskNone = 0,
+    FragmentShadingRateVertical2PixelsMask = 0x00000001,
+    FragmentShadingRateVertical4PixelsMask = 0x00000002,
+    FragmentShadingRateHorizontal2PixelsMask = 0x00000004,
+    FragmentShadingRateHorizontal4PixelsMask = 0x00000008,
+};
+
+enum FPDenormMode {
+    FPDenormModePreserve = 0,
+    FPDenormModeFlushToZero = 1,
+    FPDenormModeMax = 0x7fffffff,
+};
+
+enum FPOperationMode {
+    FPOperationModeIEEE = 0,
+    FPOperationModeALT = 1,
+    FPOperationModeMax = 0x7fffffff,
+};
+
+enum QuantizationModes {
+    QuantizationModesTRN = 0,
+    QuantizationModesTRN_ZERO = 1,
+    QuantizationModesRND = 2,
+    QuantizationModesRND_ZERO = 3,
+    QuantizationModesRND_INF = 4,
+    QuantizationModesRND_MIN_INF = 5,
+    QuantizationModesRND_CONV = 6,
+    QuantizationModesRND_CONV_ODD = 7,
+    QuantizationModesMax = 0x7fffffff,
+};
+
+enum OverflowModes {
+    OverflowModesWRAP = 0,
+    OverflowModesSAT = 1,
+    OverflowModesSAT_ZERO = 2,
+    OverflowModesSAT_SYM = 3,
+    OverflowModesMax = 0x7fffffff,
+};
+
+enum PackedVectorFormat {
+    PackedVectorFormatPackedVectorFormat4x8Bit = 0,
+    PackedVectorFormatPackedVectorFormat4x8BitKHR = 0,
+    PackedVectorFormatMax = 0x7fffffff,
+};
+
+enum Op {
+    OpNop = 0,
+    OpUndef = 1,
+    OpSourceContinued = 2,
+    OpSource = 3,
+    OpSourceExtension = 4,
+    OpName = 5,
+    OpMemberName = 6,
+    OpString = 7,
+    OpLine = 8,
+    OpExtension = 10,
+    OpExtInstImport = 11,
+    OpExtInst = 12,
+    OpMemoryModel = 14,
+    OpEntryPoint = 15,
+    OpExecutionMode = 16,
+    OpCapability = 17,
+    OpTypeVoid = 19,
+    OpTypeBool = 20,
+    OpTypeInt = 21,
+    OpTypeFloat = 22,
+    OpTypeVector = 23,
+    OpTypeMatrix = 24,
+    OpTypeImage = 25,
+    OpTypeSampler = 26,
+    OpTypeSampledImage = 27,
+    OpTypeArray = 28,
+    OpTypeRuntimeArray = 29,
+    OpTypeStruct = 30,
+    OpTypeOpaque = 31,
+    OpTypePointer = 32,
+    OpTypeFunction = 33,
+    OpTypeEvent = 34,
+    OpTypeDeviceEvent = 35,
+    OpTypeReserveId = 36,
+    OpTypeQueue = 37,
+    OpTypePipe = 38,
+    OpTypeForwardPointer = 39,
+    OpConstantTrue = 41,
+    OpConstantFalse = 42,
+    OpConstant = 43,
+    OpConstantComposite = 44,
+    OpConstantSampler = 45,
+    OpConstantNull = 46,
+    OpSpecConstantTrue = 48,
+    OpSpecConstantFalse = 49,
+    OpSpecConstant = 50,
+    OpSpecConstantComposite = 51,
+    OpSpecConstantOp = 52,
+    OpFunction = 54,
+    OpFunctionParameter = 55,
+    OpFunctionEnd = 56,
+    OpFunctionCall = 57,
+    OpVariable = 59,
+    OpImageTexelPointer = 60,
+    OpLoad = 61,
+    OpStore = 62,
+    OpCopyMemory = 63,
+    OpCopyMemorySized = 64,
+    OpAccessChain = 65,
+    OpInBoundsAccessChain = 66,
+    OpPtrAccessChain = 67,
+    OpArrayLength = 68,
+    OpGenericPtrMemSemantics = 69,
+    OpInBoundsPtrAccessChain = 70,
+    OpDecorate = 71,
+    OpMemberDecorate = 72,
+    OpDecorationGroup = 73,
+    OpGroupDecorate = 74,
+    OpGroupMemberDecorate = 75,
+    OpVectorExtractDynamic = 77,
+    OpVectorInsertDynamic = 78,
+    OpVectorShuffle = 79,
+    OpCompositeConstruct = 80,
+    OpCompositeExtract = 81,
+    OpCompositeInsert = 82,
+    OpCopyObject = 83,
+    OpTranspose = 84,
+    OpSampledImage = 86,
+    OpImageSampleImplicitLod = 87,
+    OpImageSampleExplicitLod = 88,
+    OpImageSampleDrefImplicitLod = 89,
+    OpImageSampleDrefExplicitLod = 90,
+    OpImageSampleProjImplicitLod = 91,
+    OpImageSampleProjExplicitLod = 92,
+    OpImageSampleProjDrefImplicitLod = 93,
+    OpImageSampleProjDrefExplicitLod = 94,
+    OpImageFetch = 95,
+    OpImageGather = 96,
+    OpImageDrefGather = 97,
+    OpImageRead = 98,
+    OpImageWrite = 99,
+    OpImage = 100,
+    OpImageQueryFormat = 101,
+    OpImageQueryOrder = 102,
+    OpImageQuerySizeLod = 103,
+    OpImageQuerySize = 104,
+    OpImageQueryLod = 105,
+    OpImageQueryLevels = 106,
+    OpImageQuerySamples = 107,
+    OpConvertFToU = 109,
+    OpConvertFToS = 110,
+    OpConvertSToF = 111,
+    OpConvertUToF = 112,
+    OpUConvert = 113,
+    OpSConvert = 114,
+    OpFConvert = 115,
+    OpQuantizeToF16 = 116,
+    OpConvertPtrToU = 117,
+    OpSatConvertSToU = 118,
+    OpSatConvertUToS = 119,
+    OpConvertUToPtr = 120,
+    OpPtrCastToGeneric = 121,
+    OpGenericCastToPtr = 122,
+    OpGenericCastToPtrExplicit = 123,
+    OpBitcast = 124,
+    OpSNegate = 126,
+    OpFNegate = 127,
+    OpIAdd = 128,
+    OpFAdd = 129,
+    OpISub = 130,
+    OpFSub = 131,
+    OpIMul = 132,
+    OpFMul = 133,
+    OpUDiv = 134,
+    OpSDiv = 135,
+    OpFDiv = 136,
+    OpUMod = 137,
+    OpSRem = 138,
+    OpSMod = 139,
+    OpFRem = 140,
+    OpFMod = 141,
+    OpVectorTimesScalar = 142,
+    OpMatrixTimesScalar = 143,
+    OpVectorTimesMatrix = 144,
+    OpMatrixTimesVector = 145,
+    OpMatrixTimesMatrix = 146,
+    OpOuterProduct = 147,
+    OpDot = 148,
+    OpIAddCarry = 149,
+    OpISubBorrow = 150,
+    OpUMulExtended = 151,
+    OpSMulExtended = 152,
+    OpAny = 154,
+    OpAll = 155,
+    OpIsNan = 156,
+    OpIsInf = 157,
+    OpIsFinite = 158,
+    OpIsNormal = 159,
+    OpSignBitSet = 160,
+    OpLessOrGreater = 161,
+    OpOrdered = 162,
+    OpUnordered = 163,
+    OpLogicalEqual = 164,
+    OpLogicalNotEqual = 165,
+    OpLogicalOr = 166,
+    OpLogicalAnd = 167,
+    OpLogicalNot = 168,
+    OpSelect = 169,
+    OpIEqual = 170,
+    OpINotEqual = 171,
+    OpUGreaterThan = 172,
+    OpSGreaterThan = 173,
+    OpUGreaterThanEqual = 174,
+    OpSGreaterThanEqual = 175,
+    OpULessThan = 176,
+    OpSLessThan = 177,
+    OpULessThanEqual = 178,
+    OpSLessThanEqual = 179,
+    OpFOrdEqual = 180,
+    OpFUnordEqual = 181,
+    OpFOrdNotEqual = 182,
+    OpFUnordNotEqual = 183,
+    OpFOrdLessThan = 184,
+    OpFUnordLessThan = 185,
+    OpFOrdGreaterThan = 186,
+    OpFUnordGreaterThan = 187,
+    OpFOrdLessThanEqual = 188,
+    OpFUnordLessThanEqual = 189,
+    OpFOrdGreaterThanEqual = 190,
+    OpFUnordGreaterThanEqual = 191,
+    OpShiftRightLogical = 194,
+    OpShiftRightArithmetic = 195,
+    OpShiftLeftLogical = 196,
+    OpBitwiseOr = 197,
+    OpBitwiseXor = 198,
+    OpBitwiseAnd = 199,
+    OpNot = 200,
+    OpBitFieldInsert = 201,
+    OpBitFieldSExtract = 202,
+    OpBitFieldUExtract = 203,
+    OpBitReverse = 204,
+    OpBitCount = 205,
+    OpDPdx = 207,
+    OpDPdy = 208,
+    OpFwidth = 209,
+    OpDPdxFine = 210,
+    OpDPdyFine = 211,
+    OpFwidthFine = 212,
+    OpDPdxCoarse = 213,
+    OpDPdyCoarse = 214,
+    OpFwidthCoarse = 215,
+    OpEmitVertex = 218,
+    OpEndPrimitive = 219,
+    OpEmitStreamVertex = 220,
+    OpEndStreamPrimitive = 221,
+    OpControlBarrier = 224,
+    OpMemoryBarrier = 225,
+    OpAtomicLoad = 227,
+    OpAtomicStore = 228,
+    OpAtomicExchange = 229,
+    OpAtomicCompareExchange = 230,
+    OpAtomicCompareExchangeWeak = 231,
+    OpAtomicIIncrement = 232,
+    OpAtomicIDecrement = 233,
+    OpAtomicIAdd = 234,
+    OpAtomicISub = 235,
+    OpAtomicSMin = 236,
+    OpAtomicUMin = 237,
+    OpAtomicSMax = 238,
+    OpAtomicUMax = 239,
+    OpAtomicAnd = 240,
+    OpAtomicOr = 241,
+    OpAtomicXor = 242,
+    OpPhi = 245,
+    OpLoopMerge = 246,
+    OpSelectionMerge = 247,
+    OpLabel = 248,
+    OpBranch = 249,
+    OpBranchConditional = 250,
+    OpSwitch = 251,
+    OpKill = 252,
+    OpReturn = 253,
+    OpReturnValue = 254,
+    OpUnreachable = 255,
+    OpLifetimeStart = 256,
+    OpLifetimeStop = 257,
+    OpGroupAsyncCopy = 259,
+    OpGroupWaitEvents = 260,
+    OpGroupAll = 261,
+    OpGroupAny = 262,
+    OpGroupBroadcast = 263,
+    OpGroupIAdd = 264,
+    OpGroupFAdd = 265,
+    OpGroupFMin = 266,
+    OpGroupUMin = 267,
+    OpGroupSMin = 268,
+    OpGroupFMax = 269,
+    OpGroupUMax = 270,
+    OpGroupSMax = 271,
+    OpReadPipe = 274,
+    OpWritePipe = 275,
+    OpReservedReadPipe = 276,
+    OpReservedWritePipe = 277,
+    OpReserveReadPipePackets = 278,
+    OpReserveWritePipePackets = 279,
+    OpCommitReadPipe = 280,
+    OpCommitWritePipe = 281,
+    OpIsValidReserveId = 282,
+    OpGetNumPipePackets = 283,
+    OpGetMaxPipePackets = 284,
+    OpGroupReserveReadPipePackets = 285,
+    OpGroupReserveWritePipePackets = 286,
+    OpGroupCommitReadPipe = 287,
+    OpGroupCommitWritePipe = 288,
+    OpEnqueueMarker = 291,
+    OpEnqueueKernel = 292,
+    OpGetKernelNDrangeSubGroupCount = 293,
+    OpGetKernelNDrangeMaxSubGroupSize = 294,
+    OpGetKernelWorkGroupSize = 295,
+    OpGetKernelPreferredWorkGroupSizeMultiple = 296,
+    OpRetainEvent = 297,
+    OpReleaseEvent = 298,
+    OpCreateUserEvent = 299,
+    OpIsValidEvent = 300,
+    OpSetUserEventStatus = 301,
+    OpCaptureEventProfilingInfo = 302,
+    OpGetDefaultQueue = 303,
+    OpBuildNDRange = 304,
+    OpImageSparseSampleImplicitLod = 305,
+    OpImageSparseSampleExplicitLod = 306,
+    OpImageSparseSampleDrefImplicitLod = 307,
+    OpImageSparseSampleDrefExplicitLod = 308,
+    OpImageSparseSampleProjImplicitLod = 309,
+    OpImageSparseSampleProjExplicitLod = 310,
+    OpImageSparseSampleProjDrefImplicitLod = 311,
+    OpImageSparseSampleProjDrefExplicitLod = 312,
+    OpImageSparseFetch = 313,
+    OpImageSparseGather = 314,
+    OpImageSparseDrefGather = 315,
+    OpImageSparseTexelsResident = 316,
+    OpNoLine = 317,
+    OpAtomicFlagTestAndSet = 318,
+    OpAtomicFlagClear = 319,
+    OpImageSparseRead = 320,
+    OpSizeOf = 321,
+    OpTypePipeStorage = 322,
+    OpConstantPipeStorage = 323,
+    OpCreatePipeFromPipeStorage = 324,
+    OpGetKernelLocalSizeForSubgroupCount = 325,
+    OpGetKernelMaxNumSubgroups = 326,
+    OpTypeNamedBarrier = 327,
+    OpNamedBarrierInitialize = 328,
+    OpMemoryNamedBarrier = 329,
+    OpModuleProcessed = 330,
+    OpExecutionModeId = 331,
+    OpDecorateId = 332,
+    OpGroupNonUniformElect = 333,
+    OpGroupNonUniformAll = 334,
+    OpGroupNonUniformAny = 335,
+    OpGroupNonUniformAllEqual = 336,
+    OpGroupNonUniformBroadcast = 337,
+    OpGroupNonUniformBroadcastFirst = 338,
+    OpGroupNonUniformBallot = 339,
+    OpGroupNonUniformInverseBallot = 340,
+    OpGroupNonUniformBallotBitExtract = 341,
+    OpGroupNonUniformBallotBitCount = 342,
+    OpGroupNonUniformBallotFindLSB = 343,
+    OpGroupNonUniformBallotFindMSB = 344,
+    OpGroupNonUniformShuffle = 345,
+    OpGroupNonUniformShuffleXor = 346,
+    OpGroupNonUniformShuffleUp = 347,
+    OpGroupNonUniformShuffleDown = 348,
+    OpGroupNonUniformIAdd = 349,
+    OpGroupNonUniformFAdd = 350,
+    OpGroupNonUniformIMul = 351,
+    OpGroupNonUniformFMul = 352,
+    OpGroupNonUniformSMin = 353,
+    OpGroupNonUniformUMin = 354,
+    OpGroupNonUniformFMin = 355,
+    OpGroupNonUniformSMax = 356,
+    OpGroupNonUniformUMax = 357,
+    OpGroupNonUniformFMax = 358,
+    OpGroupNonUniformBitwiseAnd = 359,
+    OpGroupNonUniformBitwiseOr = 360,
+    OpGroupNonUniformBitwiseXor = 361,
+    OpGroupNonUniformLogicalAnd = 362,
+    OpGroupNonUniformLogicalOr = 363,
+    OpGroupNonUniformLogicalXor = 364,
+    OpGroupNonUniformQuadBroadcast = 365,
+    OpGroupNonUniformQuadSwap = 366,
+    OpCopyLogical = 400,
+    OpPtrEqual = 401,
+    OpPtrNotEqual = 402,
+    OpPtrDiff = 403,
+    OpTerminateInvocation = 4416,
+    OpSubgroupBallotKHR = 4421,
+    OpSubgroupFirstInvocationKHR = 4422,
+    OpSubgroupAllKHR = 4428,
+    OpSubgroupAnyKHR = 4429,
+    OpSubgroupAllEqualKHR = 4430,
+    OpSubgroupReadInvocationKHR = 4432,
+    OpTraceRayKHR = 4445,
+    OpExecuteCallableKHR = 4446,
+    OpConvertUToAccelerationStructureKHR = 4447,
+    OpIgnoreIntersectionKHR = 4448,
+    OpTerminateRayKHR = 4449,
+    OpSDot = 4450,
+    OpSDotKHR = 4450,
+    OpUDot = 4451,
+    OpUDotKHR = 4451,
+    OpSUDot = 4452,
+    OpSUDotKHR = 4452,
+    OpSDotAccSat = 4453,
+    OpSDotAccSatKHR = 4453,
+    OpUDotAccSat = 4454,
+    OpUDotAccSatKHR = 4454,
+    OpSUDotAccSat = 4455,
+    OpSUDotAccSatKHR = 4455,
+    OpTypeRayQueryKHR = 4472,
+    OpRayQueryInitializeKHR = 4473,
+    OpRayQueryTerminateKHR = 4474,
+    OpRayQueryGenerateIntersectionKHR = 4475,
+    OpRayQueryConfirmIntersectionKHR = 4476,
+    OpRayQueryProceedKHR = 4477,
+    OpRayQueryGetIntersectionTypeKHR = 4479,
+    OpGroupIAddNonUniformAMD = 5000,
+    OpGroupFAddNonUniformAMD = 5001,
+    OpGroupFMinNonUniformAMD = 5002,
+    OpGroupUMinNonUniformAMD = 5003,
+    OpGroupSMinNonUniformAMD = 5004,
+    OpGroupFMaxNonUniformAMD = 5005,
+    OpGroupUMaxNonUniformAMD = 5006,
+    OpGroupSMaxNonUniformAMD = 5007,
+    OpFragmentMaskFetchAMD = 5011,
+    OpFragmentFetchAMD = 5012,
+    OpReadClockKHR = 5056,
+    OpImageSampleFootprintNV = 5283,
+    OpEmitMeshTasksEXT = 5294,
+    OpSetMeshOutputsEXT = 5295,
+    OpGroupNonUniformPartitionNV = 5296,
+    OpWritePackedPrimitiveIndices4x8NV = 5299,
+    OpReportIntersectionKHR = 5334,
+    OpReportIntersectionNV = 5334,
+    OpIgnoreIntersectionNV = 5335,
+    OpTerminateRayNV = 5336,
+    OpTraceNV = 5337,
+    OpTraceMotionNV = 5338,
+    OpTraceRayMotionNV = 5339,
+    OpTypeAccelerationStructureKHR = 5341,
+    OpTypeAccelerationStructureNV = 5341,
+    OpExecuteCallableNV = 5344,
+    OpTypeCooperativeMatrixNV = 5358,
+    OpCooperativeMatrixLoadNV = 5359,
+    OpCooperativeMatrixStoreNV = 5360,
+    OpCooperativeMatrixMulAddNV = 5361,
+    OpCooperativeMatrixLengthNV = 5362,
+    OpBeginInvocationInterlockEXT = 5364,
+    OpEndInvocationInterlockEXT = 5365,
+    OpDemoteToHelperInvocation = 5380,
+    OpDemoteToHelperInvocationEXT = 5380,
+    OpIsHelperInvocationEXT = 5381,
+    OpConvertUToImageNV = 5391,
+    OpConvertUToSamplerNV = 5392,
+    OpConvertImageToUNV = 5393,
+    OpConvertSamplerToUNV = 5394,
+    OpConvertUToSampledImageNV = 5395,
+    OpConvertSampledImageToUNV = 5396,
+    OpSamplerImageAddressingModeNV = 5397,
+    OpSubgroupShuffleINTEL = 5571,
+    OpSubgroupShuffleDownINTEL = 5572,
+    OpSubgroupShuffleUpINTEL = 5573,
+    OpSubgroupShuffleXorINTEL = 5574,
+    OpSubgroupBlockReadINTEL = 5575,
+    OpSubgroupBlockWriteINTEL = 5576,
+    OpSubgroupImageBlockReadINTEL = 5577,
+    OpSubgroupImageBlockWriteINTEL = 5578,
+    OpSubgroupImageMediaBlockReadINTEL = 5580,
+    OpSubgroupImageMediaBlockWriteINTEL = 5581,
+    OpUCountLeadingZerosINTEL = 5585,
+    OpUCountTrailingZerosINTEL = 5586,
+    OpAbsISubINTEL = 5587,
+    OpAbsUSubINTEL = 5588,
+    OpIAddSatINTEL = 5589,
+    OpUAddSatINTEL = 5590,
+    OpIAverageINTEL = 5591,
+    OpUAverageINTEL = 5592,
+    OpIAverageRoundedINTEL = 5593,
+    OpUAverageRoundedINTEL = 5594,
+    OpISubSatINTEL = 5595,
+    OpUSubSatINTEL = 5596,
+    OpIMul32x16INTEL = 5597,
+    OpUMul32x16INTEL = 5598,
+    OpConstantFunctionPointerINTEL = 5600,
+    OpFunctionPointerCallINTEL = 5601,
+    OpAsmTargetINTEL = 5609,
+    OpAsmINTEL = 5610,
+    OpAsmCallINTEL = 5611,
+    OpAtomicFMinEXT = 5614,
+    OpAtomicFMaxEXT = 5615,
+    OpAssumeTrueKHR = 5630,
+    OpExpectKHR = 5631,
+    OpDecorateString = 5632,
+    OpDecorateStringGOOGLE = 5632,
+    OpMemberDecorateString = 5633,
+    OpMemberDecorateStringGOOGLE = 5633,
+    OpVmeImageINTEL = 5699,
+    OpTypeVmeImageINTEL = 5700,
+    OpTypeAvcImePayloadINTEL = 5701,
+    OpTypeAvcRefPayloadINTEL = 5702,
+    OpTypeAvcSicPayloadINTEL = 5703,
+    OpTypeAvcMcePayloadINTEL = 5704,
+    OpTypeAvcMceResultINTEL = 5705,
+    OpTypeAvcImeResultINTEL = 5706,
+    OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707,
+    OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708,
+    OpTypeAvcImeSingleReferenceStreaminINTEL = 5709,
+    OpTypeAvcImeDualReferenceStreaminINTEL = 5710,
+    OpTypeAvcRefResultINTEL = 5711,
+    OpTypeAvcSicResultINTEL = 5712,
+    OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713,
+    OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714,
+    OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715,
+    OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716,
+    OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717,
+    OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718,
+    OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719,
+    OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720,
+    OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721,
+    OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722,
+    OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723,
+    OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724,
+    OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725,
+    OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726,
+    OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727,
+    OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728,
+    OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729,
+    OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730,
+    OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731,
+    OpSubgroupAvcMceConvertToImePayloadINTEL = 5732,
+    OpSubgroupAvcMceConvertToImeResultINTEL = 5733,
+    OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734,
+    OpSubgroupAvcMceConvertToRefResultINTEL = 5735,
+    OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736,
+    OpSubgroupAvcMceConvertToSicResultINTEL = 5737,
+    OpSubgroupAvcMceGetMotionVectorsINTEL = 5738,
+    OpSubgroupAvcMceGetInterDistortionsINTEL = 5739,
+    OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740,
+    OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741,
+    OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742,
+    OpSubgroupAvcMceGetInterDirectionsINTEL = 5743,
+    OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744,
+    OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745,
+    OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746,
+    OpSubgroupAvcImeInitializeINTEL = 5747,
+    OpSubgroupAvcImeSetSingleReferenceINTEL = 5748,
+    OpSubgroupAvcImeSetDualReferenceINTEL = 5749,
+    OpSubgroupAvcImeRefWindowSizeINTEL = 5750,
+    OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751,
+    OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752,
+    OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753,
+    OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754,
+    OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755,
+    OpSubgroupAvcImeSetWeightedSadINTEL = 5756,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757,
+    OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764,
+    OpSubgroupAvcImeConvertToMceResultINTEL = 5765,
+    OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766,
+    OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767,
+    OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768,
+    OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775,
+    OpSubgroupAvcImeGetBorderReachedINTEL = 5776,
+    OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777,
+    OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778,
+    OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779,
+    OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780,
+    OpSubgroupAvcFmeInitializeINTEL = 5781,
+    OpSubgroupAvcBmeInitializeINTEL = 5782,
+    OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783,
+    OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784,
+    OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785,
+    OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786,
+    OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787,
+    OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788,
+    OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789,
+    OpSubgroupAvcRefConvertToMceResultINTEL = 5790,
+    OpSubgroupAvcSicInitializeINTEL = 5791,
+    OpSubgroupAvcSicConfigureSkcINTEL = 5792,
+    OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793,
+    OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794,
+    OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795,
+    OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796,
+    OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797,
+    OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798,
+    OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799,
+    OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800,
+    OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801,
+    OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802,
+    OpSubgroupAvcSicEvaluateIpeINTEL = 5803,
+    OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804,
+    OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805,
+    OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806,
+    OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807,
+    OpSubgroupAvcSicConvertToMceResultINTEL = 5808,
+    OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809,
+    OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810,
+    OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811,
+    OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812,
+    OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813,
+    OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814,
+    OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815,
+    OpSubgroupAvcSicGetInterRawSadsINTEL = 5816,
+    OpVariableLengthArrayINTEL = 5818,
+    OpSaveMemoryINTEL = 5819,
+    OpRestoreMemoryINTEL = 5820,
+    OpArbitraryFloatSinCosPiINTEL = 5840,
+    OpArbitraryFloatCastINTEL = 5841,
+    OpArbitraryFloatCastFromIntINTEL = 5842,
+    OpArbitraryFloatCastToIntINTEL = 5843,
+    OpArbitraryFloatAddINTEL = 5846,
+    OpArbitraryFloatSubINTEL = 5847,
+    OpArbitraryFloatMulINTEL = 5848,
+    OpArbitraryFloatDivINTEL = 5849,
+    OpArbitraryFloatGTINTEL = 5850,
+    OpArbitraryFloatGEINTEL = 5851,
+    OpArbitraryFloatLTINTEL = 5852,
+    OpArbitraryFloatLEINTEL = 5853,
+    OpArbitraryFloatEQINTEL = 5854,
+    OpArbitraryFloatRecipINTEL = 5855,
+    OpArbitraryFloatRSqrtINTEL = 5856,
+    OpArbitraryFloatCbrtINTEL = 5857,
+    OpArbitraryFloatHypotINTEL = 5858,
+    OpArbitraryFloatSqrtINTEL = 5859,
+    OpArbitraryFloatLogINTEL = 5860,
+    OpArbitraryFloatLog2INTEL = 5861,
+    OpArbitraryFloatLog10INTEL = 5862,
+    OpArbitraryFloatLog1pINTEL = 5863,
+    OpArbitraryFloatExpINTEL = 5864,
+    OpArbitraryFloatExp2INTEL = 5865,
+    OpArbitraryFloatExp10INTEL = 5866,
+    OpArbitraryFloatExpm1INTEL = 5867,
+    OpArbitraryFloatSinINTEL = 5868,
+    OpArbitraryFloatCosINTEL = 5869,
+    OpArbitraryFloatSinCosINTEL = 5870,
+    OpArbitraryFloatSinPiINTEL = 5871,
+    OpArbitraryFloatCosPiINTEL = 5872,
+    OpArbitraryFloatASinINTEL = 5873,
+    OpArbitraryFloatASinPiINTEL = 5874,
+    OpArbitraryFloatACosINTEL = 5875,
+    OpArbitraryFloatACosPiINTEL = 5876,
+    OpArbitraryFloatATanINTEL = 5877,
+    OpArbitraryFloatATanPiINTEL = 5878,
+    OpArbitraryFloatATan2INTEL = 5879,
+    OpArbitraryFloatPowINTEL = 5880,
+    OpArbitraryFloatPowRINTEL = 5881,
+    OpArbitraryFloatPowNINTEL = 5882,
+    OpLoopControlINTEL = 5887,
+    OpFixedSqrtINTEL = 5923,
+    OpFixedRecipINTEL = 5924,
+    OpFixedRsqrtINTEL = 5925,
+    OpFixedSinINTEL = 5926,
+    OpFixedCosINTEL = 5927,
+    OpFixedSinCosINTEL = 5928,
+    OpFixedSinPiINTEL = 5929,
+    OpFixedCosPiINTEL = 5930,
+    OpFixedSinCosPiINTEL = 5931,
+    OpFixedLogINTEL = 5932,
+    OpFixedExpINTEL = 5933,
+    OpPtrCastToCrossWorkgroupINTEL = 5934,
+    OpCrossWorkgroupCastToPtrINTEL = 5938,
+    OpReadPipeBlockingINTEL = 5946,
+    OpWritePipeBlockingINTEL = 5947,
+    OpFPGARegINTEL = 5949,
+    OpRayQueryGetRayTMinKHR = 6016,
+    OpRayQueryGetRayFlagsKHR = 6017,
+    OpRayQueryGetIntersectionTKHR = 6018,
+    OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019,
+    OpRayQueryGetIntersectionInstanceIdKHR = 6020,
+    OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021,
+    OpRayQueryGetIntersectionGeometryIndexKHR = 6022,
+    OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023,
+    OpRayQueryGetIntersectionBarycentricsKHR = 6024,
+    OpRayQueryGetIntersectionFrontFaceKHR = 6025,
+    OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026,
+    OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027,
+    OpRayQueryGetIntersectionObjectRayOriginKHR = 6028,
+    OpRayQueryGetWorldRayDirectionKHR = 6029,
+    OpRayQueryGetWorldRayOriginKHR = 6030,
+    OpRayQueryGetIntersectionObjectToWorldKHR = 6031,
+    OpRayQueryGetIntersectionWorldToObjectKHR = 6032,
+    OpAtomicFAddEXT = 6035,
+    OpTypeBufferSurfaceINTEL = 6086,
+    OpTypeStructContinuedINTEL = 6090,
+    OpConstantCompositeContinuedINTEL = 6091,
+    OpSpecConstantCompositeContinuedINTEL = 6092,
+    OpMax = 0x7fffffff,
+};
+
+#ifdef SPV_ENABLE_UTILITY_CODE
+inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
+    *hasResult = *hasResultType = false;
+    switch (opcode) {
+    default: /* unknown opcode */ break;
+    case OpNop: *hasResult = false; *hasResultType = false; break;
+    case OpUndef: *hasResult = true; *hasResultType = true; break;
+    case OpSourceContinued: *hasResult = false; *hasResultType = false; break;
+    case OpSource: *hasResult = false; *hasResultType = false; break;
+    case OpSourceExtension: *hasResult = false; *hasResultType = false; break;
+    case OpName: *hasResult = false; *hasResultType = false; break;
+    case OpMemberName: *hasResult = false; *hasResultType = false; break;
+    case OpString: *hasResult = true; *hasResultType = false; break;
+    case OpLine: *hasResult = false; *hasResultType = false; break;
+    case OpExtension: *hasResult = false; *hasResultType = false; break;
+    case OpExtInstImport: *hasResult = true; *hasResultType = false; break;
+    case OpExtInst: *hasResult = true; *hasResultType = true; break;
+    case OpMemoryModel: *hasResult = false; *hasResultType = false; break;
+    case OpEntryPoint: *hasResult = false; *hasResultType = false; break;
+    case OpExecutionMode: *hasResult = false; *hasResultType = false; break;
+    case OpCapability: *hasResult = false; *hasResultType = false; break;
+    case OpTypeVoid: *hasResult = true; *hasResultType = false; break;
+    case OpTypeBool: *hasResult = true; *hasResultType = false; break;
+    case OpTypeInt: *hasResult = true; *hasResultType = false; break;
+    case OpTypeFloat: *hasResult = true; *hasResultType = false; break;
+    case OpTypeVector: *hasResult = true; *hasResultType = false; break;
+    case OpTypeMatrix: *hasResult = true; *hasResultType = false; break;
+    case OpTypeImage: *hasResult = true; *hasResultType = false; break;
+    case OpTypeSampler: *hasResult = true; *hasResultType = false; break;
+    case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break;
+    case OpTypeArray: *hasResult = true; *hasResultType = false; break;
+    case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break;
+    case OpTypeStruct: *hasResult = true; *hasResultType = false; break;
+    case OpTypeOpaque: *hasResult = true; *hasResultType = false; break;
+    case OpTypePointer: *hasResult = true; *hasResultType = false; break;
+    case OpTypeFunction: *hasResult = true; *hasResultType = false; break;
+    case OpTypeEvent: *hasResult = true; *hasResultType = false; break;
+    case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break;
+    case OpTypeReserveId: *hasResult = true; *hasResultType = false; break;
+    case OpTypeQueue: *hasResult = true; *hasResultType = false; break;
+    case OpTypePipe: *hasResult = true; *hasResultType = false; break;
+    case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break;
+    case OpConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case OpConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case OpConstant: *hasResult = true; *hasResultType = true; break;
+    case OpConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case OpConstantSampler: *hasResult = true; *hasResultType = true; break;
+    case OpConstantNull: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstant: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break;
+    case OpFunction: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionParameter: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionEnd: *hasResult = false; *hasResultType = false; break;
+    case OpFunctionCall: *hasResult = true; *hasResultType = true; break;
+    case OpVariable: *hasResult = true; *hasResultType = true; break;
+    case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break;
+    case OpLoad: *hasResult = true; *hasResultType = true; break;
+    case OpStore: *hasResult = false; *hasResultType = false; break;
+    case OpCopyMemory: *hasResult = false; *hasResultType = false; break;
+    case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break;
+    case OpAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpArrayLength: *hasResult = true; *hasResultType = true; break;
+    case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break;
+    case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpDecorationGroup: *hasResult = true; *hasResultType = false; break;
+    case OpGroupDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break;
+    case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break;
+    case OpVectorShuffle: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeExtract: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeInsert: *hasResult = true; *hasResultType = true; break;
+    case OpCopyObject: *hasResult = true; *hasResultType = true; break;
+    case OpTranspose: *hasResult = true; *hasResultType = true; break;
+    case OpSampledImage: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageFetch: *hasResult = true; *hasResultType = true; break;
+    case OpImageGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageDrefGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageRead: *hasResult = true; *hasResultType = true; break;
+    case OpImageWrite: *hasResult = false; *hasResultType = false; break;
+    case OpImage: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySize: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break;
+    case OpConvertFToU: *hasResult = true; *hasResultType = true; break;
+    case OpConvertFToS: *hasResult = true; *hasResultType = true; break;
+    case OpConvertSToF: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToF: *hasResult = true; *hasResultType = true; break;
+    case OpUConvert: *hasResult = true; *hasResultType = true; break;
+    case OpSConvert: *hasResult = true; *hasResultType = true; break;
+    case OpFConvert: *hasResult = true; *hasResultType = true; break;
+    case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break;
+    case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break;
+    case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break;
+    case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break;
+    case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break;
+    case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break;
+    case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break;
+    case OpBitcast: *hasResult = true; *hasResultType = true; break;
+    case OpSNegate: *hasResult = true; *hasResultType = true; break;
+    case OpFNegate: *hasResult = true; *hasResultType = true; break;
+    case OpIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpISub: *hasResult = true; *hasResultType = true; break;
+    case OpFSub: *hasResult = true; *hasResultType = true; break;
+    case OpIMul: *hasResult = true; *hasResultType = true; break;
+    case OpFMul: *hasResult = true; *hasResultType = true; break;
+    case OpUDiv: *hasResult = true; *hasResultType = true; break;
+    case OpSDiv: *hasResult = true; *hasResultType = true; break;
+    case OpFDiv: *hasResult = true; *hasResultType = true; break;
+    case OpUMod: *hasResult = true; *hasResultType = true; break;
+    case OpSRem: *hasResult = true; *hasResultType = true; break;
+    case OpSMod: *hasResult = true; *hasResultType = true; break;
+    case OpFRem: *hasResult = true; *hasResultType = true; break;
+    case OpFMod: *hasResult = true; *hasResultType = true; break;
+    case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case OpOuterProduct: *hasResult = true; *hasResultType = true; break;
+    case OpDot: *hasResult = true; *hasResultType = true; break;
+    case OpIAddCarry: *hasResult = true; *hasResultType = true; break;
+    case OpISubBorrow: *hasResult = true; *hasResultType = true; break;
+    case OpUMulExtended: *hasResult = true; *hasResultType = true; break;
+    case OpSMulExtended: *hasResult = true; *hasResultType = true; break;
+    case OpAny: *hasResult = true; *hasResultType = true; break;
+    case OpAll: *hasResult = true; *hasResultType = true; break;
+    case OpIsNan: *hasResult = true; *hasResultType = true; break;
+    case OpIsInf: *hasResult = true; *hasResultType = true; break;
+    case OpIsFinite: *hasResult = true; *hasResultType = true; break;
+    case OpIsNormal: *hasResult = true; *hasResultType = true; break;
+    case OpSignBitSet: *hasResult = true; *hasResultType = true; break;
+    case OpLessOrGreater: *hasResult = true; *hasResultType = true; break;
+    case OpOrdered: *hasResult = true; *hasResultType = true; break;
+    case OpUnordered: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalEqual: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalNot: *hasResult = true; *hasResultType = true; break;
+    case OpSelect: *hasResult = true; *hasResultType = true; break;
+    case OpIEqual: *hasResult = true; *hasResultType = true; break;
+    case OpINotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpUGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpSGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpULessThan: *hasResult = true; *hasResultType = true; break;
+    case OpSLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpULessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break;
+    case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break;
+    case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case OpNot: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break;
+    case OpBitReverse: *hasResult = true; *hasResultType = true; break;
+    case OpBitCount: *hasResult = true; *hasResultType = true; break;
+    case OpDPdx: *hasResult = true; *hasResultType = true; break;
+    case OpDPdy: *hasResult = true; *hasResultType = true; break;
+    case OpFwidth: *hasResult = true; *hasResultType = true; break;
+    case OpDPdxFine: *hasResult = true; *hasResultType = true; break;
+    case OpDPdyFine: *hasResult = true; *hasResultType = true; break;
+    case OpFwidthFine: *hasResult = true; *hasResultType = true; break;
+    case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpEmitVertex: *hasResult = false; *hasResultType = false; break;
+    case OpEndPrimitive: *hasResult = false; *hasResultType = false; break;
+    case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break;
+    case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break;
+    case OpControlBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicLoad: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicStore: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicExchange: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break;
case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: *hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case 
OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; 
*hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: 
*hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case OpSDot: *hasResult = true; *hasResultType = true; break; + case OpUDot: *hasResult = true; *hasResultType = true; break; + case OpSUDot: *hasResult = true; *hasResultType = true; break; + case OpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + 
case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case OpExpectKHR: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: 
*hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = 
true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + 
case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP
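[Editor's note] The opcode table above backs the HasResultAndType utility that spirv.hpp compiles in when SPV_ENABLE_UTILITY_CODE is defined: given an opcode, it tells a binary reader whether words 1 and 2 of an instruction are the result type and result id, or already the first operands. A hedged usage sketch (the surrounding decoder code is hypothetical):

    // Query the table while decoding a SPIR-V word stream.
    bool hasResult = false, hasResultType = false;
    spv::HasResultAndType(spv::OpIAdd, &hasResult, &hasResultType);
    // Both flags are true for OpIAdd, so its operands begin at word 3.

    // The operator| overloads let mask enums combine without casts:
    spv::MemoryAccessMask access =
        spv::MemoryAccessVolatileMask | spv::MemoryAccessAlignedMask;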
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/spvIR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/spvIR.h new file mode 100644 index 0000000..0969127 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/SPIRV/spvIR.h @@ -0,0 +1,520 @@ +// +// Copyright (C) 2014 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// SPIRV-IR +// +// Simple in-memory representation (IR) of SPIRV. Just for holding +// each function's CFG of blocks. Has this hierarchy: +// - Module, which is a list of +// - Function, which is a list of +// - Block, which is a list of +// - Instruction +// + +#pragma once +#ifndef spvIR_H +#define spvIR_H + +#include "spirv.hpp" + +#include <algorithm> +#include <cassert> +#include <functional> +#include <iostream> +#include <memory> +#include <set> +#include <vector> + +namespace spv { + +class Block; +class Function; +class Module; + +const Id NoResult = 0; +const Id NoType = 0; + +const Decoration NoPrecision = DecorationMax; + +#ifdef __GNUC__ +# define POTENTIALLY_UNUSED __attribute__((unused)) +#else +# define POTENTIALLY_UNUSED +#endif + +POTENTIALLY_UNUSED +const MemorySemanticsMask MemorySemanticsAllMemory = + (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask | + MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask | + MemorySemanticsImageMemoryMask); + +struct IdImmediate { + bool isId; // true if word is an Id, false if word is an immediate + unsigned word; + IdImmediate(bool i, unsigned w) : isId(i), word(w) {} +};
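[Editor's note] IdImmediate is the tagged-word type the glslang SPIR-V builder passes around when an operand list mixes id references and literal immediates. A minimal illustration (the operand values are invented):

    // Two operands for some instruction: first an <id>, then a literal.
    std::vector<spv::IdImmediate> operands;
    operands.push_back(spv::IdImmediate(true, 42)); // isId: this word names %42
    operands.push_back(spv::IdImmediate(false, 7)); // literal immediate 7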
+ +// +// SPIR-V IR instruction. +// + +class Instruction { +public: + Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { } + explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { } + virtual ~Instruction() {} + void addIdOperand(Id id) { + operands.push_back(id); + idOperand.push_back(true); + } + void addImmediateOperand(unsigned int immediate) { + operands.push_back(immediate); + idOperand.push_back(false); + } + void setImmediateOperand(unsigned idx, unsigned int immediate) { + assert(!idOperand[idx]); + operands[idx] = immediate; + } + + void addStringOperand(const char* str) + { + unsigned int word = 0; + unsigned int shiftAmount = 0; + char c; + + do { + c = *(str++); + word |= ((unsigned int)c) << shiftAmount; + shiftAmount += 8; + if (shiftAmount == 32) { + addImmediateOperand(word); + word = 0; + shiftAmount = 0; + } + } while (c != 0); + + // deal with partial last word + if (shiftAmount > 0) { + addImmediateOperand(word); + } + } + bool isIdOperand(int op) const { return idOperand[op]; } + void setBlock(Block* b) { block = b; } + Block* getBlock() const { return block; } + Op getOpCode() const { return opCode; } + int getNumOperands() const + { + assert(operands.size() == idOperand.size()); + return (int)operands.size(); + } + Id getResultId() const { return resultId; } + Id getTypeId() const { return typeId; } + Id getIdOperand(int op) const { + assert(idOperand[op]); + return operands[op]; + } + unsigned int getImmediateOperand(int op) const { + assert(!idOperand[op]); + return operands[op]; + } + + // Write out the binary form. + void dump(std::vector<unsigned int>& out) const + { + // Compute the wordCount + unsigned int wordCount = 1; + if (typeId) + ++wordCount; + if (resultId) + ++wordCount; + wordCount += (unsigned int)operands.size(); + + // Write out the beginning of the instruction + out.push_back(((wordCount) << WordCountShift) | opCode); + if (typeId) + out.push_back(typeId); + if (resultId) + out.push_back(resultId); + + // Write out the operands + for (int op = 0; op < (int)operands.size(); ++op) + out.push_back(operands[op]); + } + +protected: + Instruction(const Instruction&); + Id resultId; + Id typeId; + Op opCode; + std::vector<unsigned int> operands; // operands, both <id> and immediates (both are unsigned int) + std::vector<bool> idOperand; // true for operands that are <id>, false for immediates + Block* block; +};
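[Editor's note] Instruction::dump() writes the standard SPIR-V binary layout: word 0 packs the total word count and the opcode as (wordCount << WordCountShift) | opCode (WordCountShift is 16 in spirv.hpp), followed by the optional result-type and result-id words and then the raw operands. addStringOperand() packs literal strings little-endian, four bytes per word, always emitting the terminating NUL. A small sketch of both, using an OpName debug instruction with an invented target id:

    // OpName %5 "foo" -> 3 words: header, target <id>, one packed string word.
    spv::Instruction name(spv::NoResult, spv::NoType, spv::OpName);
    name.addIdOperand(5);         // the <id> being named
    name.addStringOperand("foo"); // 'f' | 'o'<<8 | 'o'<<16 | '\0'<<24
    std::vector<unsigned int> words;
    name.dump(words);
    // words[0] == (3u << spv::WordCountShift) | spv::OpName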
+ +// +// SPIR-V IR block. +// + +class Block { +public: + Block(Id id, Function& parent); + virtual ~Block() + { + } + + Id getId() { return instructions.front()->getResultId(); } + + Function& getParent() const { return parent; } + void addInstruction(std::unique_ptr<Instruction> inst); + void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);} + void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); } + const std::vector<Block*>& getPredecessors() const { return predecessors; } + const std::vector<Block*>& getSuccessors() const { return successors; } + const std::vector<std::unique_ptr<Instruction> >& getInstructions() const { + return instructions; + } + const std::vector<std::unique_ptr<Instruction> >& getLocalVariables() const { return localVariables; } + void setUnreachable() { unreachable = true; } + bool isUnreachable() const { return unreachable; } + // Returns the block's merge instruction, if one exists (otherwise null). + const Instruction* getMergeInstruction() const { + if (instructions.size() < 2) return nullptr; + const Instruction* nextToLast = (instructions.cend() - 2)->get(); + switch (nextToLast->getOpCode()) { + case OpSelectionMerge: + case OpLoopMerge: + return nextToLast; + default: + return nullptr; + } + return nullptr; + } + + // Change this block into a canonical dead merge block. Delete instructions + // as necessary. A canonical dead merge block has only an OpLabel and an + // OpUnreachable. + void rewriteAsCanonicalUnreachableMerge() { + assert(localVariables.empty()); + // Delete all instructions except for the label. + assert(instructions.size() > 0); + instructions.resize(1); + successors.clear(); + addInstruction(std::unique_ptr<Instruction>(new Instruction(OpUnreachable))); + } + // Change this block into a canonical dead continue target branching to the + // given header ID. Delete instructions as necessary. A canonical dead continue + // target has only an OpLabel and an unconditional branch back to the corresponding + // header. + void rewriteAsCanonicalUnreachableContinue(Block* header) { + assert(localVariables.empty()); + // Delete all instructions except for the label. + assert(instructions.size() > 0); + instructions.resize(1); + successors.clear(); + // Add OpBranch back to the header. + assert(header != nullptr); + Instruction* branch = new Instruction(OpBranch); + branch->addIdOperand(header->getId()); + addInstruction(std::unique_ptr<Instruction>(branch)); + successors.push_back(header); + } + + bool isTerminated() const + { + switch (instructions.back()->getOpCode()) { + case OpBranch: + case OpBranchConditional: + case OpSwitch: + case OpKill: + case OpTerminateInvocation: + case OpReturn: + case OpReturnValue: + case OpUnreachable: + return true; + default: + return false; + } + } + + void dump(std::vector<unsigned int>& out) const + { + instructions[0]->dump(out); + for (int i = 0; i < (int)localVariables.size(); ++i) + localVariables[i]->dump(out); + for (int i = 1; i < (int)instructions.size(); ++i) + instructions[i]->dump(out); + } + +protected: + Block(const Block&); + Block& operator=(Block&); + + // To enforce keeping parent and ownership in sync: + friend Function; + + std::vector<std::unique_ptr<Instruction> > instructions; + std::vector<Block*> predecessors, successors; + std::vector<std::unique_ptr<Instruction> > localVariables; + Function& parent; + + // track whether this block is known to be unreachable (not necessarily + // true for all unreachable blocks, but should be set at least + // for the extraneous ones introduced by the builder). + bool unreachable; +}; + +// The different reasons for reaching a block in the inReadableOrder traversal. +enum ReachReason { + // Reachable from the entry block via transfers of control, i.e. branches. + ReachViaControlFlow = 0, + // A continue target that is not reachable via control flow. + ReachDeadContinue, + // A merge block that is not reachable via control flow. + ReachDeadMerge +}; + +// Traverses the control-flow graph rooted at root in an order suited for +// readable code generation. Invokes callback at every node in the traversal +// order. The callback arguments are: +// - the block, +// - the reason we reached the block, +// - if the reason was that block is an unreachable continue or unreachable merge block +// then the last parameter is the corresponding header block. +void inReadableOrder(Block* root, std::function<void(Block*, ReachReason, Block* header)> callback);
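[Editor's note] Only the declaration of inReadableOrder ships in this header; its definition lives elsewhere in glslang (InReadableOrder.cpp). A hedged sketch of how a consumer might use the ReachReason parameter; the spv::Function pointer "func" is hypothetical:

    // Count blocks reached only as dead continue/merge targets rather than
    // via actual branches from the entry block.
    int deadTargets = 0;
    spv::inReadableOrder(func->getEntryBlock(),
        [&deadTargets](spv::Block*, spv::ReachReason why, spv::Block* /*header*/) {
            if (why != spv::ReachViaControlFlow)
                ++deadTargets;
        });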
+ +// +// SPIR-V IR Function. +// + +class Function { +public: + Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent); + virtual ~Function() + { + for (int i = 0; i < (int)parameterInstructions.size(); ++i) + delete parameterInstructions[i]; + + for (int i = 0; i < (int)blocks.size(); ++i) + delete blocks[i]; + } + Id getId() const { return functionInstruction.getResultId(); } + Id getParamId(int p) const { return parameterInstructions[p]->getResultId(); } + Id getParamType(int p) const { return parameterInstructions[p]->getTypeId(); } + + void addBlock(Block* block) { blocks.push_back(block); } + void removeBlock(Block* block) + { + auto found = find(blocks.begin(), blocks.end(), block); + assert(found != blocks.end()); + blocks.erase(found); + delete block; + } + + Module& getParent() const { return parent; } + Block* getEntryBlock() const { return blocks.front(); } + Block* getLastBlock() const { return blocks.back(); } + const std::vector<Block*>& getBlocks() const { return blocks; } + void addLocalVariable(std::unique_ptr<Instruction> inst); + Id getReturnType() const { return functionInstruction.getTypeId(); } + Id getFuncId() const { return functionInstruction.getResultId(); } + void setReturnPrecision(Decoration precision) + { + if (precision == DecorationRelaxedPrecision) + reducedPrecisionReturn = true; + } + Decoration getReturnPrecision() const + { return reducedPrecisionReturn ? DecorationRelaxedPrecision : NoPrecision; } + + void setDebugLineInfo(Id fileName, int line, int column) { + lineInstruction = std::unique_ptr<Instruction>{new Instruction(OpLine)}; + lineInstruction->addIdOperand(fileName); + lineInstruction->addImmediateOperand(line); + lineInstruction->addImmediateOperand(column); + } + bool hasDebugLineInfo() const { return lineInstruction != nullptr; } + + void setImplicitThis() { implicitThis = true; } + bool hasImplicitThis() const { return implicitThis; } + + void addParamPrecision(unsigned param, Decoration precision) + { + if (precision == DecorationRelaxedPrecision) + reducedPrecisionParams.insert(param); + } + Decoration getParamPrecision(unsigned param) const + { + return reducedPrecisionParams.find(param) != reducedPrecisionParams.end() ? + DecorationRelaxedPrecision : NoPrecision; + } + + void dump(std::vector<unsigned int>& out) const + { + // OpLine + if (lineInstruction != nullptr) { + lineInstruction->dump(out); + } + + // OpFunction + functionInstruction.dump(out); + + // OpFunctionParameter + for (int p = 0; p < (int)parameterInstructions.size(); ++p) + parameterInstructions[p]->dump(out); + + // Blocks + inReadableOrder(blocks[0], [&out](const Block* b, ReachReason, Block*) { b->dump(out); }); + Instruction end(0, 0, OpFunctionEnd); + end.dump(out); + } + +protected: + Function(const Function&); + Function& operator=(Function&); + + Module& parent; + std::unique_ptr<Instruction> lineInstruction; + Instruction functionInstruction; + std::vector<Instruction*> parameterInstructions; + std::vector<Block*> blocks; + bool implicitThis; // true if this is a member function expecting to be passed a 'this' as the first argument + bool reducedPrecisionReturn; + std::set<int> reducedPrecisionParams; // list of parameter indexes that need a relaxed precision arg +};
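[Editor's note] Function::dump() emits the canonical SPIR-V function layout: the optional OpLine, OpFunction, every OpFunctionParameter, the blocks in readable order, then a synthesized OpFunctionEnd. A minimal construction sketch under stated assumptions: all ids are invented, linking against glslang supplies inReadableOrder's definition, and the function type must be mapped into the Module before the Function constructor reads it (see the __inline constructor near the end of this header); object lifetimes are ignored here:

    spv::Module module;
    // %1 = OpTypeFunction %2  (no parameters); map it before constructing fn.
    spv::Instruction* fnType = new spv::Instruction(1, spv::NoType, spv::OpTypeFunction);
    fnType->addIdOperand(2); // return type <id>
    module.mapInstruction(fnType);
    // %3 = OpFunction with result type %2; %4 would be the first parameter id.
    spv::Function* fn = new spv::Function(3, 2, 1, 4, module);
    spv::Block* entry = new spv::Block(5, *fn); // creates and maps the OpLabel
    fn->addBlock(entry);
    entry->addInstruction(std::unique_ptr<spv::Instruction>(
        new spv::Instruction(spv::OpReturn)));
    std::vector<unsigned int> binary;
    fn->dump(binary); // OpFunction, OpLabel, OpReturn, OpFunctionEnd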
+ +// +// SPIR-V IR Module. +// + +class Module { +public: + Module() {} + virtual ~Module() + { + // TODO delete things + } + + void addFunction(Function *fun) { functions.push_back(fun); } + + void mapInstruction(Instruction *instruction) + { + spv::Id resultId = instruction->getResultId(); + // map the instruction's result id + if (resultId >= idToInstruction.size()) + idToInstruction.resize(resultId + 16); + idToInstruction[resultId] = instruction; + } + + Instruction* getInstruction(Id id) const { return idToInstruction[id]; } + const std::vector<Function*>& getFunctions() const { return functions; } + spv::Id getTypeId(Id resultId) const { + return idToInstruction[resultId] == nullptr ? NoType : idToInstruction[resultId]->getTypeId(); + } + StorageClass getStorageClass(Id typeId) const + { + assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer); + return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0); + } + + void dump(std::vector<unsigned int>& out) const + { + for (int f = 0; f < (int)functions.size(); ++f) + functions[f]->dump(out); + } + +protected: + Module(const Module&); + std::vector<Function*> functions; + + // map from result id to instruction having that result id + std::vector<Instruction*> idToInstruction; + + // map from a result id to its type id +}; + +// +// Implementation (it's here due to circular type definitions). +// + +// Add both +// - the OpFunction instruction +// - all the OpFunctionParameter instructions +__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent) + : parent(parent), lineInstruction(nullptr), + functionInstruction(id, resultType, OpFunction), implicitThis(false), + reducedPrecisionReturn(false) +{ + // OpFunction + functionInstruction.addImmediateOperand(FunctionControlMaskNone); + functionInstruction.addIdOperand(functionType); + parent.mapInstruction(&functionInstruction); + parent.addFunction(this); + + // OpFunctionParameter + Instruction* typeInst = parent.getInstruction(functionType); + int numParams = typeInst->getNumOperands() - 1; + for (int p = 0; p < numParams; ++p) { + Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter); + parent.mapInstruction(param); + parameterInstructions.push_back(param); + } +} + +__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst) +{ + Instruction* raw_instruction = inst.get(); + blocks[0]->addLocalVariable(std::move(inst)); + parent.mapInstruction(raw_instruction); +} + +__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false) +{ + instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel))); + instructions.back()->setBlock(this); + parent.getParent().mapInstruction(instructions.back().get()); +} + +__inline void Block::addInstruction(std::unique_ptr<Instruction> inst) +{ + Instruction* raw_instruction = inst.get(); + instructions.push_back(std::move(inst)); + raw_instruction->setBlock(this); + if (raw_instruction->getResultId()) + parent.getParent().mapInstruction(raw_instruction); +} + +} // end spv namespace + +#endif // spvIR_H
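[Editor's note] Module::getStorageClass() assumes the given type id was mapped and names an OpTypePointer, whose first operand is the storage-class immediate and whose second is the pointee type id. A sketch with invented ids, reusing the module object from the previous sketch:

    // %7 = OpTypePointer Function %6
    spv::Instruction* ptrType = new spv::Instruction(7, spv::NoType, spv::OpTypePointer);
    ptrType->addImmediateOperand(spv::StorageClassFunction); // operand 0: storage class
    ptrType->addIdOperand(6);                                // operand 1: pointee type
    module.mapInstruction(ptrType);
    // Now module.getStorageClass(7) == spv::StorageClassFunction; note that
    // mapInstruction grows the id->instruction table in chunks of 16 ids.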
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/build_info.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/build_info.h new file mode 100644 index 0000000..e711b20 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/glslang/build_info.h @@ -0,0 +1,62 @@ +// Copyright (C) 2020 The Khronos Group Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of The Khronos Group Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef GLSLANG_BUILD_INFO +#define GLSLANG_BUILD_INFO + +#define GLSLANG_VERSION_MAJOR 11 +#define GLSLANG_VERSION_MINOR 12 +#define GLSLANG_VERSION_PATCH 0 +#define GLSLANG_VERSION_FLAVOR "" + +#define GLSLANG_VERSION_GREATER_THAN(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH) > (patch))))) + +#define GLSLANG_VERSION_GREATER_OR_EQUAL_TO(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH >= (patch)))))) + +#define GLSLANG_VERSION_LESS_THAN(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH) < (patch))))) + +#define GLSLANG_VERSION_LESS_OR_EQUAL_TO(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH <= (patch)))))) + +#endif // GLSLANG_BUILD_INFO
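[Editor's note] The four comparison macros expand to nested major/minor/patch checks usable both in preprocessor conditionals and in constant expressions. A usage sketch (the include path depends on how this vendored tree is wired into the build):

    #include "glslang/build_info.h"

    #if GLSLANG_VERSION_GREATER_OR_EQUAL_TO(11, 12, 0)
    // code that relies on glslang 11.12+ behavior
    #endif

    static_assert(GLSLANG_VERSION_LESS_THAN(12, 0, 0),
                  "this vendored copy is pinned to the 11.x series");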
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_ALLOCATOR_H +#define NCNN_ALLOCATOR_H + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif + +#include "platform.h" + +#include <stdlib.h> + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +#include <android/hardware_buffer.h> +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +// the alignment of all the allocated buffers +#if NCNN_AVX512 +#define NCNN_MALLOC_ALIGN 64 +#elif NCNN_AVX +#define NCNN_MALLOC_ALIGN 32 +#else +#define NCNN_MALLOC_ALIGN 16 +#endif + +// we have some optimized kernels that may overread buffer a bit in loop +// it is common to interleave next-loop data load with arithmetic instructions +// allocating more bytes keeps us safe from SEGV_ACCERR failure +#define NCNN_MALLOC_OVERREAD 64 + +// Aligns a pointer to the specified number of bytes +// ptr Aligned pointer +// n Alignment size that must be a power of two +template<typename _Tp> +static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp)) +{ + return (_Tp*)(((size_t)ptr + n - 1) & -n); +} + +// Aligns a buffer size to the specified number of bytes +// The function returns the minimum number that is greater or equal to sz and is divisible by n +// sz Buffer size to align +// n Alignment size that must be a power of two +static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n) +{ + return (sz + n - 1) & -n; +} + +static NCNN_FORCEINLINE void* fastMalloc(size_t size) +{ +#if _MSC_VER + return _aligned_malloc(size, NCNN_MALLOC_ALIGN); +#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) + void* ptr = 0; + if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD)) + ptr = 0; + return ptr; +#elif __ANDROID__ && __ANDROID_API__ < 17 + return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD); +#else + unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD); + if (!udata) + return 0; + unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN); + adata[-1] = udata; + return adata; +#endif +} + +static NCNN_FORCEINLINE void fastFree(void* ptr) +{ + if (ptr) + { +#if _MSC_VER + _aligned_free(ptr); +#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) + free(ptr); +#elif __ANDROID__ && __ANDROID_API__ < 17 + free(ptr); +#else + unsigned char* udata = ((unsigned char**)ptr)[-1]; + free(udata); +#endif + } +}
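As a quick sanity check of the alignment helpers just defined (an illustrative snippet, not part of the header): for a power-of-two n, -n is a mask with the low log2(n) bits cleared, so (sz + n - 1) & -n rounds sz up to the next multiple of n.

    #include <cassert>
    #include <cstddef>

    // Same mask arithmetic as ncnn's alignSize, spelled out for illustration.
    static size_t align_size(size_t sz, int n) // n must be a power of two
    {
        return (sz + n - 1) & -n;
    }

    int main()
    {
        assert(align_size(100, 16) == 112); // 112 is the next multiple of 16 after 100
        assert(align_size(112, 16) == 112); // already-aligned sizes are unchanged
        assert(align_size(1, 64) == 64);    // 64 matches NCNN_MALLOC_ALIGN on AVX512 builds
        return 0;
    }

The generic fastMalloc fallback above uses the pointer flavor of the same trick: it over-allocates, aligns the user pointer with alignPtr, and stashes the raw malloc pointer one slot before it so fastFree can recover it.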
+ +#if NCNN_THREADS +// exchange-add operation for atomic operations on reference counters +#if defined __riscv && !defined __riscv_atomic +// riscv target without A extension +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32) +// atomic increment on the linux version of the Intel(tm) compiler +#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta) +#elif defined __GNUC__ +#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) +#ifdef __ATOMIC_ACQ_REL +#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) +#endif +#else +#if defined __ATOMIC_ACQ_REL && !defined __clang__ +// version for gcc >= 4.7 +#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) +#endif +#endif +#elif defined _MSC_VER && !defined RC_INVOKED +#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) +#else +// thread-unsafe branch +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif +#else // NCNN_THREADS +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif // NCNN_THREADS + +class NCNN_EXPORT Allocator +{ +public: + virtual ~Allocator(); + virtual void* fastMalloc(size_t size) = 0; + virtual void fastFree(void* ptr) = 0; +}; + +class PoolAllocatorPrivate; +class NCNN_EXPORT PoolAllocator : public Allocator +{ +public: + PoolAllocator(); + ~PoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + PoolAllocator(const PoolAllocator&); + PoolAllocator& operator=(const PoolAllocator&); + +private: + PoolAllocatorPrivate* const d; +}; + +class UnlockedPoolAllocatorPrivate; +class NCNN_EXPORT UnlockedPoolAllocator : public Allocator +{ +public: + UnlockedPoolAllocator(); + ~UnlockedPoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + UnlockedPoolAllocator(const UnlockedPoolAllocator&); + UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&); + +private: + UnlockedPoolAllocatorPrivate* const d; +}; + +#if NCNN_VULKAN + +class VulkanDevice; + +class NCNN_EXPORT VkBufferMemory +{ +public: + VkBuffer buffer; + + // the base offset assigned by allocator + size_t offset; + size_t capacity; + + VkDeviceMemory memory; + void* mapped_ptr; + + // buffer state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkPipelineStageFlags stage_flags; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkImageMemory +{ +public: + VkImage image; + VkImageView imageview; + + // underlying info assigned by allocator + int width; + int height; + int depth; + VkFormat format; + + VkDeviceMemory memory; + void* mapped_ptr; + + // the base offset assigned by allocator + size_t bind_offset; + size_t bind_capacity; + + // image state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkImageLayout image_layout; + mutable VkPipelineStageFlags stage_flags; + + // in-execution state, modified by command functions internally + mutable int
command_refcount; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkAllocator +{ +public: + explicit VkAllocator(const VulkanDevice* _vkdev); + virtual ~VkAllocator(); + + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size) = 0; + virtual void fastFree(VkBufferMemory* ptr) = 0; + virtual int flush(VkBufferMemory* ptr); + virtual int invalidate(VkBufferMemory* ptr); + + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; + virtual void fastFree(VkImageMemory* ptr) = 0; + +public: + const VulkanDevice* vkdev; + uint32_t buffer_memory_type_index; + uint32_t image_memory_type_index; + uint32_t reserved_type_index; + bool mappable; + bool coherent; + +protected: + VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); + VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); + VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); + + VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); + VkImageView create_imageview(VkImage image, VkFormat format); +}; + +class VkBlobAllocatorPrivate; +class NCNN_EXPORT VkBlobAllocator : public VkAllocator +{ +public: + explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M + virtual ~VkBlobAllocator(); + +public: + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkBlobAllocator(const VkBlobAllocator&); + VkBlobAllocator& operator=(const VkBlobAllocator&); + +private: + VkBlobAllocatorPrivate* const d; +}; + +class VkWeightAllocatorPrivate; +class NCNN_EXPORT VkWeightAllocator : public VkAllocator +{ +public: + explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M + virtual ~VkWeightAllocator(); + +public: + // release all blocks immediately + virtual void clear(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightAllocator(const VkWeightAllocator&); + VkWeightAllocator& operator=(const VkWeightAllocator&); + +private: + VkWeightAllocatorPrivate* const d; +}; + +class VkStagingAllocatorPrivate; +class NCNN_EXPORT VkStagingAllocator : public VkAllocator +{ +public: + explicit VkStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkStagingAllocator(); + +public: + // ratio range 0 ~ 1 + // default cr = 0.75 + void set_size_compare_ratio(float scr); + + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkStagingAllocator(const VkStagingAllocator&); + VkStagingAllocator& operator=(const VkStagingAllocator&); + +private: + VkStagingAllocatorPrivate* const d; +}; + +class VkWeightStagingAllocatorPrivate; +class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator +{ +public: + explicit 
VkWeightStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkWeightStagingAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightStagingAllocator(const VkWeightStagingAllocator&); + VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&); + +private: + VkWeightStagingAllocatorPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator +{ +public: + VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb); + virtual ~VkAndroidHardwareBufferImageAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&); + VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&); + +public: + int init(); + + int width() const; + int height() const; + uint64_t external_format() const; + +public: + AHardwareBuffer* hb; + AHardwareBuffer_Desc bufferDesc; + VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties; + VkAndroidHardwareBufferPropertiesANDROID bufferProperties; + VkSamplerYcbcrConversionKHR samplerYcbcrConversion; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_ALLOCATOR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/benchmark.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/benchmark.h new file mode 100644 index 0000000..3d5c0cd --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/benchmark.h @@ -0,0 +1,36 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
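allocator.h above ends with the per-device Vulkan allocators; on the CPU side, the pool allocators it declares are typically shared across repeated inferences so intermediate blobs reuse previously freed buffers instead of hitting malloc each run. A minimal sketch follows; ncnn::Net with its public Option opt, and the blob_allocator/workspace_allocator fields, are assumptions taken from upstream ncnn's net.h/option.h, which are not part of this excerpt.

    #include "allocator.h"
    #include "net.h" // assumed: declares ncnn::Net with a public Option opt, as in upstream ncnn

    // Reuse freed buffers across inferences instead of allocating fresh ones each run.
    static ncnn::UnlockedPoolAllocator g_blob_pool; // blob storage, no locking (one extractor at a time)
    static ncnn::PoolAllocator g_workspace_pool;    // scratch space, mutex-protected

    void configure_net(ncnn::Net& net)
    {
        // 0 means any cached buffer at least as large as the request may be reused
        g_blob_pool.set_size_compare_ratio(0.f);
        // start dropping the smallest cached buffers once more than 10 sit idle
        g_blob_pool.set_size_drop_threshold(10);

        net.opt.blob_allocator = &g_blob_pool;           // assumed Option fields
        net.opt.workspace_allocator = &g_workspace_pool;
    }

The split mirrors how the buffers are used: blob memory is touched by one extractor at a time (so the unlocked variant is safe), while workspace memory may be requested from multiple threads inside a layer.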
+ +#ifndef NCNN_BENCHMARK_H +#define NCNN_BENCHMARK_H + +#include "layer.h" +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +// get now timestamp in ms +NCNN_EXPORT double get_current_time(); + +#if NCNN_BENCHMARK + +NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); +NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); + +#endif // NCNN_BENCHMARK + +} // namespace ncnn + +#endif // NCNN_BENCHMARK_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/blob.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/blob.h new file mode 100644 index 0000000..c9f144f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/blob.h @@ -0,0 +1,44 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_BLOB_H +#define NCNN_BLOB_H + +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT Blob +{ +public: + // empty + Blob(); + +public: +#if NCNN_STRING + // blob name + std::string name; +#endif // NCNN_STRING + // layer index which produce this blob as output + int producer; + // layer index which need this blob as input + int consumer; + // shape hint + Mat shape; +}; + +} // namespace ncnn + +#endif // NCNN_BLOB_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/c_api.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/c_api.h new file mode 100644 index 0000000..ebf5db5 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/c_api.h @@ -0,0 +1,336 @@ +/* Tencent is pleased to support the open source community by making ncnn available. + * + * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. + * + * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +#ifndef NCNN_C_API_H +#define NCNN_C_API_H + +#include "platform.h" + +#if NCNN_C_API + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +NCNN_EXPORT const char* ncnn_version(); + +/* allocator api */ +typedef struct __ncnn_allocator_t* ncnn_allocator_t; +struct NCNN_EXPORT __ncnn_allocator_t +{ + void* pthis; + + void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size); + void (*fast_free)(ncnn_allocator_t allocator, void* ptr); +}; + +NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator(); +NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator(); +NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator); + +/* option api */ +typedef struct __ncnn_option_t* ncnn_option_t; + +NCNN_EXPORT ncnn_option_t ncnn_option_create(); +NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt); + +NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt); +NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads); + +NCNN_EXPORT int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt); +NCNN_EXPORT void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int use_local_pool_allocator); + +NCNN_EXPORT void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocator_t allocator); + +NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt); +NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute); + +/* mat api */ +typedef struct __ncnn_mat_t* ncnn_mat_t; + +NCNN_EXPORT ncnn_mat_t ncnn_mat_create(); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack,
ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_destroy(ncnn_mat_t mat); + +NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v); + +NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator); + +NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat); +NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat); + +NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c); + +#if NCNN_PIXEL + +/* mat pixel api */ +#define NCNN_MAT_PIXEL_RGB 1 +#define NCNN_MAT_PIXEL_BGR 2 +#define NCNN_MAT_PIXEL_GRAY 3 +#define NCNN_MAT_PIXEL_RGBA 4 +#define NCNN_MAT_PIXEL_BGRA 5 +#define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16)) +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride); +NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); + +#endif /* NCNN_PIXEL */ + +NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals); + +NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt); +NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt); + +/* blob api */ +typedef struct __ncnn_blob_t* ncnn_blob_t; + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob); +NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob); + +NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c); + +/* paramdict api */ +typedef struct __ncnn_paramdict_t* ncnn_paramdict_t; + +NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create(); +NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd); + +NCNN_EXPORT int ncnn_paramdict_get_type(const 
ncnn_paramdict_t pd, int id); + +NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def); +NCNN_EXPORT float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float def); +NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def); + +NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i); +NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f); +NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v); + +/* datareader api */ +typedef struct __ncnn_datareader_t* ncnn_datareader_t; +struct NCNN_EXPORT __ncnn_datareader_t +{ + void* pthis; + +#if NCNN_STRING + int (*scan)(ncnn_datareader_t dr, const char* format, void* p); +#endif /* NCNN_STRING */ + size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size); +}; + +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create(); +#if NCNN_STDIO +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp); +#endif /* NCNN_STDIO */ +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem); +NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr); + +/* modelbin api */ +typedef struct __ncnn_modelbin_t* ncnn_modelbin_t; +struct NCNN_EXPORT __ncnn_modelbin_t +{ + void* pthis; + + ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type); + ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type); + ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type); +}; + +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr); +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n); +NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb); + +/* layer api */ +typedef struct __ncnn_layer_t* ncnn_layer_t; +struct NCNN_EXPORT __ncnn_layer_t +{ + void* pthis; + + int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd); + int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb); + + int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + + int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt); + int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt); + + int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt); + int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt); +}; + +NCNN_EXPORT ncnn_layer_t ncnn_layer_create(); +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex); +#if NCNN_STRING +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer); + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer); +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t 
layer); +NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer); + +NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable); + +NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i); +NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i); + +NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); +NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); + +/* layer factory function */ +typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata); +typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata); + +typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t; +struct __ncnn_net_custom_layer_factory_t +{ + ncnn_layer_creator_t creator; + ncnn_layer_destroyer_t destroyer; + void* userdata; + ncnn_net_custom_layer_factory_t next; +}; + +/* net api */ +typedef struct __ncnn_net_t* ncnn_net_t; +struct __ncnn_net_t +{ + void* pthis; + + ncnn_net_custom_layer_factory_t custom_layer_factory; +}; + +NCNN_EXPORT ncnn_net_t ncnn_net_create(); +NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net); + +NCNN_EXPORT ncnn_option_t ncnn_net_get_option(ncnn_net_t net); +NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt); + +#if NCNN_STRING +NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path); +NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path); +#endif /* NCNN_STDIO */ + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem); +#endif /* NCNN_STRING */ +#endif /* NCNN_STDIO */ +NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem); +NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem); + +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +NCNN_EXPORT int 
ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr); + +NCNN_EXPORT void ncnn_net_clear(ncnn_net_t net); + +NCNN_EXPORT int ncnn_net_get_input_count(const ncnn_net_t net); +NCNN_EXPORT int ncnn_net_get_output_count(const ncnn_net_t net); +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_net_get_input_name(const ncnn_net_t net, int i); +NCNN_EXPORT const char* ncnn_net_get_output_name(const ncnn_net_t net, int i); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_get_input_index(const ncnn_net_t net, int i); +NCNN_EXPORT int ncnn_net_get_output_index(const ncnn_net_t net, int i); + +/* extractor api */ +typedef struct __ncnn_extractor_t* ncnn_extractor_t; + +NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net); +NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex); + +NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt); + +#if NCNN_STRING +NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* NCNN_C_API */ + +#endif /* NCNN_C_API_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/command.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/command.h new file mode 100644 index 0000000..337d085 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/command.h @@ -0,0 +1,136 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
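The extractor API above completes the C interface, so the whole load-and-infer flow can be written against it. A hedged sketch follows; every call is declared earlier in c_api.h, but the 640x640 input size and the "images"/"output" blob names are assumptions about this demo's converted yolov8n model, not something the header prescribes.

    #include "c_api.h"

    /* Minimal single-image inference through the ncnn C API. */
    int detect_once(const unsigned char* bgr, int w, int h)
    {
        ncnn_option_t opt = ncnn_option_create();
        ncnn_option_set_num_threads(opt, 4);

        ncnn_net_t net = ncnn_net_create();
        ncnn_net_set_option(net, opt);
        if (ncnn_net_load_param(net, "yolov8n.param") != 0 || ncnn_net_load_model(net, "yolov8n.bin") != 0)
            return -1;

        /* BGR bytes in, RGB planes out, resized to the assumed network input */
        ncnn_mat_t in = ncnn_mat_from_pixels_resize(bgr, NCNN_MAT_PIXEL_X2Y(NCNN_MAT_PIXEL_BGR, NCNN_MAT_PIXEL_RGB), w, h, w * 3, 640, 640, NULL);
        const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
        ncnn_mat_substract_mean_normalize(in, NULL, norm_vals); /* scale only, no mean */

        ncnn_extractor_t ex = ncnn_extractor_create(net);
        ncnn_extractor_input(ex, "images", in); /* assumed input blob name */

        ncnn_mat_t out = NULL;
        int ret = ncnn_extractor_extract(ex, "output", &out); /* hypothetical output blob name */

        if (out) ncnn_mat_destroy(out);
        ncnn_mat_destroy(in);
        ncnn_extractor_destroy(ex);
        ncnn_net_destroy(net);
        ncnn_option_destroy(opt);
        return ret;
    }

Note that extraction is lazy: ncnn_extractor_extract is what actually runs the graph, so inputs must all be set before the first extract call.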
+ +#ifndef NCNN_COMMAND_H +#define NCNN_COMMAND_H + +#include "platform.h" + +#if NCNN_VULKAN + +#include "mat.h" + +#include <vulkan/vulkan.h> + +namespace ncnn { + +class Pipeline; +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class ImportAndroidHardwareBufferPipeline; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API +class VkComputePrivate; +class NCNN_EXPORT VkCompute +{ +public: + explicit VkCompute(const VulkanDevice* vkdev); + virtual ~VkCompute(); + +public: + void record_upload(const Mat& src, VkMat& dst, const Option& opt); + + void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); + + void record_download(const VkMat& src, Mat& dst, const Option& opt); + + void record_download(const VkImageMat& src, Mat& dst, const Option& opt); + + void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt); + + void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt); + + void record_clone(const Mat& src, VkMat& dst, const Option& opt); + + void record_clone(const Mat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkMat& src, Mat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, Mat& dst, const Option& opt); + + void record_clone(const VkMat& src, VkMat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher); + +#if NCNN_BENCHMARK + void record_write_timestamp(uint32_t query); +#endif // NCNN_BENCHMARK + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst); + + void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + int submit_and_wait(); + + int reset(); + +#if NCNN_BENCHMARK + int create_query_pool(uint32_t query_count); + + int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results); +#endif // NCNN_BENCHMARK + +protected: + const VulkanDevice* vkdev; + + void barrier_readwrite(const VkMat& binding); + void barrier_readwrite(const VkImageMat& binding); + void barrier_readonly(const VkImageMat& binding); + +private: + VkComputePrivate* const d; +}; + +class VkTransferPrivate; +class NCNN_EXPORT VkTransfer +{ +public: + explicit VkTransfer(const VulkanDevice* vkdev); + virtual ~VkTransfer(); + +public: + void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool
flatten = true); + + void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); + + int submit_and_wait(); + +protected: + const VulkanDevice* vkdev; + +private: + VkTransferPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_COMMAND_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/cpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/cpu.h new file mode 100644 index 0000000..d03e7e8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/cpu.h @@ -0,0 +1,173 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_CPU_H +#define NCNN_CPU_H + +#include <stddef.h> + +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif +#if defined __ANDROID__ || defined __linux__ +#include <sched.h> // cpu_set_t +#endif + +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT CpuSet +{ +public: + CpuSet(); + void enable(int cpu); + void disable(int cpu); + void disable_all(); + bool is_enabled(int cpu) const; + int num_enabled() const; + +public: +#if (defined _WIN32 && !(defined __MINGW32__)) + ULONG_PTR mask; +#endif +#if defined __ANDROID__ || defined __linux__ + cpu_set_t cpu_set; +#endif +#if __APPLE__ + unsigned int policy; +#endif +}; + +// test optional cpu features +// edsp = armv7 edsp +NCNN_EXPORT int cpu_support_arm_edsp(); +// neon = armv7 neon or aarch64 asimd +NCNN_EXPORT int cpu_support_arm_neon(); +// vfpv4 = armv7 fp16 + fma +NCNN_EXPORT int cpu_support_arm_vfpv4(); +// asimdhp = aarch64 asimd half precision +NCNN_EXPORT int cpu_support_arm_asimdhp(); +// asimddp = aarch64 asimd dot product +NCNN_EXPORT int cpu_support_arm_asimddp(); +// asimdfhm = aarch64 asimd fhm +NCNN_EXPORT int cpu_support_arm_asimdfhm(); +// bf16 = aarch64 bf16 +NCNN_EXPORT int cpu_support_arm_bf16(); +// i8mm = aarch64 i8mm +NCNN_EXPORT int cpu_support_arm_i8mm(); +// sve = aarch64 sve +NCNN_EXPORT int cpu_support_arm_sve(); +// sve2 = aarch64 sve2 +NCNN_EXPORT int cpu_support_arm_sve2(); +// svebf16 = aarch64 svebf16 +NCNN_EXPORT int cpu_support_arm_svebf16(); +// svei8mm = aarch64 svei8mm +NCNN_EXPORT int cpu_support_arm_svei8mm(); +// svef32mm = aarch64 svef32mm +NCNN_EXPORT int cpu_support_arm_svef32mm(); + +// avx = x86 avx +NCNN_EXPORT int cpu_support_x86_avx(); +// fma = x86 fma +NCNN_EXPORT int cpu_support_x86_fma(); +// xop = x86 xop +NCNN_EXPORT int cpu_support_x86_xop(); +// f16c = x86 f16c +NCNN_EXPORT int cpu_support_x86_f16c(); +// avx2 = x86 avx2 + fma + f16c +NCNN_EXPORT int cpu_support_x86_avx2(); +// avx_vnni = x86 avx vnni +NCNN_EXPORT int cpu_support_x86_avx_vnni(); +// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl +NCNN_EXPORT int cpu_support_x86_avx512(); +// avx512_vnni = x86 avx512 vnni +NCNN_EXPORT int cpu_support_x86_avx512_vnni(); +// avx512_bf16 = x86 avx512 bf16 +NCNN_EXPORT int cpu_support_x86_avx512_bf16(); +// avx512_fp16 = x86 avx512 fp16 +NCNN_EXPORT int cpu_support_x86_avx512_fp16(); + +// lsx = loongarch lsx +NCNN_EXPORT int cpu_support_loongarch_lsx(); +// lasx = loongarch lasx +NCNN_EXPORT int cpu_support_loongarch_lasx(); + +// msa = mips msa +NCNN_EXPORT int cpu_support_mips_msa(); +// mmi = loongson mmi +NCNN_EXPORT int cpu_support_loongson_mmi(); + +// v = riscv vector +NCNN_EXPORT int cpu_support_riscv_v(); +// zfh = riscv half-precision float +NCNN_EXPORT int cpu_support_riscv_zfh(); +// vlenb = riscv vector length in bytes +NCNN_EXPORT int cpu_riscv_vlenb(); + +// cpu info +NCNN_EXPORT int get_cpu_count(); +NCNN_EXPORT int get_little_cpu_count(); +NCNN_EXPORT int get_big_cpu_count(); + +NCNN_EXPORT int get_physical_cpu_count(); +NCNN_EXPORT int get_physical_little_cpu_count(); +NCNN_EXPORT int get_physical_big_cpu_count(); + +// cpu l2 varies from 64k to 1M, but l3 can be zero +NCNN_EXPORT int get_cpu_level2_cache_size(); +NCNN_EXPORT int get_cpu_level3_cache_size(); + +// bind all threads on little clusters if powersave enabled +// affects HMP arch cpu like ARM big.LITTLE +// only implemented on android at the moment +// switching powersave is expensive and not thread-safe +// 0 = all cores enabled(default) +// 1 = only little clusters enabled +// 2 = only big clusters enabled +// return 0 if success for setter function +NCNN_EXPORT int get_cpu_powersave(); +NCNN_EXPORT int set_cpu_powersave(int powersave); + +// convenient wrapper +NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); + +// set explicit thread affinity +NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); + +// misc function wrapper for openmp routines +NCNN_EXPORT int get_omp_num_threads(); +NCNN_EXPORT void set_omp_num_threads(int num_threads); + +NCNN_EXPORT int get_omp_dynamic(); +NCNN_EXPORT void set_omp_dynamic(int dynamic); + +NCNN_EXPORT int get_omp_thread_num(); + +NCNN_EXPORT int get_kmp_blocktime(); +NCNN_EXPORT void set_kmp_blocktime(int time_ms); + +// need to flush denormals on Intel Chipset. +// Other architectures such as ARM can be added as needed. +// 0 = DAZ OFF, FTZ OFF +// 1 = DAZ ON , FTZ OFF +// 2 = DAZ OFF, FTZ ON +// 3 = DAZ ON, FTZ ON +NCNN_EXPORT int get_flush_denormals(); +NCNN_EXPORT int set_flush_denormals(int flush_denormals); + +} // namespace ncnn + +#endif // NCNN_CPU_H
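cpu.h above is the knob set the demo uses to keep inference on the fast cores. A short sketch of the usual Android pattern, using only functions declared in that header; the thread-count heuristic is an assumption, not something the header prescribes:

    #include "cpu.h"

    // Pin inference to the big cluster and size the OpenMP team to match.
    void bind_to_big_cores()
    {
        ncnn::set_cpu_powersave(2); // 2 = only big clusters enabled (see comment above)

        int big = ncnn::get_big_cpu_count();
        ncnn::set_omp_num_threads(big > 0 ? big : ncnn::get_cpu_count());

        ncnn::set_flush_denormals(3); // DAZ + FTZ on, avoids denormal stalls on x86
    }

As the header warns, switching powersave is expensive and not thread-safe, so call this once at startup rather than per frame.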
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/datareader.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/datareader.h new file mode 100644 index 0000000..ed2aba3 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/datareader.h @@ -0,0 +1,122 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied.
See the License for the + specific language governing permissions and limitations under the License. + +#ifndef NCNN_DATAREADER_H +#define NCNN_DATAREADER_H + +#include "platform.h" +#if NCNN_STDIO +#include <stdio.h> +#endif + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/asset_manager.h> +#endif +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +// data read wrapper +class NCNN_EXPORT DataReader +{ +public: + DataReader(); + virtual ~DataReader(); + +#if NCNN_STRING + // parse plain param text + // return 1 if scan success + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + + // read binary param and model data + // return bytes read + virtual size_t read(void* buf, size_t size) const; + + // get model data reference + // return bytes referenced + virtual size_t reference(size_t size, const void** buf) const; +}; + +#if NCNN_STDIO +class DataReaderFromStdioPrivate; +class NCNN_EXPORT DataReaderFromStdio : public DataReader +{ +public: + explicit DataReaderFromStdio(FILE* fp); + virtual ~DataReaderFromStdio(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + +private: + DataReaderFromStdio(const DataReaderFromStdio&); + DataReaderFromStdio& operator=(const DataReaderFromStdio&); + +private: + DataReaderFromStdioPrivate* const d; +}; +#endif // NCNN_STDIO + +class DataReaderFromMemoryPrivate; +class NCNN_EXPORT DataReaderFromMemory : public DataReader +{ +public: + explicit DataReaderFromMemory(const unsigned char*& mem); + virtual ~DataReaderFromMemory(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + virtual size_t reference(size_t size, const void** buf) const; + +private: + DataReaderFromMemory(const DataReaderFromMemory&); + DataReaderFromMemory& operator=(const DataReaderFromMemory&); + +private: + DataReaderFromMemoryPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +class DataReaderFromAndroidAssetPrivate; +class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader +{ +public: + explicit DataReaderFromAndroidAsset(AAsset* asset); + virtual ~DataReaderFromAndroidAsset(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + +private: + DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&); + DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&); + +private: + DataReaderFromAndroidAssetPrivate* const d; +}; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API + +} // namespace ncnn + +#endif // NCNN_DATAREADER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/gpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/gpu.h new file mode 100644 index 0000000..1cda182 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/gpu.h @@ -0,0 +1,360 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License.
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_GPU_H +#define NCNN_GPU_H + +#include "platform.h" + +#if NCNN_VULKAN + +#include "mat.h" + +#include <vulkan/vulkan.h> + +#include "vulkan_header_fix.h" + +namespace ncnn { + +// instance +NCNN_EXPORT int create_gpu_instance(); +NCNN_EXPORT void destroy_gpu_instance(); + +// instance extension capability +extern int support_VK_KHR_external_memory_capabilities; +extern int support_VK_KHR_get_physical_device_properties2; +extern int support_VK_KHR_get_surface_capabilities2; +extern int support_VK_KHR_surface; +extern int support_VK_EXT_debug_utils; +#if __ANDROID_API__ >= 26 +extern int support_VK_KHR_android_surface; +#endif // __ANDROID_API__ >= 26 + +// VK_KHR_external_memory_capabilities +extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR; + +// VK_KHR_get_physical_device_properties2 +extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; +extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; +extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR; +extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR; +extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR; +extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR; +extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR; + +// VK_KHR_get_surface_capabilities2 +extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; +extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; + +// VK_KHR_surface +extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; +extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; +extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR; +extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; +extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; + +#if __ANDROID_API__ >= 26 +// VK_KHR_android_surface +extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; +#endif // __ANDROID_API__ >= 26 + +// VK_NV_cooperative_matrix +extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV; + +// get info +NCNN_EXPORT int get_gpu_count(); +NCNN_EXPORT int get_default_gpu_index(); + +class GpuInfoPrivate; +class NCNN_EXPORT GpuInfo +{ +public: + explicit GpuInfo(); + virtual ~GpuInfo(); + + // vulkan physical device + VkPhysicalDevice physical_device() const; + + // memory properties + const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const; + + // info + uint32_t api_version() const; + uint32_t driver_version() const; + uint32_t vendor_id() const; + uint32_t device_id() const; + const char* device_name() const; + uint8_t* pipeline_cache_uuid() const; + + // 0 = discrete gpu + // 1 = integrated gpu + // 2 =
virtual gpu + // 3 = cpu + int type() const; + + // hardware limit + uint32_t max_shared_memory_size() const; + uint32_t max_workgroup_count_x() const; + uint32_t max_workgroup_count_y() const; + uint32_t max_workgroup_count_z() const; + uint32_t max_workgroup_invocations() const; + uint32_t max_workgroup_size_x() const; + uint32_t max_workgroup_size_y() const; + uint32_t max_workgroup_size_z() const; + size_t memory_map_alignment() const; + size_t buffer_offset_alignment() const; + size_t non_coherent_atom_size() const; + size_t buffer_image_granularity() const; + uint32_t max_image_dimension_1d() const; + uint32_t max_image_dimension_2d() const; + uint32_t max_image_dimension_3d() const; + float timestamp_period() const; + + // runtime + uint32_t compute_queue_family_index() const; + uint32_t graphics_queue_family_index() const; + uint32_t transfer_queue_family_index() const; + + uint32_t compute_queue_count() const; + uint32_t graphics_queue_count() const; + uint32_t transfer_queue_count() const; + + // property + bool unified_compute_transfer_queue() const; + + // subgroup + uint32_t subgroup_size() const; + bool support_subgroup_basic() const; + bool support_subgroup_vote() const; + bool support_subgroup_ballot() const; + bool support_subgroup_shuffle() const; + + // bug is not feature + bool bug_storage_buffer_no_l1() const; + bool bug_corrupted_online_pipeline_cache() const; + bool bug_buffer_image_load_zero() const; + + // but sometimes bug is a feature + bool bug_implicit_fp16_arithmetic() const; + + // fp16 and int8 feature + bool support_fp16_packed() const; + bool support_fp16_storage() const; + bool support_fp16_arithmetic() const; + bool support_int8_packed() const; + bool support_int8_storage() const; + bool support_int8_arithmetic() const; + + // ycbcr conversion feature + bool support_ycbcr_conversion() const; + + // cooperative matrix feature + bool support_cooperative_matrix() const; + bool support_cooperative_matrix_16_8_8() const; + + // extension capability + int support_VK_KHR_8bit_storage() const; + int support_VK_KHR_16bit_storage() const; + int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_create_renderpass2() const; + int support_VK_KHR_dedicated_allocation() const; + int support_VK_KHR_descriptor_update_template() const; + int support_VK_KHR_external_memory() const; + int support_VK_KHR_get_memory_requirements2() const; + int support_VK_KHR_maintenance1() const; + int support_VK_KHR_maintenance2() const; + int support_VK_KHR_maintenance3() const; + int support_VK_KHR_multiview() const; + int support_VK_KHR_portability_subset() const; + int support_VK_KHR_push_descriptor() const; + int support_VK_KHR_sampler_ycbcr_conversion() const; + int support_VK_KHR_shader_float16_int8() const; + int support_VK_KHR_shader_float_controls() const; + int support_VK_KHR_storage_buffer_storage_class() const; + int support_VK_KHR_swapchain() const; + int support_VK_EXT_descriptor_indexing() const; + int support_VK_EXT_memory_budget() const; + int support_VK_EXT_queue_family_foreign() const; +#if __ANDROID_API__ >= 26 + int support_VK_ANDROID_external_memory_android_hardware_buffer() const; +#endif // __ANDROID_API__ >= 26 + int support_VK_NV_cooperative_matrix() const; + +private: + GpuInfo(const GpuInfo&); + GpuInfo& operator=(const GpuInfo&); + +private: + friend int create_gpu_instance(); + GpuInfoPrivate* const d; +}; + +NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); + +class VkAllocator; +class VkCompute; +class Option; +class 
PipelineCache; +class VulkanDevicePrivate; +class NCNN_EXPORT VulkanDevice +{ +public: + VulkanDevice(int device_index = get_default_gpu_index()); + ~VulkanDevice(); + + const GpuInfo& info; + + VkDevice vkdevice() const; + + VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const; + + // with fixed workgroup size + VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const; + + // helper for creating pipeline + int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const; + int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const; + int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const; + int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + + uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const; + bool is_mappable(uint32_t memory_type_index) const; + bool is_coherent(uint32_t memory_type_index) const; + + VkQueue acquire_queue(uint32_t queue_family_index) const; + void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const; + + // allocator on this device + VkAllocator* acquire_blob_allocator() const; + void reclaim_blob_allocator(VkAllocator* allocator) const; + + VkAllocator* acquire_staging_allocator() const; + void reclaim_staging_allocator(VkAllocator* allocator) const; + + // immutable sampler for texelfetch + const VkSampler* immutable_texelfetch_sampler() const; + + // dummy buffer image + VkMat get_dummy_buffer() const; + VkImageMat get_dummy_image() const; + VkImageMat get_dummy_image_readonly() const; + + // pipeline cache on this device + const PipelineCache* get_pipeline_cache() const; + + // test image allocation + bool shape_support_image_storage(const Mat& shape) const; + + // current gpu heap memory budget in MB + uint32_t get_heap_budget() const; + + // utility operator + void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + + // VK_KHR_bind_memory2 + PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; + PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + + // VK_KHR_create_renderpass2 + PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; + PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; + PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; + PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; + + // VK_KHR_descriptor_update_template + PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; + PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; + PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; + + // VK_KHR_get_memory_requirements2 + PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; + PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; + PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; + + // VK_KHR_maintenance1 + PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; + + // VK_KHR_maintenance3 + PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; + + // VK_KHR_push_descriptor + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; + PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; + + // VK_KHR_sampler_ycbcr_conversion + PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; + PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; + + // VK_KHR_swapchain + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; + +#if __ANDROID_API__ >= 26 + // VK_ANDROID_external_memory_android_hardware_buffer + PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; + PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; +#endif // __ANDROID_API__ >= 26 + +protected: + // device extension + int init_device_extension(); + +private: + VulkanDevice(const VulkanDevice&); + VulkanDevice& operator=(const VulkanDevice&); + +private: + VulkanDevicePrivate* const d; +}; + +NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); + +// online spirv compilation +NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv); + +// info from spirv +class NCNN_EXPORT ShaderInfo +{ +public: + int specialization_count; + int binding_count; + int push_constant_count; + + // 0 = null + // 1 = storage buffer + // 2 = storage image + // 3 = combined image sampler + int binding_types[16]; // 16 is large enough I think ... + + int reserved_0; + int reserved_1; + int reserved_2; + int reserved_3; +}; + +NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_GPU_H
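gpu.h is what the demo consults before turning on Vulkan compute. A sketch of that decision, using only declarations from the header above; ncnn::Net and the Option field use_vulkan_compute are assumptions from upstream ncnn's net.h/option.h, and in upstream ncnn get_gpu_count() creates the gpu instance on demand (worth verifying for this snapshot). Pair it with destroy_gpu_instance() at process exit.

    #include "gpu.h"
    #include "net.h" // assumed: declares ncnn::Net with a public Option opt

    // Opt into Vulkan only when a capable device is actually present.
    bool enable_vulkan(ncnn::Net& net)
    {
        if (ncnn::get_gpu_count() == 0)
            return false; // no Vulkan device, stay on CPU

        const ncnn::GpuInfo& info = ncnn::get_gpu_info(ncnn::get_default_gpu_index());
        NCNN_LOGE("using gpu %s (fp16 storage: %d)", info.device_name(), (int)info.support_fp16_storage());

        net.opt.use_vulkan_compute = true; // assumed Option field
        return true;
    }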
vkGetImageMemoryRequirements2KHR; + PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; + PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; + + // VK_KHR_maintenance1 + PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; + + // VK_KHR_maintenance3 + PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; + + // VK_KHR_push_descriptor + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; + PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; + + // VK_KHR_sampler_ycbcr_conversion + PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; + PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; + + // VK_KHR_swapchain + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; + +#if __ANDROID_API__ >= 26 + // VK_ANDROID_external_memory_android_hardware_buffer + PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; + PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; +#endif // __ANDROID_API__ >= 26 + +protected: + // device extension + int init_device_extension(); + +private: + VulkanDevice(const VulkanDevice&); + VulkanDevice& operator=(const VulkanDevice&); + +private: + VulkanDevicePrivate* const d; +}; + +NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); + +// online spirv compilation +NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv); + +// info from spirv +class NCNN_EXPORT ShaderInfo +{ +public: + int specialization_count; + int binding_count; + int push_constant_count; + + // 0 = null + // 1 = storage buffer + // 2 = storage image + // 3 = combined image sampler + int binding_types[16]; // 16 is large enough I think ... + + int reserved_0; + int reserved_1; + int reserved_2; + int reserved_3; +}; + +NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_GPU_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer.h new file mode 100644 index 0000000..d02f65b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer.h @@ -0,0 +1,214 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_H +#define NCNN_LAYER_H + +#include "mat.h" +#include "modelbin.h" +#include "option.h" +#include "paramdict.h" +#include "platform.h" + +#include <vector> + +#if NCNN_VULKAN +#include "command.h" +#include "pipeline.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +class NCNN_EXPORT Layer +{ +public: + // empty + Layer(); + // virtual destructor + virtual ~Layer(); + + // load layer specific parameter from parsed dict + // return 0 if success + virtual int load_param(const ParamDict& pd); + + // load layer specific weight data from model binary + // return 0 if success + virtual int load_model(const ModelBin& mb); + + // layer implementation specific setup + // return 0 if success + virtual int create_pipeline(const Option& opt); + + // layer implementation specific clean + // return 0 if success + virtual int destroy_pipeline(const Option& opt); + +public: + // one input and one output blob + bool one_blob_only; + + // support inplace inference + bool support_inplace; + + // support vulkan compute + bool support_vulkan; + + // accept input blob with packed storage + bool support_packing; + + // accept bf16 + bool support_bf16_storage; + + // accept fp16 + bool support_fp16_storage; + + // accept int8 + bool support_int8_storage; + + // shader image storage + bool support_image_storage; + + // shader tensor storage + bool support_tensor_storage; + + bool support_reserved_00; + + bool support_reserved_0; + bool support_reserved_1; + bool support_reserved_2; + bool support_reserved_3; + bool support_reserved_4; + bool support_reserved_5; + bool support_reserved_6; + bool support_reserved_7; + bool support_reserved_8; + bool support_reserved_9; + + // feature disabled set + int featmask; + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const; + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const; + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + +#if NCNN_VULKAN +public: + // upload weight blob from host to device + virtual int upload_model(VkTransfer& cmd, const Option& opt); + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt)
const; + +public: + // assigned immediately after creating this layer + const VulkanDevice* vkdev; +#endif // NCNN_VULKAN + +public: + // custom user data + void* userdata; + // layer type index + int typeindex; +#if NCNN_STRING + // layer type name + std::string type; + // layer name + std::string name; +#endif // NCNN_STRING + // blob index which this layer needs as input + std::vector<int> bottoms; + // blob index which this layer produces as output + std::vector<int> tops; + // shape hint + std::vector<Mat> bottom_shapes; + std::vector<Mat> top_shapes; +}; + +// layer factory function +typedef Layer* (*layer_creator_func)(void*); +typedef void (*layer_destroyer_func)(Layer*, void*); + +struct layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; +}; + +struct custom_layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + +#if NCNN_STRING +// get layer type from type name +NCNN_EXPORT int layer_to_index(const char* type); +// create layer from type name +NCNN_EXPORT Layer* create_layer(const char* type); +#endif // NCNN_STRING +// create layer from layer type +NCNN_EXPORT Layer* create_layer(int index); + +#define DEFINE_LAYER_CREATOR(name) \ + ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \ + { \ + return new name; \ + } + +#define DEFINE_LAYER_DESTROYER(name) \ + void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \ + { \ + delete layer; \ + } + +} // namespace ncnn + +#endif // NCNN_LAYER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_shader_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_shader_type.h new file mode 100644 index 0000000..c143e7d --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_shader_type.h @@ -0,0 +1,29 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
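The Layer interface above is also the extension point for application code: subclass ncnn::Layer, set the capability flags in the constructor, override one of the forward methods, and register the factory produced by DEFINE_LAYER_CREATOR before loading the param file. A minimal sketch follows; the MySwish class and the register_custom_layer call (declared in the bundled net.h) are illustrative assumptions, not part of this diff:

#include <math.h>

#include "layer.h"
#include "net.h"

// Hypothetical custom activation: x = x * sigmoid(x), computed in place.
class MySwish : public ncnn::Layer
{
public:
    MySwish()
    {
        one_blob_only = true;   // exactly one input blob and one output blob
        support_inplace = true; // forward_inplace overwrites the input
    }

    virtual int forward_inplace(ncnn::Mat& bottom_top_blob, const ncnn::Option& opt) const
    {
        const int size = bottom_top_blob.w * bottom_top_blob.h * bottom_top_blob.d;
        #pragma omp parallel for num_threads(opt.num_threads)
        for (int q = 0; q < bottom_top_blob.c; q++)
        {
            // Mat::channel() + the templated pointer conversion yield a raw float*
            float* ptr = bottom_top_blob.channel(q);
            for (int i = 0; i < size; i++)
            {
                ptr[i] = ptr[i] / (1.f + expf(-ptr[i]));
            }
        }
        return 0;
    }
};

DEFINE_LAYER_CREATOR(MySwish)

// registration must happen before net.load_param(...), e.g.
//     net.register_custom_layer("MySwish", MySwish_layer_creator);

Registering before load_param matters because the param parser resolves unknown type names through the custom registry while building the graph.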
+ +#ifndef NCNN_LAYER_SHADER_TYPE_H +#define NCNN_LAYER_SHADER_TYPE_H + +namespace ncnn { + +namespace LayerShaderType { +enum LayerShaderType +{ +#include "layer_shader_type_enum.h" +}; +} // namespace LayerShaderType + +} // namespace ncnn + +#endif // NCNN_LAYER_SHADER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h new file mode 100644 index 0000000..bad5605 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_shader_type_enum.h @@ -0,0 +1,369 @@ +// Layer Shader Enum header +// +// This file is auto-generated by cmake, don't edit it. + +absval = 0, +absval_pack4 = 1, +absval_pack8 = 2, +batchnorm = 3, +batchnorm_pack4 = 4, +batchnorm_pack8 = 5, +concat = 6, +concat_pack4 = 7, +concat_pack4to1 = 8, +concat_pack8 = 9, +concat_pack8to1 = 10, +concat_pack8to4 = 11, +convolution = 12, +convolution_1x1s1d1 = 13, +convolution_3x3s1d1_winograd23_transform_input = 14, +convolution_3x3s1d1_winograd23_transform_output = 15, +convolution_3x3s1d1_winograd43_transform_input = 16, +convolution_3x3s1d1_winograd43_transform_output = 17, +convolution_3x3s1d1_winograd_gemm = 18, +convolution_gemm = 19, +convolution_pack1to4 = 20, +convolution_pack1to4_1x1s1d1 = 21, +convolution_pack1to4_3x3s1d1_winograd_gemm = 22, +convolution_pack1to4_gemm = 23, +convolution_pack1to8 = 24, +convolution_pack1to8_1x1s1d1 = 25, +convolution_pack1to8_3x3s1d1_winograd_gemm = 26, +convolution_pack1to8_gemm = 27, +convolution_pack4 = 28, +convolution_pack4_1x1s1d1 = 29, +convolution_pack4_1x1s1d1_cm_16_8_8 = 30, +convolution_pack4_3x3s1d1_winograd23_transform_input = 31, +convolution_pack4_3x3s1d1_winograd23_transform_output = 32, +convolution_pack4_3x3s1d1_winograd43_transform_input = 33, +convolution_pack4_3x3s1d1_winograd43_transform_output = 34, +convolution_pack4_3x3s1d1_winograd_gemm = 35, +convolution_pack4_3x3s1d1_winograd_gemm_cm_16_8_8 = 36, +convolution_pack4_gemm = 37, +convolution_pack4_gemm_cm_16_8_8 = 38, +convolution_pack4to1 = 39, +convolution_pack4to1_1x1s1d1 = 40, +convolution_pack4to1_3x3s1d1_winograd_gemm = 41, +convolution_pack4to1_gemm = 42, +convolution_pack4to8 = 43, +convolution_pack4to8_1x1s1d1 = 44, +convolution_pack4to8_3x3s1d1_winograd_gemm = 45, +convolution_pack4to8_gemm = 46, +convolution_pack8 = 47, +convolution_pack8_1x1s1d1 = 48, +convolution_pack8_3x3s1d1_winograd23_transform_input = 49, +convolution_pack8_3x3s1d1_winograd23_transform_output = 50, +convolution_pack8_3x3s1d1_winograd43_transform_input = 51, +convolution_pack8_3x3s1d1_winograd43_transform_output = 52, +convolution_pack8_3x3s1d1_winograd_gemm = 53, +convolution_pack8_gemm = 54, +convolution_pack8to1 = 55, +convolution_pack8to1_1x1s1d1 = 56, +convolution_pack8to1_3x3s1d1_winograd_gemm = 57, +convolution_pack8to1_gemm = 58, +convolution_pack8to4 = 59, +convolution_pack8to4_1x1s1d1 = 60, +convolution_pack8to4_3x3s1d1_winograd_gemm = 61, +convolution_pack8to4_gemm = 62, +crop = 63, +crop_pack1to4 = 64, +crop_pack1to8 = 65, +crop_pack4 = 66, +crop_pack4to1 = 67, +crop_pack4to8 = 68, +crop_pack8 = 69, +crop_pack8to1 = 70, +crop_pack8to4 = 71, +deconvolution = 72, +deconvolution_col2im = 73, +deconvolution_gemm = 74, +deconvolution_pack1to4 = 75, +deconvolution_pack1to4_gemm = 76, +deconvolution_pack1to8 = 77, +deconvolution_pack1to8_gemm = 78, +deconvolution_pack4 = 79, 
+deconvolution_pack4_col2im = 80, +deconvolution_pack4_gemm = 81, +deconvolution_pack4_gemm_cm_16_8_8 = 82, +deconvolution_pack4to1 = 83, +deconvolution_pack4to1_gemm = 84, +deconvolution_pack4to8 = 85, +deconvolution_pack4to8_gemm = 86, +deconvolution_pack8 = 87, +deconvolution_pack8_col2im = 88, +deconvolution_pack8_gemm = 89, +deconvolution_pack8to1 = 90, +deconvolution_pack8to1_gemm = 91, +deconvolution_pack8to4 = 92, +deconvolution_pack8to4_gemm = 93, +dropout = 94, +dropout_pack4 = 95, +dropout_pack8 = 96, +eltwise = 97, +eltwise_pack4 = 98, +eltwise_pack8 = 99, +elu = 100, +elu_pack4 = 101, +elu_pack8 = 102, +flatten = 103, +flatten_pack1to4 = 104, +flatten_pack1to8 = 105, +flatten_pack4 = 106, +flatten_pack4to8 = 107, +flatten_pack8 = 108, +innerproduct = 109, +innerproduct_gemm = 110, +innerproduct_gemm_wp1to4 = 111, +innerproduct_gemm_wp1to8 = 112, +innerproduct_gemm_wp4 = 113, +innerproduct_gemm_wp4to1 = 114, +innerproduct_gemm_wp4to8 = 115, +innerproduct_gemm_wp8 = 116, +innerproduct_gemm_wp8to1 = 117, +innerproduct_gemm_wp8to4 = 118, +innerproduct_pack1to4 = 119, +innerproduct_pack1to8 = 120, +innerproduct_pack4 = 121, +innerproduct_pack4to1 = 122, +innerproduct_pack4to8 = 123, +innerproduct_pack8 = 124, +innerproduct_pack8to1 = 125, +innerproduct_pack8to4 = 126, +innerproduct_reduce_sum8 = 127, +innerproduct_reduce_sum8_pack4 = 128, +innerproduct_reduce_sum8_pack8 = 129, +innerproduct_sum8 = 130, +innerproduct_sum8_pack1to4 = 131, +innerproduct_sum8_pack1to8 = 132, +innerproduct_sum8_pack4 = 133, +innerproduct_sum8_pack4to1 = 134, +innerproduct_sum8_pack4to8 = 135, +innerproduct_sum8_pack8 = 136, +innerproduct_sum8_pack8to1 = 137, +innerproduct_sum8_pack8to4 = 138, +lrn_norm = 139, +lrn_norm_across_channel_pack4 = 140, +lrn_norm_across_channel_pack8 = 141, +lrn_norm_within_channel_pack4 = 142, +lrn_norm_within_channel_pack8 = 143, +lrn_square_pad = 144, +lrn_square_pad_across_channel_pack4 = 145, +lrn_square_pad_across_channel_pack8 = 146, +lrn_square_pad_within_channel_pack4 = 147, +lrn_square_pad_within_channel_pack8 = 148, +pooling = 149, +pooling_adaptive = 150, +pooling_adaptive_pack4 = 151, +pooling_adaptive_pack8 = 152, +pooling_global = 153, +pooling_global_pack4 = 154, +pooling_global_pack8 = 155, +pooling_pack4 = 156, +pooling_pack8 = 157, +prelu = 158, +prelu_pack4 = 159, +prelu_pack8 = 160, +relu = 161, +relu_pack4 = 162, +relu_pack8 = 163, +reshape = 164, +reshape_pack1to4 = 165, +reshape_pack1to8 = 166, +reshape_pack4 = 167, +reshape_pack4to1 = 168, +reshape_pack4to8 = 169, +reshape_pack8 = 170, +reshape_pack8to1 = 171, +reshape_pack8to4 = 172, +scale = 173, +scale_pack4 = 174, +scale_pack8 = 175, +sigmoid = 176, +sigmoid_pack4 = 177, +sigmoid_pack8 = 178, +slice = 179, +slice_pack1to4 = 180, +slice_pack1to8 = 181, +slice_pack4 = 182, +slice_pack4to8 = 183, +slice_pack8 = 184, +softmax_div_sum = 185, +softmax_div_sum_pack4 = 186, +softmax_div_sum_pack8 = 187, +softmax_exp_sub_max = 188, +softmax_exp_sub_max_pack4 = 189, +softmax_exp_sub_max_pack8 = 190, +softmax_reduce_max = 191, +softmax_reduce_max_pack4 = 192, +softmax_reduce_max_pack8 = 193, +softmax_reduce_sum = 194, +softmax_reduce_sum_pack4 = 195, +softmax_reduce_sum_pack8 = 196, +tanh = 197, +tanh_pack4 = 198, +tanh_pack8 = 199, +binaryop = 200, +binaryop_broadcast_inner = 201, +binaryop_broadcast_inner_pack4 = 202, +binaryop_broadcast_inner_pack8 = 203, +binaryop_broadcast_outer = 204, +binaryop_broadcast_outer_pack4 = 205, +binaryop_broadcast_outer_pack8 = 206, +binaryop_pack4 = 207, +binaryop_pack8 = 
208, +unaryop = 209, +unaryop_pack4 = 210, +unaryop_pack8 = 211, +convolutiondepthwise = 212, +convolutiondepthwise_group = 213, +convolutiondepthwise_group_pack1to4 = 214, +convolutiondepthwise_group_pack1to8 = 215, +convolutiondepthwise_group_pack4 = 216, +convolutiondepthwise_group_pack4to1 = 217, +convolutiondepthwise_group_pack4to8 = 218, +convolutiondepthwise_group_pack8 = 219, +convolutiondepthwise_group_pack8to1 = 220, +convolutiondepthwise_group_pack8to4 = 221, +convolutiondepthwise_pack4 = 222, +convolutiondepthwise_pack8 = 223, +padding = 224, +padding_3d = 225, +padding_3d_pack4 = 226, +padding_3d_pack8 = 227, +padding_pack1to4 = 228, +padding_pack1to8 = 229, +padding_pack4 = 230, +padding_pack4to1 = 231, +padding_pack4to8 = 232, +padding_pack8 = 233, +padding_pack8to1 = 234, +padding_pack8to4 = 235, +normalize_coeffs = 236, +normalize_coeffs_pack4 = 237, +normalize_coeffs_pack8 = 238, +normalize_norm = 239, +normalize_norm_pack4 = 240, +normalize_norm_pack8 = 241, +normalize_reduce_sum4_fp16_to_fp32 = 242, +normalize_reduce_sum4_fp16_to_fp32_pack4 = 243, +normalize_reduce_sum4_fp16_to_fp32_pack8 = 244, +normalize_reduce_sum4_fp32 = 245, +normalize_reduce_sum4_fp32_pack4 = 246, +normalize_reduce_sum4_fp32_pack8 = 247, +permute = 248, +permute_pack1to4 = 249, +permute_pack1to8 = 250, +permute_pack4 = 251, +permute_pack4to1 = 252, +permute_pack4to8 = 253, +permute_pack8 = 254, +permute_pack8to1 = 255, +permute_pack8to4 = 256, +priorbox = 257, +priorbox_mxnet = 258, +interp = 259, +interp_bicubic = 260, +interp_bicubic_coeffs = 261, +interp_bicubic_pack4 = 262, +interp_bicubic_pack8 = 263, +interp_pack4 = 264, +interp_pack8 = 265, +deconvolutiondepthwise = 266, +deconvolutiondepthwise_group = 267, +deconvolutiondepthwise_group_pack1to4 = 268, +deconvolutiondepthwise_group_pack1to8 = 269, +deconvolutiondepthwise_group_pack4 = 270, +deconvolutiondepthwise_group_pack4to1 = 271, +deconvolutiondepthwise_group_pack4to8 = 272, +deconvolutiondepthwise_group_pack8 = 273, +deconvolutiondepthwise_group_pack8to1 = 274, +deconvolutiondepthwise_group_pack8to4 = 275, +deconvolutiondepthwise_pack4 = 276, +deconvolutiondepthwise_pack8 = 277, +shufflechannel = 278, +shufflechannel_pack4 = 279, +shufflechannel_pack8 = 280, +instancenorm_coeffs = 281, +instancenorm_coeffs_pack4 = 282, +instancenorm_coeffs_pack8 = 283, +instancenorm_norm = 284, +instancenorm_norm_pack4 = 285, +instancenorm_norm_pack8 = 286, +instancenorm_reduce_mean = 287, +instancenorm_reduce_mean_pack4 = 288, +instancenorm_reduce_mean_pack8 = 289, +instancenorm_reduce_sum4_fp16_to_fp32 = 290, +instancenorm_reduce_sum4_fp16_to_fp32_pack4 = 291, +instancenorm_reduce_sum4_fp16_to_fp32_pack8 = 292, +instancenorm_reduce_sum4_fp32 = 293, +instancenorm_reduce_sum4_fp32_pack4 = 294, +instancenorm_reduce_sum4_fp32_pack8 = 295, +instancenorm_sub_mean_square = 296, +instancenorm_sub_mean_square_pack4 = 297, +instancenorm_sub_mean_square_pack8 = 298, +clip = 299, +clip_pack4 = 300, +clip_pack8 = 301, +reorg = 302, +reorg_pack1to4 = 303, +reorg_pack1to8 = 304, +reorg_pack4 = 305, +reorg_pack4to8 = 306, +reorg_pack8 = 307, +packing = 308, +packing_fp16_to_fp32 = 309, +packing_fp32_to_fp16 = 310, +packing_pack1to4 = 311, +packing_pack1to4_fp16_to_fp32 = 312, +packing_pack1to4_fp32_to_fp16 = 313, +packing_pack1to8 = 314, +packing_pack1to8_fp16_to_fp32 = 315, +packing_pack1to8_fp32_to_fp16 = 316, +packing_pack4 = 317, +packing_pack4_fp16_to_fp32 = 318, +packing_pack4_fp32_to_fp16 = 319, +packing_pack4to1 = 320, +packing_pack4to1_fp16_to_fp32 = 321, 
+packing_pack4to1_fp32_to_fp16 = 322, +packing_pack4to8 = 323, +packing_pack4to8_fp16_to_fp32 = 324, +packing_pack4to8_fp32_to_fp16 = 325, +packing_pack8 = 326, +packing_pack8_fp16_to_fp32 = 327, +packing_pack8_fp32_to_fp16 = 328, +packing_pack8to1 = 329, +packing_pack8to1_fp16_to_fp32 = 330, +packing_pack8to1_fp32_to_fp16 = 331, +packing_pack8to4 = 332, +packing_pack8to4_fp16_to_fp32 = 333, +packing_pack8to4_fp32_to_fp16 = 334, +cast_fp16_to_fp32 = 335, +cast_fp16_to_fp32_pack4 = 336, +cast_fp16_to_fp32_pack8 = 337, +cast_fp32_to_fp16 = 338, +cast_fp32_to_fp16_pack4 = 339, +cast_fp32_to_fp16_pack8 = 340, +hardsigmoid = 341, +hardsigmoid_pack4 = 342, +hardsigmoid_pack8 = 343, +hardswish = 344, +hardswish_pack4 = 345, +hardswish_pack8 = 346, +pixelshuffle = 347, +pixelshuffle_pack4 = 348, +pixelshuffle_pack4to1 = 349, +pixelshuffle_pack8 = 350, +pixelshuffle_pack8to1 = 351, +pixelshuffle_pack8to4 = 352, +deepcopy = 353, +deepcopy_pack4 = 354, +deepcopy_pack8 = 355, +mish = 356, +mish_pack4 = 357, +mish_pack8 = 358, +swish = 359, +swish_pack4 = 360, +swish_pack8 = 361, +convert_ycbcr = 362, +vulkan_activation = 363, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_type.h new file mode 100644 index 0000000..511c714 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_type.h @@ -0,0 +1,30 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_TYPE_H +#define NCNN_LAYER_TYPE_H + +namespace ncnn { + +namespace LayerType { +enum LayerType +{ +#include "layer_type_enum.h" + CustomBit = (1 << 8), +}; +} // namespace LayerType + +} // namespace ncnn + +#endif // NCNN_LAYER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_type_enum.h new file mode 100644 index 0000000..9c1be52 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/layer_type_enum.h @@ -0,0 +1,105 @@ +// Layer Type Enum header +// +// This file is auto-generated by cmake, don't edit it. 
+ +AbsVal = 0, +ArgMax = 1, +BatchNorm = 2, +Bias = 3, +BNLL = 4, +Concat = 5, +Convolution = 6, +Crop = 7, +Deconvolution = 8, +Dropout = 9, +Eltwise = 10, +ELU = 11, +Embed = 12, +Exp = 13, +Flatten = 14, +InnerProduct = 15, +Input = 16, +Log = 17, +LRN = 18, +MemoryData = 19, +MVN = 20, +Pooling = 21, +Power = 22, +PReLU = 23, +Proposal = 24, +Reduction = 25, +ReLU = 26, +Reshape = 27, +ROIPooling = 28, +Scale = 29, +Sigmoid = 30, +Slice = 31, +Softmax = 32, +Split = 33, +SPP = 34, +TanH = 35, +Threshold = 36, +Tile = 37, +RNN = 38, +LSTM = 39, +BinaryOp = 40, +UnaryOp = 41, +ConvolutionDepthWise = 42, +Padding = 43, +Squeeze = 44, +ExpandDims = 45, +Normalize = 46, +Permute = 47, +PriorBox = 48, +DetectionOutput = 49, +Interp = 50, +DeconvolutionDepthWise = 51, +ShuffleChannel = 52, +InstanceNorm = 53, +Clip = 54, +Reorg = 55, +YoloDetectionOutput = 56, +Quantize = 57, +Dequantize = 58, +Yolov3DetectionOutput = 59, +PSROIPooling = 60, +ROIAlign = 61, +Packing = 62, +Requantize = 63, +Cast = 64, +HardSigmoid = 65, +SELU = 66, +HardSwish = 67, +Noop = 68, +PixelShuffle = 69, +DeepCopy = 70, +Mish = 71, +StatisticsPooling = 72, +Swish = 73, +Gemm = 74, +GroupNorm = 75, +LayerNorm = 76, +Softplus = 77, +GRU = 78, +MultiHeadAttention = 79, +GELU = 80, +Convolution1D = 81, +Pooling1D = 82, +ConvolutionDepthWise1D = 83, +Convolution3D = 84, +ConvolutionDepthWise3D = 85, +Pooling3D = 86, +MatMul = 87, +Deconvolution1D = 88, +DeconvolutionDepthWise1D = 89, +Deconvolution3D = 90, +DeconvolutionDepthWise3D = 91, +Einsum = 92, +DeformableConv2D = 93, +GLU = 94, +Fold = 95, +Unfold = 96, +GridSample = 97, +CumulativeSum = 98, +CopyTo = 99, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/mat.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/mat.h new file mode 100644 index 0000000..c6f59ef --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/mat.h @@ -0,0 +1,1843 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
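The integer ids just listed pair with create_layer(int) from layer.h, which lets a single operator be run over a blob without building a whole Net. A minimal sketch, assuming CPU-only execution; the helper name apply_leaky_relu is illustrative, and param id 0 being the ReLU slope follows ncnn's param conventions:

#include "layer.h"
#include "layer_type.h"

// Run a standalone leaky ReLU over one blob, outside of any Net.
int apply_leaky_relu(ncnn::Mat& blob)
{
    ncnn::Layer* relu = ncnn::create_layer(ncnn::LayerType::ReLU);

    ncnn::ParamDict pd;
    pd.set(0, 0.1f); // param 0 = slope; a non-zero value makes the ReLU leaky
    relu->load_param(pd);

    ncnn::Option opt;
    relu->create_pipeline(opt);
    int ret = relu->forward_inplace(blob, opt); // ReLU supports inplace inference
    relu->destroy_pipeline(opt);

    delete relu;
    return ret;
}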
+ +#ifndef NCNN_MAT_H +#define NCNN_MAT_H + +#include <stdlib.h> +#include <string.h> +#if __ARM_NEON +#include <arm_neon.h> +#endif +#if __SSE2__ +#include <emmintrin.h> +#if __AVX__ +#include <immintrin.h> +#endif +#endif +#if __mips_msa +#include <msa.h> +#endif +#if __loongarch_sx +#include <lsxintrin.h> +#endif +#if __riscv_vector +#include <riscv_vector.h> +#include "cpu.h" // cpu_riscv_vlenb() +#endif + +#include "allocator.h" +#include "option.h" +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PIXEL +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/bitmap.h> +#include <jni.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + +namespace ncnn { + +#if NCNN_VULKAN +class VkMat; +class VkImageMat; +#endif // NCNN_VULKAN + +// the three dimension matrix +class NCNN_EXPORT Mat +{ +public: + // empty + Mat(); + // vec + Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // image + Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // dim + Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // cube + Mat(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // packed vec + Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed image + Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed dim + Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed cube + Mat(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // copy + Mat(const Mat& m); + // external vec + Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external image + Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external dim + Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external packed vec + Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed image + Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed dim + Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // release + ~Mat(); + // assign + Mat& operator=(const Mat& m); + // set all + void fill(float v); + void fill(int v); +#if __ARM_NEON + void fill(float32x4_t _v); + void fill(uint16x4_t _v); + void fill(int32x4_t _v); + void fill(int32x4_t _v0, int32x4_t _v1); +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + void fill(float16x4_t _v); + void fill(float16x8_t _v); +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ + void fill(__m512 _v); +#endif // __AVX512F__ + void fill(__m256 _v, int i = 0); +#endif // __AVX__ + void fill(__m128 _v); + void fill(__m128i _v); +#endif // __SSE2__ +#if __mips_msa + void fill(v4f32 _v); +#endif // __mips_msa +#if __loongarch_sx + void fill(__m128 _v); +#endif //__loongarch_sx +#if __riscv_vector + void fill(vfloat32m1_t _v); + void fill(vuint16m1_t _v); + void fill(vint8m1_t _v); +#if __riscv_zfh + void fill(vfloat16m1_t _v); +#endif // __riscv_zfh +#endif // __riscv_vector + template<typename T> + void fill(T v); + // deep copy + Mat clone(Allocator* allocator = 0) const; + // deep copy from other mat, inplace + void
clone_from(const ncnn::Mat& mat, Allocator* allocator = 0); + // reshape vec + Mat reshape(int w, Allocator* allocator = 0) const; + // reshape image + Mat reshape(int w, int h, Allocator* allocator = 0) const; + // reshape dim + Mat reshape(int w, int h, int c, Allocator* allocator = 0) const; + // reshape cube + Mat reshape(int w, int h, int d, int c, Allocator* allocator = 0) const; + // allocate vec + void create(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate image + void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate dim + void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate like + void create_like(const Mat& m, Allocator* allocator = 0); +#if NCNN_VULKAN + // allocate like + void create_like(const VkMat& m, Allocator* allocator = 0); + // allocate like + void create_like(const VkImageMat& im, Allocator* allocator = 0); +#endif // NCNN_VULKAN + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // data reference + Mat channel(int c); + const Mat channel(int c) const; + Mat depth(int z); + const Mat depth(int z) const; + float* row(int y); + const float* row(int y) const; + template<typename T> + T* row(int y); + template<typename T> + const T* row(int y) const; + + // range reference + Mat channel_range(int c, int channels); + const Mat channel_range(int c, int channels) const; + Mat depth_range(int z, int depths); + const Mat depth_range(int z, int depths) const; + Mat row_range(int y, int rows); + const Mat row_range(int y, int rows) const; + Mat range(int x, int n); + const Mat range(int x, int n) const; + + // access raw data + template<typename T> + operator T*(); + template<typename T> + operator const T*() const; + + // convenient access float vec element + float& operator[](size_t i); + const float& operator[](size_t i) const; + +#if NCNN_PIXEL + enum PixelType + { + PIXEL_CONVERT_SHIFT = 16, + PIXEL_FORMAT_MASK = 0x0000ffff, + PIXEL_CONVERT_MASK = 0xffff0000, + + PIXEL_RGB = 1, + PIXEL_BGR = 2, + PIXEL_GRAY = 3, + PIXEL_RGBA = 4, + PIXEL_BGRA = 5, + + PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGRA =
PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + }; + // convenient construct from pixel data + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0); + // convenient construct from pixel data with stride(bytes-per-row) parameter + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi with stride(bytes-per-row) parameter + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + + // convenient export to pixel data + void to_pixels(unsigned char* pixels, int type) const; + // convenient export to pixel data with stride(bytes-per-row) parameter + void to_pixels(unsigned char* pixels, int type, int stride) const; + // convenient export to pixel data and resize to specific size + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const; + // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 + // convenient construct from android Bitmap + static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0); + // convenient construct from android Bitmap and resize to specific size + static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from android Bitmap 
roi + static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from android Bitmap roi and resize to specific size + static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient export to android Bitmap and resize to the android Bitmap size + void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + + // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip + void substract_mean_normalize(const float* mean_vals, const float* norm_vals); + + // convenient construct from half precision floating point data + static Mat from_float16(const unsigned short* data, int size); + + // pointer to the data + void* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + Allocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +#if NCNN_VULKAN + +// the three dimension matrix, vulkan version +class NCNN_EXPORT VkMat +{ +public: + // empty + VkMat(); + // vec + VkMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkMat(const VkMat& m); + // external vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkMat(); + // assign + VkMat& operator=(const VkMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); 
+ // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkBuffer buffer() const; + size_t buffer_offset() const; + size_t buffer_capacity() const; + + // device buffer + VkBufferMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +class NCNN_EXPORT VkImageMat +{ +public: + // empty + VkImageMat(); + // vec + VkImageMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkImageMat(const VkImageMat& m); + // external vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* 
allocator); + // external packed dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkImageMat(); + // assign + VkImageMat& operator=(const VkImageMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); + // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkImage image() const; + VkImageView imageview() const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + // convenient construct from android hardware buffer + static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + // device image + VkImageMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; +}; + +// type for vulkan specialization constant and push constant +union vk_specialization_type +{ + int i; + float f; + uint32_t u32; +}; +union vk_constant_type +{ + int i; + float f; +}; +#endif // NCNN_VULKAN + +// misc function +#if NCNN_PIXEL +// convert yuv420sp(nv21) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv12) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv21) to rgb with half resize, the faster approximate version +NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// image pixel bilinear resize +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, 
unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +// image pixel bilinear resize with stride(bytes-per-row) parameter +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +// image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +#endif // NCNN_PIXEL +#if NCNN_PIXEL_ROTATE +// type is the from type, 6 means rotating from 6 to 1 +// +// 1 2 3 4 5 6 7 8 +// +// 888888 888888 88 88 8888888888 88 88 8888888888 +// 88 88 88 88 88 88 88 88 88 88 88 88 +// 8888 8888 8888 8888 88 8888888888 8888888888 88 +// 88 88 88 88 +// 88 88 888888 888888 +// +// ref http://sylvana.net/jpegcrop/exif_orientation.html +// image pixel kanna rotate +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +// image pixel kanna rotate with stride(bytes-per-row) parameter +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +// image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +#endif // NCNN_PIXEL_ROTATE +#if NCNN_PIXEL_AFFINE +// resolve affine transform matrix from rotation angle, scale factor and x y offset +NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm); +// resolve affine transform matrix from two set of points, num_point must be >= 2 +NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm); +// resolve the inversion affine transform matrix +NCNN_EXPORT void invert_affine_transform(const 
float* tm, float* tm_inv); +// image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +#endif // NCNN_PIXEL_AFFINE +#if NCNN_PIXEL_DRAWING +// draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int 
stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw line, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// resolve text bounding box size 
+NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h); +// draw ascii printables and newline, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +#endif // NCNN_PIXEL_DRAWING + +// type conversion +// convert float to half precision floating point +NCNN_EXPORT unsigned short float32_to_float16(float value); +// convert half precision floating point to float +NCNN_EXPORT float float16_to_float32(unsigned short value); +// convert float to brain half +NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.f = value; + return tmp.u >> 16; +} +// convert brain half to float +NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.u = value << 16; + return tmp.f; +} + +// mat process +enum BorderType +{ + BORDER_CONSTANT = 0, + BORDER_REPLICATE = 1, + BORDER_REFLECT = 2, + BORDER_TRANSPARENT = -233, +}; +NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt = Option()); +NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void 
convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option()); +NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option()); +NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option()); +NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option()); + +NCNN_FORCEINLINE Mat::Mat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(const Mat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), 
h(m.h), d(m.d), c(m.c), cstep(m.cstep) +{ + addref(); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::~Mat() +{ + release(); +} + +NCNN_FORCEINLINE void Mat::fill(float _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + + int i = 0; +#if __ARM_NEON + float32x4_t _c = vdupq_n_f32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_f32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +NCNN_FORCEINLINE void Mat::fill(int _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + + int i = 0; +#if __ARM_NEON + int32x4_t _c = vdupq_n_s32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_s32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +#if __ARM_NEON +NCNN_FORCEINLINE void Mat::fill(float32x4_t _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vst1q_f32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vst1_u16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + 
vst1q_s32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + vst1q_s32(ptr, _v0); + vst1q_s32(ptr + 4, _v1); + ptr += 8; + } +} +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +NCNN_FORCEINLINE void Mat::fill(float16x4_t _v) +{ + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vst1_f16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(float16x8_t _v) +{ + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vst1q_f16(ptr, _v); + ptr += 8; + } +} +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON + +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ +NCNN_FORCEINLINE void Mat::fill(__m512 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm512_storeu_ps(ptr, _v); + ptr += 16; + } +} +#endif // __AVX512F__ +NCNN_FORCEINLINE void Mat::fill(__m256 _v, int _i) +{ + // old gcc cannot overload __m128 and __m256 type + // add a dummy int parameter for different mangled function symbol + (void)_i; + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm256_storeu_ps(ptr, _v); + ptr += 8; + } +} +#endif // __AVX__ +NCNN_FORCEINLINE void Mat::fill(__m128 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm_storeu_ps(ptr, _v); + ptr += 4; + } +} +NCNN_FORCEINLINE void Mat::fill(__m128i _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + _mm_store_si128((__m128i*)ptr, _v); + ptr += 8; + } +} +#endif // __SSE2__ + +#if __mips_msa +NCNN_FORCEINLINE void Mat::fill(v4f32 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + __msa_st_w((v4i32)_v, ptr, 0); + ptr += 4; + } +} +#endif // __mips_msa + +#if __loongarch_sx +NCNN_FORCEINLINE void Mat::fill(__m128 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + __lsx_vst(_v, ptr, 0); + ptr += 4; + } +} +#endif // __loongarch_sx +#if __riscv_vector +NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 4; + const size_t vl = vsetvl_e32m1(packn); + + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vse32_v_f32m1(ptr, _v, vl); + ptr += packn; + } +} + +NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 2; + const size_t vl = vsetvl_e16m1(packn); + + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vse16_v_u16m1(ptr, _v, vl); + ptr += packn; + } +} + +NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 1; + const size_t vl = vsetvl_e8m1(packn); + + int size = (int)total(); + signed char* ptr = (signed char*)data; + for (int i = 0; i < size; i++) + { + vse8_v_i8m1(ptr, _v, vl); + ptr += packn; + } +} +#if __riscv_zfh +NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 2; + const size_t vl = vsetvl_e16m1(packn); + + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vse16_v_f16m1(ptr, _v, vl); + ptr += packn; + } +} +#endif // __riscv_zfh +#endif // __riscv_vector + +template<typename T> +NCNN_FORCEINLINE void Mat::fill(T _v) +{ + 
int size = (int)total(); + T* ptr = (T*)data; + for (int i = 0; i < size; i++) + { + ptr[i] = _v; + } +} + +NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE void Mat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void Mat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator) + allocator->fastFree(data); + else + fastFree(data); + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool Mat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t Mat::total() const +{ + return cstep * c; +} + +NCNN_FORCEINLINE int Mat::elembits() const +{ + return elempack ? static_cast<int>(elemsize * 8) / elempack : 0; +} + +NCNN_FORCEINLINE Mat Mat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE Mat Mat::channel(int _c) +{ + Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims - 1; + if (dims == 4) + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::channel(int _c) const +{ + Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims - 1; + if (dims == 4) + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE Mat Mat::depth(int z) +{ + return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::depth(int z) const +{ + return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE float* Mat::row(int y) +{ + return (float*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +NCNN_FORCEINLINE const float* Mat::row(int y) const +{ + return (const float*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +template<typename T> +NCNN_FORCEINLINE T* Mat::row(int y) +{ + return (T*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +template<typename T> +NCNN_FORCEINLINE const T* Mat::row(int y) const +{ + return (const T*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels) +{ + Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const +{ + Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims; + return m; +} + +NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths) +{ + Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const +{ + Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); + 
m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows) +{ + return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const +{ + return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE Mat Mat::range(int x, int n) +{ + return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const +{ + return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator); +} + +template<typename T> +NCNN_FORCEINLINE Mat::operator T*() +{ + return (T*)data; +} + +template<typename T> +NCNN_FORCEINLINE Mat::operator const T*() const +{ + return (const T*)data; +} + +NCNN_FORCEINLINE float& Mat::operator[](size_t i) +{ + return ((float*)data)[i]; +} + +NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const +{ + return ((const float*)data)[i]; +} + +#if NCNN_VULKAN + +NCNN_FORCEINLINE VkMat::VkMat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c) +{ + addref(); + + cstep 
= m.cstep; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::~VkMat() +{ + release(); +} + +NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE Mat VkMat::mapped() const +{ + if (!allocator->mappable) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkMat::mapped_ptr() const +{ + if (!allocator->mappable) + return 0; + + return (unsigned char*)data->mapped_ptr + data->offset; +} + +NCNN_FORCEINLINE void VkMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkMat::release() +{ + if (refcount && 
NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool VkMat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t VkMat::total() const +{ + return cstep * c; +} + +NCNN_FORCEINLINE int VkMat::elembits() const +{ + return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0; +} + +NCNN_FORCEINLINE Mat VkMat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE VkBuffer VkMat::buffer() const +{ + return data->buffer; +} + +NCNN_FORCEINLINE size_t VkMat::buffer_offset() const +{ + return data->offset; +} + +NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const +{ + return data->capacity; +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), 
d(m.d), c(m.c) +{ + addref(); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::~VkImageMat() +{ + release(); +} + +NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + return *this; +} + +NCNN_FORCEINLINE Mat VkImageMat::mapped() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return 0; + + return (unsigned char*)data->mapped_ptr + data->bind_offset; +} + +NCNN_FORCEINLINE void VkImageMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkImageMat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + 
elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool VkImageMat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t VkImageMat::total() const +{ + return w * h * d * c; +} + +NCNN_FORCEINLINE int VkImageMat::elembits() const +{ + return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0; +} + +NCNN_FORCEINLINE Mat VkImageMat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE VkImage VkImageMat::image() const +{ + return data->image; +} + +NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const +{ + return data->imageview; +} + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_MAT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/modelbin.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/modelbin.h new file mode 100644 index 0000000..15d2b9c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/modelbin.h @@ -0,0 +1,80 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
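The Mat and pixel-drawing APIs above are easiest to see end to end in a small usage sketch. The fragment below is illustrative only and not part of the vendored headers: the 640x640 buffer size, the label text, and the 0x000000ff color are assumptions, and it relies on the Mat constructor's default elemsize of 4 bytes (float32).

// Hedged usage sketch for mat.h; sizes, label and color are assumptions.
#include <vector>
#include "mat.h"

static void mat_and_drawing_sketch()
{
    // 3-channel float Mat; fill() dispatches to the SIMD paths defined above
    ncnn::Mat m(640, 640, 3); // w, h, c; elemsize defaults to 4 (float32)
    m.fill(0.f);

    // channel() returns a 2D view; row() yields a raw float pointer into it
    float* row0 = m.channel(1).row(0);
    row0[0] = 1.f;

    // annotate an 8-bit RGB image; colors are RGBA little-endian (R = low byte)
    std::vector<unsigned char> rgb(640 * 640 * 3, 0);
    ncnn::draw_rectangle_c3(rgb.data(), 640, 640, 30, 40, 120, 80, 0x000000ff, 2);
    ncnn::draw_text_c3(rgb.data(), 640, 640, "person 0.92", 30, 20, 16, 0x000000ff);
}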
+ +#ifndef NCNN_MODELBIN_H +#define NCNN_MODELBIN_H + +#include "mat.h" + +namespace ncnn { + +class DataReader; +class NCNN_EXPORT ModelBin +{ +public: + ModelBin(); + virtual ~ModelBin(); + // element type + // 0 = auto + // 1 = float32 + // 2 = float16 + // 3 = int8 + // load vec + virtual Mat load(int w, int type) const = 0; + // load image + virtual Mat load(int w, int h, int type) const; + // load dim + virtual Mat load(int w, int h, int c, int type) const; + // load cube + virtual Mat load(int w, int h, int d, int c, int type) const; +}; + +class ModelBinFromDataReaderPrivate; +class NCNN_EXPORT ModelBinFromDataReader : public ModelBin +{ +public: + explicit ModelBinFromDataReader(const DataReader& dr); + virtual ~ModelBinFromDataReader(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromDataReader(const ModelBinFromDataReader&); + ModelBinFromDataReader& operator=(const ModelBinFromDataReader&); + +private: + ModelBinFromDataReaderPrivate* const d; +}; + +class ModelBinFromMatArrayPrivate; +class NCNN_EXPORT ModelBinFromMatArray : public ModelBin +{ +public: + // construct from weight blob array + explicit ModelBinFromMatArray(const Mat* weights); + virtual ~ModelBinFromMatArray(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromMatArray(const ModelBinFromMatArray&); + ModelBinFromMatArray& operator=(const ModelBinFromMatArray&); + +private: + ModelBinFromMatArrayPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_MODELBIN_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/ncnn_export.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/ncnn_export.h new file mode 100644 index 0000000..7343310 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/ncnn_export.h @@ -0,0 +1,42 @@ + +#ifndef NCNN_EXPORT_H +#define NCNN_EXPORT_H + +#ifdef NCNN_STATIC_DEFINE +# define NCNN_EXPORT +# define NCNN_NO_EXPORT +#else +# ifndef NCNN_EXPORT +# ifdef ncnn_EXPORTS + /* We are building this library */ +# define NCNN_EXPORT +# else + /* We are using this library */ +# define NCNN_EXPORT +# endif +# endif + +# ifndef NCNN_NO_EXPORT +# define NCNN_NO_EXPORT +# endif +#endif + +#ifndef NCNN_DEPRECATED +# define NCNN_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef NCNN_DEPRECATED_EXPORT +# define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED +#endif + +#ifndef NCNN_DEPRECATED_NO_EXPORT +# define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef NCNN_NO_DEPRECATED +# define NCNN_NO_DEPRECATED +# endif +#endif + +#endif /* NCNN_EXPORT_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/net.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/net.h new file mode 100644 index 0000000..9407042 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/net.h @@ -0,0 +1,272 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_NET_H +#define NCNN_NET_H + +#include "blob.h" +#include "layer.h" +#include "mat.h" +#include "option.h" +#include "platform.h" + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/asset_manager.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +#if NCNN_VULKAN +class VkCompute; +#endif // NCNN_VULKAN +class DataReader; +class Extractor; +class NetPrivate; +class NCNN_EXPORT Net +{ +public: + // empty init + Net(); + // clear and destroy + virtual ~Net(); + +public: + // option can be changed before loading + Option opt; + +#if NCNN_VULKAN + // set gpu device by index + void set_vulkan_device(int device_index); + + // set gpu device by device handle, no owner transfer + void set_vulkan_device(const VulkanDevice* vkdev); + + const VulkanDevice* vulkan_device() const; +#endif // NCNN_VULKAN + +#if NCNN_STRING + // register custom layer by layer type name + // return 0 if success + int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); + virtual int custom_layer_to_index(const char* type); +#endif // NCNN_STRING + // register custom layer by layer type + // return 0 if success + int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); + +#if NCNN_STRING + int load_param(const DataReader& dr); +#endif // NCNN_STRING + + int load_param_bin(const DataReader& dr); + + int load_model(const DataReader& dr); + +#if NCNN_STDIO +#if NCNN_STRING + // load network structure from plain param file + // return 0 if success + int load_param(FILE* fp); + int load_param(const char* protopath); + int load_param_mem(const char* mem); +#endif // NCNN_STRING + // load network structure from binary param file + // return 0 if success + int load_param_bin(FILE* fp); + int load_param_bin(const char* protopath); + + // load network weight data from model file + // return 0 if success + int load_model(FILE* fp); + int load_model(const char* modelpath); +#endif // NCNN_STDIO + + // load network structure from external memory + // memory pointer must be 32-bit aligned + // return bytes consumed + int load_param(const unsigned char* mem); + + // reference network weight data from external memory + // weight data is not copied but referenced + // so external memory should be retained when used + // memory pointer must be 32-bit aligned + // return bytes consumed + int load_model(const unsigned char* mem); + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#if NCNN_STRING + // convenient load network structure from android asset plain param file + int load_param(AAsset* asset); + int load_param(AAssetManager* mgr, const char* assetpath); +#endif // NCNN_STRING + // convenient load network structure from android asset binary param file + int load_param_bin(AAsset* asset); + int load_param_bin(AAssetManager* mgr, const char* assetpath); + + // convenient load network weight data from android asset model file + int load_model(AAsset* asset); + int load_model(AAssetManager* mgr, const char* assetpath); +#endif // __ANDROID_API__ >= 9 
+#endif // NCNN_PLATFORM_API + + // unload network structure and weight data + void clear(); + + // construct an Extractor from network + Extractor create_extractor() const; + + // get input/output indexes/names + const std::vector<int>& input_indexes() const; + const std::vector<int>& output_indexes() const; +#if NCNN_STRING + const std::vector<const char*>& input_names() const; + const std::vector<const char*>& output_names() const; +#endif + + const std::vector<Blob>& blobs() const; + const std::vector<Layer*>& layers() const; + + std::vector<Blob>& mutable_blobs(); + std::vector<Layer*>& mutable_layers(); + +protected: + friend class Extractor; +#if NCNN_STRING + int find_blob_index_by_name(const char* name) const; + int find_layer_index_by_name(const char* name) const; + virtual Layer* create_custom_layer(const char* type); +#endif // NCNN_STRING + virtual Layer* create_custom_layer(int index); + +private: + Net(const Net&); + Net& operator=(const Net&); + +private: + NetPrivate* const d; +}; + +class ExtractorPrivate; +class NCNN_EXPORT Extractor +{ +public: + virtual ~Extractor(); + + // copy + Extractor(const Extractor&); + + // assign + Extractor& operator=(const Extractor&); + + // clear blob mats and allocators + void clear(); + + // enable light mode + // intermediate blob will be recycled when enabled + // enabled by default + void set_light_mode(bool enable); + + // set thread count for this extractor + // this will overwrite the global setting + // default count is system dependent + void set_num_threads(int num_threads); + + // set blob memory allocator + void set_blob_allocator(Allocator* allocator); + + // set workspace memory allocator + void set_workspace_allocator(Allocator* allocator); + +#if NCNN_VULKAN + void set_vulkan_compute(bool enable); + + void set_blob_vkallocator(VkAllocator* allocator); + + void set_workspace_vkallocator(VkAllocator* allocator); + + void set_staging_vkallocator(VkAllocator* allocator); +#endif // NCNN_VULKAN + +#if NCNN_STRING + // set input by blob name + // return 0 if success + int input(const char* blob_name, const Mat& in); + + // get result by blob name + // return 0 if success + // type = 0, default + // type = 1, do not convert fp16/bf16 and/or packing + int extract(const char* blob_name, Mat& feat, int type = 0); +#endif // NCNN_STRING + + // set input by blob index + // return 0 if success + int input(int blob_index, const Mat& in); + + // get result by blob index + // return 0 if success + // type = 0, default + // type = 1, do not convert fp16/bf16 and/or packing + int extract(int blob_index, Mat& feat, int type = 0); + +#if NCNN_VULKAN +#if NCNN_STRING + // set input by blob name + // return 0 if success + int input(const char* blob_name, const VkMat& in); + + // get result by blob name + // return 0 if success + int extract(const char* blob_name, VkMat& feat, VkCompute& cmd); + + // set input by blob name + // return 0 if success + int input(const char* blob_name, const VkImageMat& in); + + // get result by blob name + // return 0 if success + int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd); +#endif // NCNN_STRING + + // set input by blob index + // return 0 if success + int input(int blob_index, const VkMat& in); + + // get result by blob index + // return 0 if success + int extract(int blob_index, VkMat& feat, VkCompute& cmd); + + // set input by blob index + // return 0 if success + int input(int blob_index, const VkImageMat& in); + + // get result by blob index + // return 0 if success + int extract(int blob_index, VkImageMat& feat, VkCompute& cmd); +#endif // 
NCNN_VULKAN + +protected: + friend Extractor Net::create_extractor() const; + Extractor(const Net* net, size_t blob_count); + +private: + ExtractorPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_NET_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/option.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/option.h new file mode 100644 index 0000000..3fda808 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/option.h @@ -0,0 +1,153 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_OPTION_H +#define NCNN_OPTION_H + +#include "platform.h" + +namespace ncnn { + +#if NCNN_VULKAN +class VkAllocator; +class PipelineCache; +#endif // NCNN_VULKAN + +class Allocator; +class NCNN_EXPORT Option +{ +public: + // default option + Option(); + +public: + // light mode + // intermediate blob will be recycled when enabled + // enabled by default + bool lightmode; + + // thread count + // default value is the one returned by get_cpu_count() + int num_threads; + + // blob memory allocator + Allocator* blob_allocator; + + // workspace memory allocator + Allocator* workspace_allocator; + +#if NCNN_VULKAN + // blob memory allocator + VkAllocator* blob_vkallocator; + + // workspace memory allocator + VkAllocator* workspace_vkallocator; + + // staging memory allocator + VkAllocator* staging_vkallocator; + + // pipeline cache + PipelineCache* pipeline_cache; +#endif // NCNN_VULKAN + + // the time openmp threads busy-wait for more work before going to sleep + // default value is 20ms to keep the cores enabled + // without too much extra power consumption afterwards + int openmp_blocktime; + + // enable winograd convolution optimization + // improve convolution 3x3 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_winograd_convolution; + + // enable sgemm convolution optimization + // improve convolution 1x1 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_sgemm_convolution; + + // enable quantized int8 inference + // use low-precision int8 path for quantized model + // changes should be applied before loading network structure and weight + // enabled by default + bool use_int8_inference; + + // enable vulkan compute + bool use_vulkan_compute; + + // enable bf16 data type for storage + // improve most operator performance on all arm devices, may consume more memory + bool use_bf16_storage; + + // enable options for gpu inference + bool use_fp16_packed; + bool use_fp16_storage; + bool use_fp16_arithmetic; + bool use_int8_packed; + bool use_int8_storage; + bool use_int8_arithmetic; + + // 
enable simd-friendly packed memory layout + // improve all operator performance on all arm devices, will consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_packing_layout; + + bool use_shader_pack8; + + // subgroup option + bool use_subgroup_basic; + bool use_subgroup_vote; + bool use_subgroup_ballot; + bool use_subgroup_shuffle; + + // turn on for adreno + bool use_image_storage; + bool use_tensor_storage; + + bool use_reserved_0; + + // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero) + // default value is 3 + // 0 = DAZ OFF, FTZ OFF + // 1 = DAZ ON , FTZ OFF + // 2 = DAZ OFF, FTZ ON + // 3 = DAZ ON, FTZ ON + int flush_denormals; + + bool use_local_pool_allocator; + + // enable local memory optimization for gpu inference + bool use_shader_local_memory; + + // enable cooperative matrix optimization for gpu inference + bool use_cooperative_matrix; + + // more fine-grained control of winograd convolution + bool use_winograd23_convolution; + bool use_winograd43_convolution; + bool use_winograd63_convolution; + + bool use_reserved_6; + bool use_reserved_7; + bool use_reserved_8; + bool use_reserved_9; + bool use_reserved_10; + bool use_reserved_11; +}; + +} // namespace ncnn + +#endif // NCNN_OPTION_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/paramdict.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/paramdict.h new file mode 100644 index 0000000..c2ef160 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/paramdict.h @@ -0,0 +1,73 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
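To orient readers of the Net and Extractor declarations above, here is an illustrative Android-side fragment. It is not code from this demo: the output blob name "out0" is an assumption, while "images" matches the input blob of the bundled yolov8n.param, and the loaders shown are the AAssetManager overloads declared in net.h.

// Hedged sketch; the "out0" blob name is assumed, not taken from this project.
#include <android/asset_manager.h>
#include "net.h"

static int run_inference(AAssetManager* mgr, const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Net net;
    net.opt.use_vulkan_compute = true; // Option fields must be set before loading

    // both loaders return 0 on success
    if (net.load_param(mgr, "yolov8n.param") != 0)
        return -1;
    if (net.load_model(mgr, "yolov8n.bin") != 0)
        return -1;

    ncnn::Extractor ex = net.create_extractor();
    ex.input("images", in);         // input blob name from yolov8n.param
    return ex.extract("out0", out); // 0 on success
}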
+ +#ifndef NCNN_PARAMDICT_H +#define NCNN_PARAMDICT_H + +#include "mat.h" + +// at most 32 parameters +#define NCNN_MAX_PARAM_COUNT 32 + +namespace ncnn { + +class DataReader; +class Net; +class ParamDictPrivate; +class NCNN_EXPORT ParamDict +{ +public: + // empty + ParamDict(); + + virtual ~ParamDict(); + + // copy + ParamDict(const ParamDict&); + + // assign + ParamDict& operator=(const ParamDict&); + + // get type + int type(int id) const; + + // get int + int get(int id, int def) const; + // get float + float get(int id, float def) const; + // get array + Mat get(int id, const Mat& def) const; + + // set int + void set(int id, int i); + // set float + void set(int id, float f); + // set array + void set(int id, const Mat& v); + +protected: + friend class Net; + + void clear(); + + int load_param(const DataReader& dr); + int load_param_bin(const DataReader& dr); + +private: + ParamDictPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_PARAMDICT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/pipeline.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/pipeline.h new file mode 100644 index 0000000..c284a14 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/pipeline.h @@ -0,0 +1,113 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
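The ParamDict getters above are what a custom layer sees in its load_param hook. The sketch below is illustrative: MyLayer and the parameter ids 0 and 1 are invented for the example, not taken from this project.

// Hedged custom-layer fragment; MyLayer and the ids/defaults are assumptions.
#include "layer.h"

class MyLayer : public ncnn::Layer
{
public:
    virtual int load_param(const ncnn::ParamDict& pd)
    {
        num_output = pd.get(0, 0); // int parameter with default 0
        eps = pd.get(1, 1e-5f);    // float parameter with default 1e-5
        return 0;
    }

private:
    int num_output;
    float eps;
};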
+ +#ifndef NCNN_PIPELINE_H +#define NCNN_PIPELINE_H + +#include "mat.h" +#include "platform.h" +#if NCNN_VULKAN +#include "gpu.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +#if NCNN_VULKAN +class Option; +class PipelinePrivate; +class NCNN_EXPORT Pipeline +{ +public: + explicit Pipeline(const VulkanDevice* vkdev); + virtual ~Pipeline(); + +public: + void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); + void set_optimal_local_size_xyz(const Mat& local_size_xyz); + void set_local_size_xyz(int w, int h, int c); + + int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations); + + int create(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations); + +public: + VkShaderModule shader_module() const; + VkDescriptorSetLayout descriptorset_layout() const; + VkPipelineLayout pipeline_layout() const; + VkPipeline pipeline() const; + VkDescriptorUpdateTemplateKHR descriptor_update_template() const; + + const ShaderInfo& shader_info() const; + + uint32_t local_size_x() const; + uint32_t local_size_y() const; + uint32_t local_size_z() const; + +protected: + void set_shader_module(VkShaderModule shader_module); + void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout); + void set_pipeline_layout(VkPipelineLayout pipeline_layout); + void set_pipeline(VkPipeline pipeline); + void set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template); + + void set_shader_info(const ShaderInfo& shader_info); + +public: + const VulkanDevice* vkdev; + +private: + Pipeline(const Pipeline&); + Pipeline& operator=(const Pipeline&); + +private: + PipelinePrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class VkCompute; +class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline +{ +public: + explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev); + virtual ~ImportAndroidHardwareBufferPipeline(); + + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt); + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); + void destroy(); + + friend class VkCompute; + +protected: + int create_shader_module(const Option& opt); + int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator); + int create_descriptorset_layout(); + +public: + int type_to; + int rotate_from; + bool need_resize; + + VkSampler sampler; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/pipelinecache.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/pipelinecache.h new file mode 100644 index 0000000..bb6b8fb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/pipelinecache.h @@ -0,0 +1,85 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_PIPELINECACHE_H +#define NCNN_PIPELINECACHE_H + +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#include "mat.h" +#include "gpu.h" + +namespace ncnn { + +#if NCNN_VULKAN + +class VulkanDevice; +class PipelineCachePrivate; +class NCNN_EXPORT PipelineCache +{ +public: + explicit PipelineCache(const VulkanDevice* _vkdev); + + virtual ~PipelineCache(); + + void clear(); + + int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + + int get_pipeline(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + +protected: + int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* _shader_module, ShaderInfo& si) const; + + int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector<vk_specialization_type>& specializations, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + +protected: + const VulkanDevice* vkdev; + +private: + PipelineCache(const PipelineCache&); + PipelineCache& operator=(const PipelineCache&); + +private: + PipelineCachePrivate* const d; +}; + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINECACHE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/platform.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/platform.h new file mode 100644 index 0000000..ee8bff9 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/platform.h @@ -0,0 +1,286 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
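For context on the Pipeline and PipelineCache declarations above, the fragment below sketches how a compute pipeline is created from raw SPIR-V. It is illustrative only: spv_data/spv_data_size are assumed to hold a compiled shader, the 8x8x1 local size and the single specialization constant are arbitrary, and vkdev would come from ncnn::get_gpu_device() in gpu.h.

// Hedged sketch; the SPIR-V blob and specialization layout are assumptions.
#include <vector>
#include "pipeline.h"

static int build_pipeline(const ncnn::VulkanDevice* vkdev,
                          const uint32_t* spv_data, size_t spv_data_size)
{
    ncnn::Pipeline pipeline(vkdev);
    pipeline.set_optimal_local_size_xyz(8, 8, 1);

    // one specialization constant, matching what the shader declares
    std::vector<ncnn::vk_specialization_type> specializations(1);
    specializations[0].i = 0;

    return pipeline.create(spv_data, spv_data_size, specializations); // 0 on success
}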
+
+#ifndef NCNN_PLATFORM_H
+#define NCNN_PLATFORM_H
+
+#define NCNN_STDIO 1
+#define NCNN_STRING 1
+#define NCNN_SIMPLEOCV 0
+#define NCNN_SIMPLEOMP 0
+#define NCNN_SIMPLESTL 0
+#define NCNN_THREADS 1
+#define NCNN_BENCHMARK 0
+#define NCNN_C_API 1
+#define NCNN_PLATFORM_API 1
+#define NCNN_PIXEL 1
+#define NCNN_PIXEL_ROTATE 1
+#define NCNN_PIXEL_AFFINE 1
+#define NCNN_PIXEL_DRAWING 1
+#define NCNN_VULKAN 1
+#define NCNN_SYSTEM_GLSLANG 0
+#define NCNN_RUNTIME_CPU 1
+#define NCNN_GNU_INLINE_ASM 1
+#define NCNN_AVX 0
+#define NCNN_XOP 0
+#define NCNN_FMA 0
+#define NCNN_F16C 0
+#define NCNN_AVX2 0
+#define NCNN_AVXVNNI 0
+#define NCNN_AVX512 0
+#define NCNN_AVX512VNNI 0
+#define NCNN_AVX512BF16 0
+#define NCNN_AVX512FP16 0
+#define NCNN_VFPV4 0
+#if __aarch64__
+#define NCNN_ARM82 1
+#define NCNN_ARM82DOT 1
+#define NCNN_ARM82FP16FML 1
+#define NCNN_ARM84BF16 1
+#define NCNN_ARM84I8MM 1
+#define NCNN_ARM86SVE 1
+#define NCNN_ARM86SVE2 1
+#define NCNN_ARM86SVEBF16 1
+#define NCNN_ARM86SVEI8MM 1
+#define NCNN_ARM86SVEF32MM 1
+#endif // __aarch64__
+#define NCNN_MSA 0
+#define NCNN_LSX 0
+#define NCNN_MMI 0
+#define NCNN_RVV 0
+#define NCNN_INT8 1
+#define NCNN_BF16 1
+#define NCNN_FORCE_INLINE 1
+
+#define NCNN_VERSION_STRING "1.0.20230223"
+
+#include "ncnn_export.h"
+
+#ifdef __cplusplus
+
+#if NCNN_THREADS
+#if (defined _WIN32 && !(defined __MINGW32__))
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#else
+#include <pthread.h>
+#endif
+#endif // NCNN_THREADS
+
+#if __ANDROID_API__ >= 26
+#define VK_USE_PLATFORM_ANDROID_KHR
+#endif // __ANDROID_API__ >= 26
+
+namespace ncnn {
+
+#if NCNN_THREADS
+#if (defined _WIN32 && !(defined __MINGW32__))
+class NCNN_EXPORT Mutex
+{
+public:
+    Mutex() { InitializeSRWLock(&srwlock); }
+    ~Mutex() {}
+    void lock_() { AcquireSRWLockExclusive(&srwlock); }
+    void unlock() { ReleaseSRWLockExclusive(&srwlock); }
+private:
+    friend class ConditionVariable;
+    // NOTE SRWLock is available from windows vista
+    SRWLOCK srwlock;
+};
+
+class NCNN_EXPORT ConditionVariable
+{
+public:
+    ConditionVariable() { InitializeConditionVariable(&condvar); }
+    ~ConditionVariable() {}
+    void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); }
+    void broadcast() { WakeAllConditionVariable(&condvar); }
+    void signal() { WakeConditionVariable(&condvar); }
+private:
+    CONDITION_VARIABLE condvar;
+};
+
+static unsigned __stdcall start_wrapper(void* args);
+class NCNN_EXPORT Thread
+{
+public:
+    Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); }
+    ~Thread() {}
+    void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); }
+private:
+    friend unsigned __stdcall start_wrapper(void* args)
+    {
+        Thread* t = (Thread*)args;
+        t->_start(t->_args);
+        return 0;
+    }
+    HANDLE handle;
+    void* (*_start)(void*);
+    void* _args;
+};
+
+class NCNN_EXPORT ThreadLocalStorage
+{
+public:
+    ThreadLocalStorage() { key = TlsAlloc(); }
+    ~ThreadLocalStorage() { TlsFree(key); }
+    void set(void* value) { TlsSetValue(key, (LPVOID)value); }
+    void* get() { return (void*)TlsGetValue(key); }
+private:
+    DWORD key;
+};
+#else // (defined _WIN32 && !(defined __MINGW32__))
+class NCNN_EXPORT Mutex
+{
+public:
+    Mutex() { pthread_mutex_init(&mutex, 0); }
+    ~Mutex() { pthread_mutex_destroy(&mutex); }
+    void lock_() { pthread_mutex_lock(&mutex); }
+    void unlock() { pthread_mutex_unlock(&mutex); }
+private:
+    friend class ConditionVariable;
+    pthread_mutex_t mutex;
+};
+
+class NCNN_EXPORT ConditionVariable
+{
+public:
+    ConditionVariable() { pthread_cond_init(&cond, 0); }
+    ~ConditionVariable() { pthread_cond_destroy(&cond); }
+    void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); }
+    void broadcast() { pthread_cond_broadcast(&cond); }
+    void signal() { pthread_cond_signal(&cond); }
+private:
+    pthread_cond_t cond;
+};
+
+class NCNN_EXPORT Thread
+{
+public:
+    Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); }
+    ~Thread() {}
+    void join() { pthread_join(t, 0); }
+private:
+    pthread_t t;
+};
+
+class NCNN_EXPORT ThreadLocalStorage
+{
+public:
+    ThreadLocalStorage() { pthread_key_create(&key, 0); }
+    ~ThreadLocalStorage() { pthread_key_delete(key); }
+    void set(void* value) { pthread_setspecific(key, value); }
+    void* get() { return pthread_getspecific(key); }
+private:
+    pthread_key_t key;
+};
+#endif // (defined _WIN32 && !(defined __MINGW32__))
+#else // NCNN_THREADS
+class NCNN_EXPORT Mutex
+{
+public:
+    Mutex() {}
+    ~Mutex() {}
+    void lock_() {}
+    void unlock() {}
+};
+
+class NCNN_EXPORT ConditionVariable
+{
+public:
+    ConditionVariable() {}
+    ~ConditionVariable() {}
+    void wait(Mutex& /*mutex*/) {}
+    void broadcast() {}
+    void signal() {}
+};
+
+class NCNN_EXPORT Thread
+{
+public:
+    Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {}
+    ~Thread() {}
+    void join() {}
+};
+
+class NCNN_EXPORT ThreadLocalStorage
+{
+public:
+    ThreadLocalStorage() { data = 0; }
+    ~ThreadLocalStorage() {}
+    void set(void* value) { data = value; }
+    void* get() { return data; }
+private:
+    void* data;
+};
+#endif // NCNN_THREADS
+
+class NCNN_EXPORT MutexLockGuard
+{
+public:
+    MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock_(); }
+    ~MutexLockGuard() { mutex.unlock(); }
+private:
+    Mutex& mutex;
+};
+
+} // namespace ncnn
+
+#if NCNN_SIMPLESTL
+#include "simplestl.h"
+#else
+#include <algorithm>
+#include <list>
+#include <string>
+#include <vector>
+#endif
+
+#endif // __cplusplus
+
+#if NCNN_STDIO
+#if NCNN_PLATFORM_API && __ANDROID_API__ >= 8
+#include <android/log.h>
+#define NCNN_LOGE(...) do { \
+    fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \
+    __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0)
+#else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8
+#include <stdio.h>
+#define NCNN_LOGE(...) do { \
+    fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0)
+#endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8
+#else
+#define NCNN_LOGE(...)
+#endif
+
+
+#if NCNN_FORCE_INLINE
+#ifdef _MSC_VER
+    #define NCNN_FORCEINLINE __forceinline
+#elif defined(__GNUC__)
+    #define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
+#elif defined(__CLANG__)
+    #if __has_attribute(__always_inline__)
+        #define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
+    #else
+        #define NCNN_FORCEINLINE inline
+    #endif
+#else
+    #define NCNN_FORCEINLINE inline
+#endif
+#else
+    #define NCNN_FORCEINLINE inline
+#endif
+
+#endif // NCNN_PLATFORM_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simpleocv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simpleocv.h
new file mode 100644
index 0000000..55ede15
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simpleocv.h
@@ -0,0 +1,501 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_SIMPLEOCV_H
+#define NCNN_SIMPLEOCV_H
+
+#include "platform.h"
+
+#if NCNN_SIMPLEOCV
+
+#include <limits.h>
+#include <string.h>
+#include "allocator.h"
+#include "mat.h"
+
+#if defined(_MSC_VER) || defined(__GNUC__)
+#pragma push_macro("min")
+#pragma push_macro("max")
+#undef min
+#undef max
+#endif
+
+#ifndef NCNN_XADD
+using ncnn::NCNN_XADD;
+#endif
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+
+enum
+{
+    CV_LOAD_IMAGE_UNCHANGED = -1,
+    CV_LOAD_IMAGE_GRAYSCALE = 0,
+    CV_LOAD_IMAGE_COLOR = 1,
+};
+
+enum
+{
+    CV_IMWRITE_JPEG_QUALITY = 1
+};
+
+// minimal opencv style data structure implementation
+namespace cv {
+
+template<typename _Tp>
+static inline _Tp saturate_cast(int v)
+{
+    return _Tp(v);
+}
+template<>
+inline uchar saturate_cast<uchar>(int v)
+{
+    return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
+}
+
+template<typename _Tp>
+struct Scalar_
+{
+    Scalar_()
+    {
+        v[0] = 0;
+        v[1] = 0;
+        v[2] = 0;
+        v[3] = 0;
+    }
+    Scalar_(_Tp _v0)
+    {
+        v[0] = _v0;
+        v[1] = 0;
+        v[2] = 0;
+        v[3] = 0;
+    }
+    Scalar_(_Tp _v0, _Tp _v1, _Tp _v2)
+    {
+        v[0] = _v0;
+        v[1] = _v1;
+        v[2] = _v2;
+        v[3] = 0;
+    }
+    Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3)
+    {
+        v[0] = _v0;
+        v[1] = _v1;
+        v[2] = _v2;
+        v[3] = _v3;
+    }
+
+    const _Tp operator[](const int i) const
+    {
+        return v[i];
+    }
+
+    _Tp operator[](const int i)
+    {
+        return v[i];
+    }
+
+    _Tp v[4];
+};
+
+typedef Scalar_<uchar> Scalar;
+
+template<typename _Tp>
+struct Point_
+{
+    Point_()
+        : x(0), y(0)
+    {
+    }
+    Point_(_Tp _x, _Tp _y)
+        : x(_x), y(_y)
+    {
+    }
+
+    template<typename _Tp2>
+    operator Point_<_Tp2>() const
+    {
+        return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y));
+    }
+
+    _Tp x;
+    _Tp y;
+};
+
+typedef Point_<int> Point;
+typedef Point_<float> Point2f;
+
+template<typename _Tp>
+struct Size_
+{
+    Size_()
+        : width(0), height(0)
+    {
+    }
+    Size_(_Tp _w, _Tp _h)
+        : width(_w), height(_h)
+    {
+    }
+
+    template<typename _Tp2>
+    operator Size_<_Tp2>() const
+    {
+        return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
+    }
+
+    _Tp width;
+    _Tp height;
+};
+
+typedef Size_<int> Size;
+typedef Size_<float> Size2f;
+
+template<typename _Tp>
+struct Rect_
+{
+    Rect_()
+        : x(0), y(0), width(0), height(0)
+    {
+    }
+    Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h)
+        : x(_x), y(_y), width(_w), height(_h)
+    {
+    }
+    Rect_(Point_<_Tp> _p, Size_<_Tp> _size)
+        : x(_p.x), y(_p.y), width(_size.width), height(_size.height)
+    {
+    }
+
+    template<typename _Tp2>
+    operator Rect_<_Tp2>() const
+    {
+        return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
+    }
+
+    _Tp x;
+    _Tp y;
+    _Tp width;
+    _Tp height;
+
+    // area
+    _Tp area() const
+    {
+        return width * height;
+    }
+};
+
+template<typename _Tp>
+static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
+    a.width = std::min(a.x + a.width, b.x + b.width) - x1;
+    a.height = std::min(a.y + a.height, b.y + b.height) - y1;
+    a.x = x1;
+    a.y = y1;
+    if (a.width <= 0 || a.height <= 0)
+        a = Rect_<_Tp>();
+    return a;
+}
+
+template<typename _Tp>
+static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y);
+    a.width = std::max(a.x + a.width, b.x + b.width) - x1;
+    a.height = std::max(a.y + a.height, b.y + b.height) - y1;
+    a.x = x1;
+    a.y = y1;
+    return a;
+}
+
+template<typename _Tp>
+static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c &= b;
+}
+
+template<typename _Tp>
+static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c |= b;
+}
+
+typedef Rect_<int> Rect;
+typedef Rect_<float> Rect2f;
+
+#define CV_8UC1 1
+#define CV_8UC3 3
+#define CV_8UC4 4
+#define CV_32FC1 4
+
+struct NCNN_EXPORT Mat
+{
+    Mat()
+        : data(0), refcount(0), rows(0), cols(0), c(0)
+    {
+    }
+
+    Mat(int _rows, int _cols, int flags)
+        : data(0), refcount(0)
+    {
+        create(_rows, _cols, flags);
+    }
+
+    // copy
+    Mat(const Mat& m)
+        : data(m.data), refcount(m.refcount)
+    {
+        if (refcount)
+            NCNN_XADD(refcount, 1);
+
+        rows = m.rows;
+        cols = m.cols;
+        c = m.c;
+    }
+
+    Mat(int _rows, int _cols, int flags, void* _data)
+        : data((unsigned char*)_data), refcount(0)
+    {
+        rows = _rows;
+        cols = _cols;
+        c = flags;
+    }
+
+    ~Mat()
+    {
+        release();
+    }
+
+    // assign
+    Mat& operator=(const Mat& m)
+    {
+        if (this == &m)
+            return *this;
+
+        if (m.refcount)
+            NCNN_XADD(m.refcount, 1);
+
+        release();
+
+        data = m.data;
+        refcount = m.refcount;
+
+        rows = m.rows;
+        cols = m.cols;
+        c = m.c;
+
+        return *this;
+    }
+
+    Mat& operator=(const Scalar& s)
+    {
+        if (total() > 0)
+        {
+            uchar* p = data;
+            for (int i = 0; i < cols * rows; i++)
+            {
+                for (int j = 0; j < c; j++)
+                {
+                    *p++ = s[j];
+                }
+            }
+        }
+
+        return *this;
+    }
+
+    void create(int _rows, int _cols, int flags)
+    {
+        release();
+
+        rows = _rows;
+        cols = _cols;
+        c = flags;
+
+        if (total() > 0)
+        {
+            // refcount address must be aligned, so we expand totalsize here
+            size_t totalsize = (total() + 3) >> 2 << 2;
+            data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount));
+            refcount = (int*)(((uchar*)data) + totalsize);
+            *refcount = 1;
+        }
+    }
+
+    void release()
+    {
+        if (refcount && NCNN_XADD(refcount, -1) == 1)
+            ncnn::fastFree(data);
+
+        data = 0;
+
+        rows = 0;
+        cols = 0;
+        c = 0;
+
+        refcount = 0;
+    }
+
+    Mat clone() const
+    {
+        if (empty())
+            return Mat();
+
+        Mat m(rows, cols, c);
+
+        if (total() > 0)
+        {
+            memcpy(m.data, data, total());
+        }
+
+        return m;
+    }
+
+    bool empty() const
+    {
+        return data == 0 || total() == 0;
+    }
+
+    int channels() const
+    {
+        return c;
+    }
+
+    int type() const
+    {
+        return c;
+    }
+
+    size_t total() const
+    {
+        return cols * rows * c;
+    }
+
+    const uchar* ptr(int y) const
+    {
+        return data + y * cols * c;
+    }
+
+    uchar* ptr(int y)
+    {
+        return data + y * cols * c;
+    }
+
+    template<typename _Tp>
+    const _Tp* ptr(int y) const
+    {
+        return (const _Tp*)data + y * cols * c;
+    }
+
+    template<typename _Tp>
+    _Tp* ptr(int y)
+    {
+        return (_Tp*)data + y * cols * c;
+    }
+
+    // roi
+    Mat operator()(const Rect& roi) const
+    {
+        if (empty())
+            return Mat();
+
+        Mat m(roi.height, roi.width, c);
+
+        int sy = roi.y;
+        for (int y = 0; y < roi.height; y++)
+        {
+            const uchar* sptr = ptr(sy) + roi.x * c;
+            uchar* dptr = m.ptr(y);
+            memcpy(dptr, sptr, roi.width * c);
+            sy++;
+        }
+
+        return m;
+    }
+
+    uchar* data;
+
+    // pointer to the reference counter;
+    // when points to user-allocated data, the pointer is NULL
+    int* refcount;
+
+    int rows;
+    int cols;
+
+    int c;
+};
+
+enum ImreadModes
+{
+    IMREAD_UNCHANGED = -1,
+    IMREAD_GRAYSCALE = 0,
+    IMREAD_COLOR = 1
+};
+
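// ----------------------------------------------------------------------------
// Editorial usage sketch (not part of the vendored simpleocv.h). Note that,
// unlike full OpenCV, the Mat::operator()(const Rect&) above returns a deep
// copy of the ROI rather than a view into the parent image. A minimal,
// hypothetical example, assuming the library was built with NCNN_SIMPLEOCV
// enabled (this prebuilt sets it to 0 in platform.h):
//
//     cv::Mat img = cv::imread("input.jpg", cv::IMREAD_COLOR); // 3-channel
//     cv::Rect roi(10, 10, 64, 64);
//     cv::Mat patch = img(roi);        // deep copy of the 64x64 region
//     cv::imwrite("patch.jpg", patch);
// ----------------------------------------------------------------------------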
+NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR);
+
+enum ImwriteFlags
+{
+    IMWRITE_JPEG_QUALITY = 1
+};
+
+NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector<int>& params = std::vector<int>());
+
+NCNN_EXPORT void imshow(const std::string& name, const Mat& m);
+
+NCNN_EXPORT int waitKey(int delay = 0);
+
+#if NCNN_PIXEL
+NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0);
+#endif // NCNN_PIXEL
+
+#if NCNN_PIXEL_DRAWING
+
+enum
+{
+    FILLED = -1
+};
+
+NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1);
+
+NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1);
+
+NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1);
+
+NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1);
+
+enum
+{
+    FONT_HERSHEY_SIMPLEX = 0
+};
+
+NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1);
+
+NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine);
+
+#endif // NCNN_PIXEL_DRAWING
+
+} // namespace cv
+
+#if defined(_MSC_VER) || defined(__GNUC__)
+#pragma pop_macro("min")
+#pragma pop_macro("max")
+#endif
+
+#endif // NCNN_SIMPLEOCV
+
+#endif // NCNN_SIMPLEOCV_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simpleomp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simpleomp.h
new file mode 100644
index 0000000..13e2452
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simpleomp.h
@@ -0,0 +1,53 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
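// ----------------------------------------------------------------------------
// Editorial usage sketch (not part of the vendored simpleomp.h whose body
// follows). simpleomp is only compiled in when NCNN_SIMPLEOMP is 1; this
// prebuilt sets it to 0 and links a regular OpenMP runtime instead, but the
// calls below behave the same either way. A hypothetical way to pin ncnn to
// two worker threads on a phone:
//
//     omp_set_dynamic(0);     // keep the requested thread count fixed
//     omp_set_num_threads(2); // e.g. match the number of big cores
// ----------------------------------------------------------------------------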
+
+#ifndef NCNN_SIMPLEOMP_H
+#define NCNN_SIMPLEOMP_H
+
+#include "platform.h"
+
+#if NCNN_SIMPLEOMP
+
+#include <stdint.h>
+
+// This minimal openmp runtime implementation only supports the llvm openmp abi
+// and only supports #pragma omp parallel for num_threads(X)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NCNN_EXPORT int omp_get_max_threads();
+
+NCNN_EXPORT void omp_set_num_threads(int num_threads);
+
+NCNN_EXPORT int omp_get_dynamic();
+
+NCNN_EXPORT void omp_set_dynamic(int dynamic);
+
+NCNN_EXPORT int omp_get_num_threads();
+
+NCNN_EXPORT int omp_get_thread_num();
+
+NCNN_EXPORT int kmp_get_blocktime();
+
+NCNN_EXPORT void kmp_set_blocktime(int blocktime);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // NCNN_SIMPLEOMP
+
+#endif // NCNN_SIMPLEOMP_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simplestl.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simplestl.h
new file mode 100644
index 0000000..00ff468
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/simplestl.h
@@ -0,0 +1,565 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_SIMPLESTL_H
+#define NCNN_SIMPLESTL_H
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if !NCNN_SIMPLESTL
+
+#include <utility>
+
+#else
+
+// allocation functions
+NCNN_EXPORT void* operator new(size_t size);
+NCNN_EXPORT void* operator new[](size_t size);
+// placement allocation functions
+NCNN_EXPORT void* operator new(size_t size, void* ptr);
+NCNN_EXPORT void* operator new[](size_t size, void* ptr);
+// deallocation functions
+NCNN_EXPORT void operator delete(void* ptr);
+NCNN_EXPORT void operator delete[](void* ptr);
+// deallocation functions since c++14
+#if __cplusplus >= 201402L
+NCNN_EXPORT void operator delete(void* ptr, size_t sz);
+NCNN_EXPORT void operator delete[](void* ptr, size_t sz);
+#endif
+// placement deallocation functions
+NCNN_EXPORT void operator delete(void* ptr, void* voidptr2);
+NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2);
+
+#endif
+
+// minimal stl data structure implementation
+namespace std {
+
+template<typename T>
+const T& max(const T& a, const T& b)
+{
+    return (a < b) ? b : a;
+}
+
+template<typename T>
+const T& min(const T& a, const T& b)
+{
+    return (a > b) ? b : a;
+}
+
+template<typename T>
+void swap(T& a, T& b)
+{
+    T temp(a);
+    a = b;
+    b = temp;
+}
+
+template<typename T1, typename T2>
+struct pair
+{
+    pair()
+        : first(), second()
+    {
+    }
+    pair(const T1& t1, const T2& t2)
+        : first(t1), second(t2)
+    {
+    }
+
+    T1 first;
+    T2 second;
+};
+
+template<typename T1, typename T2>
+bool operator==(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return (x.first == y.first && x.second == y.second);
+}
+template<typename T1, typename T2>
+bool operator<(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
+}
+template<typename T1, typename T2>
+bool operator!=(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return !(x == y);
+}
+template<typename T1, typename T2>
+bool operator>(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return y < x;
+}
+template<typename T1, typename T2>
+bool operator<=(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return !(y < x);
+}
+template<typename T1, typename T2>
+bool operator>=(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return !(x < y);
+}
+
+template<typename T1, typename T2>
+pair<T1, T2> make_pair(const T1& t1, const T2& t2)
+{
+    return pair<T1, T2>(t1, t2);
+}
+
+template<typename T>
+struct node
+{
+    node* prev_;
+    node* next_;
+    T data_;
+
+    node()
+        : prev_(0), next_(0), data_()
+    {
+    }
+    node(const T& t)
+        : prev_(0), next_(0), data_(t)
+    {
+    }
+};
+
+template<typename T>
+struct iter_list
+{
+    iter_list()
+        : curr_(0)
+    {
+    }
+    iter_list(node<T>* n)
+        : curr_(n)
+    {
+    }
+    iter_list(const iter_list& i)
+        : curr_(i.curr_)
+    {
+    }
+    ~iter_list()
+    {
+    }
+
+    iter_list& operator=(const iter_list& i)
+    {
+        curr_ = i.curr_;
+        return *this;
+    }
+
+    T& operator*()
+    {
+        return curr_->data_;
+    }
+    T* operator->()
+    {
+        return &(curr_->data_);
+    }
+
+    bool operator==(const iter_list& i)
+    {
+        return curr_ == i.curr_;
+    }
+    bool operator!=(const iter_list& i)
+    {
+        return curr_ != i.curr_;
+    }
+
+    iter_list& operator++()
+    {
+        curr_ = curr_->next_;
+        return *this;
+    }
+    iter_list& operator--()
+    {
+        curr_ = curr_->prev_;
+        return *this;
+    }
+
+    node<T>* curr_;
+};
+
+template<typename T>
+struct list
+{
+    typedef iter_list<T> iterator;
+
+    list()
+    {
+        head_ = new node<T>();
+        tail_ = head_;
+        count_ = 0;
+    }
+    ~list()
+    {
+        clear();
+        delete head_;
+    }
+    list(const list& l)
+    {
+        head_ = new node<T>();
+        tail_ = head_;
+        count_ = 0;
+
+        for (iter_list<T> i = l.begin(); i != l.end(); ++i)
+        {
+            push_back(*i);
+        }
+    }
+
+    list& operator=(const list& l)
+    {
+        if (this == &l)
+        {
+            return *this;
+        }
+        clear();
+
+        for (iter_list<T> i = l.begin(); i != l.end(); ++i)
+        {
+            push_back(*i);
+        }
+        return *this;
+    }
+
+    void clear()
+    {
+        while (count_ > 0)
+        {
+            pop_front();
+        }
+    }
+
+    void pop_front()
+    {
+        if (count_ > 0)
+        {
+            head_ = head_->next_;
+            delete head_->prev_;
+            head_->prev_ = 0;
+            --count_;
+        }
+    }
+
+    size_t size() const
+    {
+        return count_;
+    }
+    iter_list<T> begin() const
+    {
+        return iter_list<T>(head_);
+    }
+    iter_list<T> end() const
+    {
+        return iter_list<T>(tail_);
+    }
+    bool empty() const
+    {
+        return count_ == 0;
+    }
+
+    void push_back(const T& t)
+    {
+        if (count_ == 0)
+        {
+            head_ = new node<T>(t);
+            head_->prev_ = 0;
+            head_->next_ = tail_;
+            tail_->prev_ = head_;
+            count_ = 1;
+        }
+        else
+        {
+            node<T>* temp = new node<T>(t);
+            temp->prev_ = tail_->prev_;
+            temp->next_ = tail_;
+            tail_->prev_->next_ = temp;
+            tail_->prev_ = temp;
+            ++count_;
+        }
+    }
+
+    iter_list<T> erase(iter_list<T> pos)
+    {
+        if (pos != end())
+        {
+            node<T>* temp = pos.curr_;
+            if (temp == head_)
+            {
+                ++pos;
+                temp->next_->prev_ = 0;
+                head_ = temp->next_;
+            }
+            else
+            {
+                --pos;
+                temp->next_->prev_ = temp->prev_;
+                temp->prev_->next_ = temp->next_;
+                ++pos;
+            }
+            delete temp;
+            --count_;
+        }
+        return pos;
+    }
+
+protected:
+    node<T>* head_;
+    node<T>* tail_;
+    size_t count_;
+};
+
+template<typename T>
+struct greater
+{
+    bool operator()(const T& x, const T& y) const
+    {
+        return (x > y);
+    }
+};
+
+template<typename T>
+struct less
+{
+    bool operator()(const T& x, const T& y) const
+    {
+        return (x < y);
+    }
+};
+
+template<class RandomAccessIter, class Compare>
+void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp)
+{
+    // [TODO] heap sort should be used here, but we simply use bubble sort now
+    for (RandomAccessIter i = first; i < middle; ++i)
+    {
+        // bubble sort
+        for (RandomAccessIter j = last - 1; j > first; --j)
+        {
+            if (comp(*j, *(j - 1)))
+            {
+                swap(*j, *(j - 1));
+            }
+        }
+    }
+}
+
+template<typename T>
+struct vector
+{
+    vector()
+        : data_(0), size_(0), capacity_(0)
+    {
+    }
+    vector(const size_t new_size, const T& value = T())
+        : data_(0), size_(0), capacity_(0)
+    {
+        resize(new_size, value);
+    }
+    ~vector()
+    {
+        clear();
+    }
+    vector(const vector& v)
+        : data_(0), size_(0), capacity_(0)
+    {
+        resize(v.size());
+        for (size_t i = 0; i < size_; i++)
+        {
+            data_[i] = v.data_[i];
+        }
+    }
+
+    vector& operator=(const vector& v)
+    {
+        if (this == &v)
+        {
+            return *this;
+        }
+        resize(0);
+        resize(v.size());
+        for (size_t i = 0; i < size_; i++)
+        {
+            data_[i] = v.data_[i];
+        }
+        return *this;
+    }
+
+    void resize(const size_t new_size, const T& value = T())
+    {
+        try_alloc(new_size);
+        if (new_size > size_)
+        {
+            for (size_t i = size_; i < new_size; i++)
+            {
+                new (&data_[i]) T(value);
+            }
+        }
+        else if (new_size < size_)
+        {
+            for (size_t i = new_size; i < size_; i++)
+            {
+                data_[i].~T();
+            }
+        }
+        size_ = new_size;
+    }
+
+    void clear()
+    {
+        for (size_t i = 0; i < size_; i++)
+        {
+            data_[i].~T();
+        }
+        delete[](char*) data_;
+        data_ = 0;
+        size_ = 0;
+        capacity_ = 0;
+    }
+
+    T* data() const
+    {
+        return data_;
+    }
+    size_t size() const
+    {
+        return size_;
+    }
+    T& operator[](size_t i) const
+    {
+        return data_[i];
+    }
+    T* begin() const
+    {
+        return &data_[0];
+    }
+    T* end() const
+    {
+        return &data_[size_];
+    }
+    bool empty() const
+    {
+        return size_ == 0;
+    }
+
+    void push_back(const T& t)
+    {
+        try_alloc(size_ + 1);
+        new (&data_[size_]) T(t);
+        size_++;
+    }
+
+    void insert(T* pos, T* b, T* e)
+    {
+        vector* v = 0;
+        if (b >= begin() && b < end())
+        {
+            // the same vector
+            v = new vector(*this);
+            b = v->begin() + (b - begin());
+            e = v->begin() + (e - begin());
+        }
+        size_t diff = pos - begin();
+        try_alloc(size_ + (e - b));
+        pos = begin() + diff;
+        memmove(pos + (e - b), pos, (end() - pos) * sizeof(T));
+        size_t len = e - b;
+        size_ += len;
+        for (size_t i = 0; i < len; i++)
+        {
+            *pos = *b;
+            pos++;
+            b++;
+        }
+        delete v;
+    }
+
+    T* erase(T* pos)
+    {
+        pos->~T();
+        memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T));
+        size_--;
+        return pos;
+    }
+
+protected:
+    T* data_;
+    size_t size_;
+    size_t capacity_;
+    void try_alloc(size_t new_size)
+    {
+        if (new_size * 3 / 2 > capacity_ / 2)
+        {
+            capacity_ = new_size * 2;
+            T* new_data = (T*)new char[capacity_ * sizeof(T)];
+            memset(static_cast<void*>(new_data), 0, capacity_ * sizeof(T));
+            if (data_)
+            {
+                memmove(new_data, data_, sizeof(T) * size_);
+                delete[](char*) data_;
+            }
+            data_ = new_data;
+        }
+    }
+};
+
+struct NCNN_EXPORT string : public vector<char>
+{
+    string()
+    {
+    }
+    string(const char* str)
+    {
+        size_t len = strlen(str);
+        resize(len);
+        memcpy(data_, str, len);
+    }
+    const char* c_str() const
+    {
+        return (const char*)data_;
+    }
+    bool operator==(const string& str2) const
+    {
+        return strcmp(data_, str2.data_) == 0;
+    }
+    bool operator==(const char* str2) const
+    {
+        return strcmp(data_, str2) == 0;
+    }
+    bool operator!=(const char* str2) const
+    {
+        return strcmp(data_, str2) != 0;
+    }
+    string& operator+=(const string& str1)
+    {
+        insert(end(), str1.begin(), str1.end());
+        return *this;
+    }
+};
+
+inline string operator+(const string& str1, const string& str2)
+{
+    string str(str1);
+    str.insert(str.end(), str2.begin(), str2.end());
+    return str;
+}
+
+} // namespace std
+
+#endif // NCNN_SIMPLESTL_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h
new file mode 100644
index 0000000..103ac3e
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/include/ncnn/vulkan_header_fix.h
@@ -0,0 +1,259 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_VULKAN_HEADER_FIX_H
+#define NCNN_VULKAN_HEADER_FIX_H
+
+#include <vulkan/vulkan.h>
+
+// This header contains new structure and function declarations to fix builds with an old vulkan sdk
+
+#if VK_HEADER_VERSION < 70
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES (VkStructureType)1000094000
+typedef enum VkSubgroupFeatureFlagBits
+{
+    VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001,
+    VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002,
+    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT = 0x00000004,
+    VK_SUBGROUP_FEATURE_BALLOT_BIT = 0x00000008,
+    VK_SUBGROUP_FEATURE_SHUFFLE_BIT = 0x00000010,
+    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT = 0x00000020,
+    VK_SUBGROUP_FEATURE_CLUSTERED_BIT = 0x00000040,
+    VK_SUBGROUP_FEATURE_QUAD_BIT = 0x00000080,
+    VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV = 0x00000100,
+    VK_SUBGROUP_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSubgroupFeatureFlagBits;
+typedef VkFlags VkSubgroupFeatureFlags;
+typedef struct VkPhysicalDeviceSubgroupProperties
+{
+    VkStructureType sType;
+    void* pNext;
+    uint32_t subgroupSize;
+    VkShaderStageFlags supportedStages;
+    VkSubgroupFeatureFlags supportedOperations;
+    VkBool32 quadOperationsInAllStages;
+} VkPhysicalDeviceSubgroupProperties;
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES (VkStructureType)1000168000
+#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT (VkStructureType)1000168001
+typedef struct VkPhysicalDeviceMaintenance3Properties
+{
+    VkStructureType sType;
+    void* pNext;
+    uint32_t maxPerSetDescriptors;
+    VkDeviceSize maxMemoryAllocationSize;
+} VkPhysicalDeviceMaintenance3Properties;
+typedef struct VkDescriptorSetLayoutSupport
+{
+    VkStructureType sType;
+    void* pNext;
+    VkBool32 supported;
+} VkDescriptorSetLayoutSupport;
+typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR;
+typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR;
+typedef void(VKAPI_PTR* PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport);
+#endif // VK_HEADER_VERSION < 70
+
+#if 
VK_HEADER_VERSION < 80 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR (VkStructureType)1000177000 +typedef struct VkPhysicalDevice8BitStorageFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeaturesKHR; +#define VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR (VkStructureType)1000109000 +#define VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR (VkStructureType)1000109001 +#define VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR (VkStructureType)1000109002 +#define VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR (VkStructureType)1000109003 +#define VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR (VkStructureType)1000109004 +#define VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR (VkStructureType)1000109005 +#define VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR (VkStructureType)1000109006 +typedef struct VkAttachmentDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription2KHR; +typedef struct VkAttachmentReference2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t attachment; + VkImageLayout layout; + VkImageAspectFlags aspectMask; +} VkAttachmentReference2KHR; +typedef struct VkSubpassDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t viewMask; + uint32_t inputAttachmentCount; + const VkAttachmentReference2KHR* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference2KHR* pColorAttachments; + const VkAttachmentReference2KHR* pResolveAttachments; + const VkAttachmentReference2KHR* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription2KHR; +typedef struct VkSubpassDependency2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; +} VkSubpassDependency2KHR; +typedef struct VkRenderPassCreateInfo2KHR +{ + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription2KHR* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription2KHR* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency2KHR* pDependencies; + uint32_t correlatedViewMaskCount; + const uint32_t* pCorrelatedViewMasks; +} VkRenderPassCreateInfo2KHR; +typedef struct VkSubpassBeginInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassContents contents; +} VkSubpassBeginInfoKHR; + +typedef struct VkSubpassEndInfoKHR +{ + VkStructureType sType; + const void* pNext; +} VkSubpassEndInfoKHR; +typedef VkResult(VKAPI_PTR* PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2KHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void(VKAPI_PTR* PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const 
VkSubpassBeginInfoKHR* pSubpassBeginInfo); +typedef void(VKAPI_PTR* PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfoKHR* pSubpassBeginInfo, const VkSubpassEndInfoKHR* pSubpassEndInfo); +typedef void(VKAPI_PTR* PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR* pSubpassEndInfo); +#endif // VK_HEADER_VERSION < 80 + +#if VK_HEADER_VERSION < 95 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR (VkStructureType)1000082000 +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; +#endif // VK_HEADER_VERSION < 95 + +#if VK_HEADER_VERSION < 97 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT +{ + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; +#endif // VK_HEADER_VERSION < 97 + +#if VK_HEADER_VERSION < 101 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV (VkStructureType)1000249000 +#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249002 +typedef enum VkComponentTypeNV +{ + VK_COMPONENT_TYPE_FLOAT16_NV = 0, + VK_COMPONENT_TYPE_FLOAT32_NV = 1, + VK_COMPONENT_TYPE_FLOAT64_NV = 2, + VK_COMPONENT_TYPE_SINT8_NV = 3, + VK_COMPONENT_TYPE_SINT16_NV = 4, + VK_COMPONENT_TYPE_SINT32_NV = 5, + VK_COMPONENT_TYPE_SINT64_NV = 6, + VK_COMPONENT_TYPE_UINT8_NV = 7, + VK_COMPONENT_TYPE_UINT16_NV = 8, + VK_COMPONENT_TYPE_UINT32_NV = 9, + VK_COMPONENT_TYPE_UINT64_NV = 10, + VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV, + VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV, + VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1), + VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkComponentTypeNV; +typedef enum VkScopeNV +{ + VK_SCOPE_DEVICE_NV = 1, + VK_SCOPE_WORKGROUP_NV = 2, + VK_SCOPE_SUBGROUP_NV = 3, + VK_SCOPE_QUEUE_FAMILY_NV = 5, + VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV, + VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV, + VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1), + VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkScopeNV; +typedef struct VkCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeNV AType; + VkComponentTypeNV BType; + VkComponentTypeNV CType; + VkComponentTypeNV DType; + VkScopeNV scope; +} VkCooperativeMatrixPropertiesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV +{ + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} VkPhysicalDeviceCooperativeMatrixPropertiesNV; +typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); +#endif // VK_HEADER_VERSION < 101 + +#if VK_HEADER_VERSION < 
208 +typedef enum VkInstanceCreateFlagBits +{ + VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR = 0x00000001, + VK_INSTANCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkInstanceCreateFlagBits; +#endif // VK_HEADER_VERSION < 208 + +#endif // NCNN_VULKAN_HEADER_FIX_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/OGLCompilerTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/OGLCompilerTargets.cmake new file mode 100644 index 0000000..89bb144 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/OGLCompilerTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OGLCompilerTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OGLCompiler) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OGLCompiler ALIAS glslang::OGLCompiler) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/OSDependentTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/OSDependentTargets.cmake new file mode 100644 index 0000000..c345456 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/OSDependentTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OSDependentTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OSDependent) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OSDependent ALIAS glslang::OSDependent) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/SPIRVTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/SPIRVTargets.cmake new file mode 100644 index 0000000..1c614ab --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/SPIRVTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `SPIRVTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::SPIRV) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(SPIRV ALIAS glslang::SPIRV) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake new file mode 100644 index 0000000..c7acdba --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-config-version.cmake @@ -0,0 +1,70 @@ +# This is a basic version file for the Config-mode of find_package(). +# It is used by write_basic_package_version_file() as input file for configure_file() +# to create a version-file which can be installed along a config.cmake file. +# +# The created file sets PACKAGE_VERSION_EXACT if the current version string and +# the requested version string are exactly the same and it sets +# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, +# but only if the requested major version is the same as the current one. 
+# The variable CVF_VERSION must be set before calling configure_file(). + + +set(PACKAGE_VERSION "11.12.0") + +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + + if("11.12.0" MATCHES "^([0-9]+)\\.") + set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}") + if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0) + string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}") + endif() + else() + set(CVF_VERSION_MAJOR "11.12.0") + endif() + + if(PACKAGE_FIND_VERSION_RANGE) + # both endpoints of the range must have the expected major version + math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1") + if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT))) + set(PACKAGE_VERSION_COMPATIBLE FALSE) + elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX))) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + else() + if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + + if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() + endif() +endif() + + +# if the installed project requested no architecture check, don't perform the check +if("FALSE") + return() +endif() + +# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: +if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "") + return() +endif() + +# check that the installed version has the same 32/64bit-ness as the one which is currently searching: +if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8") + math(EXPR installedBits "8 * 8") + set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") + set(PACKAGE_VERSION_UNSUITABLE TRUE) +endif() diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-config.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-config.cmake new file mode 100644 index 0000000..5ebe599 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-config.cmake @@ -0,0 +1,27 @@ + +####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() ####### +####### Any changes to this file will be overwritten by the next CMake run #### +####### The input file was glslang-config.cmake.in ######## + +get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) + +macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() +endmacro() + +macro(check_required_components _NAME) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(NOT ${_NAME}_${comp}_FOUND) + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + endif() + endif() + endforeach() +endmacro() + 
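# ------------------------------------------------------------------------------
# Editorial usage sketch (not part of the generated glslang-config.cmake).
# Consumers normally reach this file indirectly: ncnnConfig.cmake (further
# below) sets glslang_DIR and calls find_package(glslang QUIET). A minimal,
# hypothetical app/src/main/jni/CMakeLists.txt consuming the prebuilt:
#
#     set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20230223-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
#     find_package(ncnn REQUIRED)            # also pulls in glslang and Vulkan
#     target_link_libraries(yolov8ncnn ncnn) # 'yolov8ncnn' is a placeholder target
# ------------------------------------------------------------------------------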
+#################################################################################### + include("${PACKAGE_PREFIX_DIR}/lib/cmake/glslang/glslang-targets.cmake") + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets-release.cmake new file mode 100644 index 0000000..e4de522 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "glslang::OSDependent" for configuration "Release" +set_property(TARGET glslang::OSDependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OSDependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOSDependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OSDependent ) +list(APPEND _cmake_import_check_files_for_glslang::OSDependent "${_IMPORT_PREFIX}/lib/libOSDependent.a" ) + +# Import target "glslang::glslang" for configuration "Release" +set_property(TARGET glslang::glslang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::glslang PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libglslang.a" + ) + +list(APPEND _cmake_import_check_targets glslang::glslang ) +list(APPEND _cmake_import_check_files_for_glslang::glslang "${_IMPORT_PREFIX}/lib/libglslang.a" ) + +# Import target "glslang::MachineIndependent" for configuration "Release" +set_property(TARGET glslang::MachineIndependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::MachineIndependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::MachineIndependent ) +list(APPEND _cmake_import_check_files_for_glslang::MachineIndependent "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" ) + +# Import target "glslang::GenericCodeGen" for configuration "Release" +set_property(TARGET glslang::GenericCodeGen APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::GenericCodeGen PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" + ) + +list(APPEND _cmake_import_check_targets glslang::GenericCodeGen ) +list(APPEND _cmake_import_check_files_for_glslang::GenericCodeGen "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" ) + +# Import target "glslang::OGLCompiler" for configuration "Release" +set_property(TARGET glslang::OGLCompiler APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OGLCompiler PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OGLCompiler ) +list(APPEND _cmake_import_check_files_for_glslang::OGLCompiler "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" ) + +# Import target "glslang::SPIRV" for configuration 
"Release" +set_property(TARGET glslang::SPIRV APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::SPIRV PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libSPIRV.a" + ) + +list(APPEND _cmake_import_check_targets glslang::SPIRV ) +list(APPEND _cmake_import_check_files_for_glslang::SPIRV "${_IMPORT_PREFIX}/lib/libSPIRV.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake new file mode 100644 index 0000000..2173b87 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslang/glslang-targets.cmake @@ -0,0 +1,135 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS glslang::OSDependent glslang::glslang glslang::MachineIndependent glslang::GenericCodeGen glslang::OGLCompiler glslang::SPIRV) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target glslang::OSDependent +add_library(glslang::OSDependent STATIC IMPORTED) + +set_target_properties(glslang::OSDependent PROPERTIES + INTERFACE_LINK_LIBRARIES "-pthread" +) + +# Create imported target glslang::glslang +add_library(glslang::glslang STATIC IMPORTED) + +set_target_properties(glslang::glslang PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target glslang::MachineIndependent +add_library(glslang::MachineIndependent STATIC IMPORTED) + +set_target_properties(glslang::MachineIndependent PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target glslang::GenericCodeGen +add_library(glslang::GenericCodeGen STATIC IMPORTED) + +# Create imported target glslang::OGLCompiler +add_library(glslang::OGLCompiler STATIC IMPORTED) + +# Create imported target glslang::SPIRV +add_library(glslang::SPIRV STATIC IMPORTED) + +set_target_properties(glslang::SPIRV PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_LINK_LIBRARIES "\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/glslang-targets-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslangTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslangTargets.cmake new file mode 100644 index 0000000..6f4e3bc --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/glslangTargets.cmake @@ -0,0 +1,15 @@ + + message(WARNING "Using `glslangTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::glslang) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + if(OFF) + add_library(glslang ALIAS glslang::glslang) + else() + add_library(glslang ALIAS glslang::glslang) + add_library(MachineIndependent ALIAS glslang::MachineIndependent) + add_library(GenericCodeGen ALIAS glslang::GenericCodeGen) + endif() + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnn-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnn-release.cmake new file mode 100644 index 0000000..37f24ba --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnn-release.cmake @@ -0,0 +1,19 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "ncnn" for configuration "Release" +set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(ncnn PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a" + ) + +list(APPEND _cmake_import_check_targets ncnn ) +list(APPEND _cmake_import_check_files_for_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake new file mode 100644 index 0000000..f224c1b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnn.cmake @@ -0,0 +1,125 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
+set(_cmake_targets_defined "")
+set(_cmake_targets_not_defined "")
+set(_cmake_expected_targets "")
+foreach(_cmake_expected_target IN ITEMS ncnn)
+  list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
+  if(TARGET "${_cmake_expected_target}")
+    list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
+  else()
+    list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
+  endif()
+endforeach()
+unset(_cmake_expected_target)
+if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
+  unset(_cmake_targets_defined)
+  unset(_cmake_targets_not_defined)
+  unset(_cmake_expected_targets)
+  unset(CMAKE_IMPORT_FILE_VERSION)
+  cmake_policy(POP)
+  return()
+endif()
+if(NOT _cmake_targets_defined STREQUAL "")
+  string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
+  string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
+  message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
+endif()
+unset(_cmake_targets_defined)
+unset(_cmake_targets_not_defined)
+unset(_cmake_expected_targets)
+
+
+# Compute the installation prefix relative to this file.
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+if(_IMPORT_PREFIX STREQUAL "/")
+  set(_IMPORT_PREFIX "")
+endif()
+
+# Create imported target ncnn
+add_library(ncnn STATIC IMPORTED)
+
+set_target_properties(ncnn PROPERTIES
+  INTERFACE_COMPILE_OPTIONS "-fno-rtti;-fno-exceptions"
+  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn"
+  INTERFACE_LINK_LIBRARIES "-fopenmp;-static-openmp;-Wl,-wrap,__kmp_affinity_determine_capable;Threads::Threads;Vulkan::Vulkan;\$<LINK_ONLY:glslang::glslang>;\$<LINK_ONLY:glslang::SPIRV>;android;jnigraphics;log"
+  INTERFACE_POSITION_INDEPENDENT_CODE "ON"
+)
+
+if(CMAKE_VERSION VERSION_LESS 2.8.12)
+  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
+endif()
+
+# Load information for each installed configuration.
+file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/ncnn-*.cmake")
+foreach(_cmake_config_file IN LISTS _cmake_config_files)
+  include("${_cmake_config_file}")
+endforeach()
+unset(_cmake_config_file)
+unset(_cmake_config_files)
+
+# Cleanup temporary variables.
+set(_IMPORT_PREFIX)
+
+# Loop over all imported files and verify that they actually exist
+foreach(_cmake_target IN LISTS _cmake_import_check_targets)
+  foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
+    if(NOT EXISTS "${_cmake_file}")
+      message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
+   \"${_cmake_file}\"
+but this file does not exist. Possible reasons include:
+* The file was deleted, renamed, or moved to another location.
+* An install or uninstall procedure did not complete successfully.
+* The installation package was faulty and contained
+   \"${CMAKE_CURRENT_LIST_FILE}\"
+but not all the files it references.
+")
+    endif()
+  endforeach()
+  unset(_cmake_file)
+  unset("_cmake_import_check_files_for_${_cmake_target}")
+endforeach()
+unset(_cmake_target)
+unset(_cmake_import_check_targets)
+
+# Make sure the targets which have been exported in some other
+# export set exist.
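+# (These are the glslang targets referenced by the $<LINK_ONLY:...> entries
+# above; ncnnConfig.cmake loads them before including this file. A minimal
+# consumer sketch, where "yolov8ncnn" is a hypothetical target and ncnn_DIR
+# must point at this package's lib/cmake/ncnn directory:
+#
+#   set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20230223-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
+#   find_package(ncnn REQUIRED)
+#   target_link_libraries(yolov8ncnn ncnn)
+#
+# The check below then verifies that glslang::glslang and glslang::SPIRV
+# were actually defined.)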
+unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) +foreach(_target "glslang::glslang" "glslang::SPIRV" ) + if(NOT TARGET "${_target}" ) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets "${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets} ${_target}") + endif() +endforeach() + +if(DEFINED ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + if(CMAKE_FIND_PACKAGE_NAME) + set( ${CMAKE_FIND_PACKAGE_NAME}_FOUND FALSE) + set( ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + else() + message(FATAL_ERROR "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + endif() +endif() +unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnnConfig.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnnConfig.cmake new file mode 100644 index 0000000..c1e5613 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/cmake/ncnn/ncnnConfig.cmake @@ -0,0 +1,42 @@ +set(NCNN_OPENMP ON) +set(NCNN_THREADS ON) +set(NCNN_VULKAN ON) +set(NCNN_SHARED_LIB OFF) +set(NCNN_SYSTEM_GLSLANG OFF) + +if(NCNN_OPENMP) + find_package(OpenMP) +endif() + +if(NCNN_THREADS) + set(CMAKE_THREAD_PREFER_PTHREAD TRUE) + set(THREADS_PREFER_PTHREAD_FLAG TRUE) + find_package(Threads REQUIRED) +endif() + +if(NCNN_VULKAN) + find_package(Vulkan REQUIRED) + + if(NOT NCNN_SHARED_LIB) + if(NCNN_SYSTEM_GLSLANG) + find_package(glslang QUIET) + if(NOT glslang_FOUND) + set(GLSLANG_TARGET_DIR "") + include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake) + include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake) + if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + # hlsl support can be optional + include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + endif() + include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake) + include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake) + endif() + else() + set(glslang_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/glslang") + find_package(glslang QUIET) + endif() + + endif() +endif() + +include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libGenericCodeGen.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libGenericCodeGen.a new file mode 100644 index 0000000..3b1609a Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libGenericCodeGen.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libMachineIndependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libMachineIndependent.a new file mode 100644 index 0000000..a6e5466 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libMachineIndependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libOGLCompiler.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libOGLCompiler.a new file mode 100644 index 0000000..42d107d Binary files /dev/null and 
b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libOGLCompiler.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libOSDependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libOSDependent.a new file mode 100644 index 0000000..4e5b7b7 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libOSDependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libSPIRV.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libSPIRV.a new file mode 100644 index 0000000..0af550e Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libSPIRV.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libglslang.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libglslang.a new file mode 100644 index 0000000..85adfb2 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libglslang.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libncnn.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libncnn.a new file mode 100644 index 0000000..84c302a Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/libncnn.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc new file mode 100644 index 0000000..e683e4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/arm64-v8a/lib/pkgconfig/ncnn.pc @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. +librarydir=${prefix}/lib +includedir=${prefix}/include + +Name: ncnn +Description: high-performance neural network inference framework optimized for the mobile platform +Version: 1.0.20230223 +URL: https://github.com/Tencent/ncnn +Libs: -L"${librarydir}" -lncnn +Cflags: -I"${includedir}" + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/BaseTypes.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/BaseTypes.h new file mode 100644 index 0000000..156d98b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/BaseTypes.h @@ -0,0 +1,609 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _BASICTYPES_INCLUDED_ +#define _BASICTYPES_INCLUDED_ + +namespace glslang { + +// +// Basic type. Arrays, vectors, sampler details, etc., are orthogonal to this. +// +enum TBasicType { + EbtVoid, + EbtFloat, + EbtDouble, + EbtFloat16, + EbtInt8, + EbtUint8, + EbtInt16, + EbtUint16, + EbtInt, + EbtUint, + EbtInt64, + EbtUint64, + EbtBool, + EbtAtomicUint, + EbtSampler, + EbtStruct, + EbtBlock, + EbtAccStruct, + EbtReference, + EbtRayQuery, +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type + EbtSpirvType, +#endif + + // HLSL types that live only temporarily. + EbtString, + + EbtNumTypes +}; + +// +// Storage qualifiers. Should align with different kinds of storage or +// resource or GLSL storage qualifier. Expansion is deprecated. +// +// N.B.: You probably DON'T want to add anything here, but rather just add it +// to the built-in variables. See the comment above TBuiltInVariable. +// +// A new built-in variable will normally be an existing qualifier, like 'in', 'out', etc. +// DO NOT follow the design pattern of, say EvqInstanceId, etc. 
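+// A hypothetical illustration of that rule: a newer built-in such as
+// gl_DrawID reuses the existing qualifier EvqVaryingIn and gets only its
+// own EbvDrawId entry in TBuiltInVariable below; it does not get a new
+// EvqDrawId member here.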
+//
+enum TStorageQualifier {
+    EvqTemporary,     // For temporaries (within a function), read/write
+    EvqGlobal,        // For globals read/write
+    EvqConst,         // User-defined constant values, will be semantically constant and constant folded
+    EvqVaryingIn,     // pipeline input, read only, also supercategory for all built-ins not included in this enum (see TBuiltInVariable)
+    EvqVaryingOut,    // pipeline output, read/write, also supercategory for all built-ins not included in this enum (see TBuiltInVariable)
+    EvqUniform,       // read only, shared with app
+    EvqBuffer,        // read/write, shared with app
+    EvqShared,        // compute shader's read/write 'shared' qualifier
+#ifndef GLSLANG_WEB
+    EvqSpirvStorageClass, // spirv_storage_class
+#endif
+
+    EvqPayload,
+    EvqPayloadIn,
+    EvqHitAttr,
+    EvqCallableData,
+    EvqCallableDataIn,
+
+    EvqtaskPayloadSharedEXT,
+
+    // parameters
+    EvqIn,            // also, for 'in' in the grammar before we know if it's a pipeline input or an 'in' parameter
+    EvqOut,           // also, for 'out' in the grammar before we know if it's a pipeline output or an 'out' parameter
+    EvqInOut,
+    EvqConstReadOnly, // input; also other read-only types having neither a constant value nor constant-value semantics
+
+    // built-ins read by vertex shader
+    EvqVertexId,
+    EvqInstanceId,
+
+    // built-ins written by vertex shader
+    EvqPosition,
+    EvqPointSize,
+    EvqClipVertex,
+
+    // built-ins read by fragment shader
+    EvqFace,
+    EvqFragCoord,
+    EvqPointCoord,
+
+    // built-ins written by fragment shader
+    EvqFragColor,
+    EvqFragDepth,
+    EvqFragStencil,
+
+    // end of list
+    EvqLast
+};
+
+//
+// Subcategories of the TStorageQualifier, simply to give a direct mapping
+// between built-in variable names and a numerical value (the enum).
+//
+// For backward compatibility, there is some redundancy between the
+// TStorageQualifier and these. Existing members should both be maintained accurately.
+// However, any new built-in variable (and any existing non-redundant one)
+// must follow the pattern that the specific built-in is here, and only its
+// general qualifier is in TStorageQualifier.
+//
+// Something like gl_Position, which is sometimes 'in' and sometimes 'out'
+// shows up as two different built-in variables in a single stage, but
+// only has a single enum in TBuiltInVariable, so both the
+// TStorageQualifier and the TBuiltInVariable are needed to distinguish
+// between them.
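+// Illustrative pairings under this scheme (all names are from the two
+// enums in this header):
+//
+//   GLSL name      TStorageQualifier   TBuiltInVariable
+//   gl_Position    EvqPosition         EbvPosition
+//   gl_FragCoord   EvqFragCoord        EbvFragCoord
+//   gl_DrawID      EvqVaryingIn        EbvDrawId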
+// +enum TBuiltInVariable { + EbvNone, + EbvNumWorkGroups, + EbvWorkGroupSize, + EbvWorkGroupId, + EbvLocalInvocationId, + EbvGlobalInvocationId, + EbvLocalInvocationIndex, + EbvNumSubgroups, + EbvSubgroupID, + EbvSubGroupSize, + EbvSubGroupInvocation, + EbvSubGroupEqMask, + EbvSubGroupGeMask, + EbvSubGroupGtMask, + EbvSubGroupLeMask, + EbvSubGroupLtMask, + EbvSubgroupSize2, + EbvSubgroupInvocation2, + EbvSubgroupEqMask2, + EbvSubgroupGeMask2, + EbvSubgroupGtMask2, + EbvSubgroupLeMask2, + EbvSubgroupLtMask2, + EbvVertexId, + EbvInstanceId, + EbvVertexIndex, + EbvInstanceIndex, + EbvBaseVertex, + EbvBaseInstance, + EbvDrawId, + EbvPosition, + EbvPointSize, + EbvClipVertex, + EbvClipDistance, + EbvCullDistance, + EbvNormal, + EbvVertex, + EbvMultiTexCoord0, + EbvMultiTexCoord1, + EbvMultiTexCoord2, + EbvMultiTexCoord3, + EbvMultiTexCoord4, + EbvMultiTexCoord5, + EbvMultiTexCoord6, + EbvMultiTexCoord7, + EbvFrontColor, + EbvBackColor, + EbvFrontSecondaryColor, + EbvBackSecondaryColor, + EbvTexCoord, + EbvFogFragCoord, + EbvInvocationId, + EbvPrimitiveId, + EbvLayer, + EbvViewportIndex, + EbvPatchVertices, + EbvTessLevelOuter, + EbvTessLevelInner, + EbvBoundingBox, + EbvTessCoord, + EbvColor, + EbvSecondaryColor, + EbvFace, + EbvFragCoord, + EbvPointCoord, + EbvFragColor, + EbvFragData, + EbvFragDepth, + EbvFragStencilRef, + EbvSampleId, + EbvSamplePosition, + EbvSampleMask, + EbvHelperInvocation, + + EbvBaryCoordNoPersp, + EbvBaryCoordNoPerspCentroid, + EbvBaryCoordNoPerspSample, + EbvBaryCoordSmooth, + EbvBaryCoordSmoothCentroid, + EbvBaryCoordSmoothSample, + EbvBaryCoordPullModel, + + EbvViewIndex, + EbvDeviceIndex, + + EbvShadingRateKHR, + EbvPrimitiveShadingRateKHR, + + EbvFragSizeEXT, + EbvFragInvocationCountEXT, + + EbvSecondaryFragDataEXT, + EbvSecondaryFragColorEXT, + + EbvViewportMaskNV, + EbvSecondaryPositionNV, + EbvSecondaryViewportMaskNV, + EbvPositionPerViewNV, + EbvViewportMaskPerViewNV, + EbvFragFullyCoveredNV, + EbvFragmentSizeNV, + EbvInvocationsPerPixelNV, + // ray tracing + EbvLaunchId, + EbvLaunchSize, + EbvInstanceCustomIndex, + EbvGeometryIndex, + EbvWorldRayOrigin, + EbvWorldRayDirection, + EbvObjectRayOrigin, + EbvObjectRayDirection, + EbvRayTmin, + EbvRayTmax, + EbvCullMask, + EbvHitT, + EbvHitKind, + EbvObjectToWorld, + EbvObjectToWorld3x4, + EbvWorldToObject, + EbvWorldToObject3x4, + EbvIncomingRayFlags, + EbvCurrentRayTimeNV, + // barycentrics + EbvBaryCoordNV, + EbvBaryCoordNoPerspNV, + EbvBaryCoordEXT, + EbvBaryCoordNoPerspEXT, + // mesh shaders + EbvTaskCountNV, + EbvPrimitiveCountNV, + EbvPrimitiveIndicesNV, + EbvClipDistancePerViewNV, + EbvCullDistancePerViewNV, + EbvLayerPerViewNV, + EbvMeshViewCountNV, + EbvMeshViewIndicesNV, + //GL_EXT_mesh_shader + EbvPrimitivePointIndicesEXT, + EbvPrimitiveLineIndicesEXT, + EbvPrimitiveTriangleIndicesEXT, + EbvCullPrimitiveEXT, + + // sm builtins + EbvWarpsPerSM, + EbvSMCount, + EbvWarpID, + EbvSMID, + + // HLSL built-ins that live only temporarily, until they get remapped + // to one of the above. + EbvFragDepthGreater, + EbvFragDepthLesser, + EbvGsOutputStream, + EbvOutputPatch, + EbvInputPatch, + + // structbuffer types + EbvAppendConsume, // no need to differentiate append and consume + EbvRWStructuredBuffer, + EbvStructuredBuffer, + EbvByteAddressBuffer, + EbvRWByteAddressBuffer, + + EbvLast +}; + +// In this enum, order matters; users can assume higher precision is a bigger value +// and EpqNone is 0. 
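+// A sketch of the intended use (variable names are hypothetical): because
+// the values are ordered, the higher of two precisions can be taken with a
+// plain max:
+//
+//   TPrecisionQualifier merged = std::max(declaredPrecision, defaultPrecision);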
+enum TPrecisionQualifier { + EpqNone = 0, + EpqLow, + EpqMedium, + EpqHigh +}; + +#ifdef GLSLANG_WEB +__inline const char* GetStorageQualifierString(TStorageQualifier q) { return ""; } +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) { return ""; } +#else +// These will show up in error messages +__inline const char* GetStorageQualifierString(TStorageQualifier q) +{ + switch (q) { + case EvqTemporary: return "temp"; break; + case EvqGlobal: return "global"; break; + case EvqConst: return "const"; break; + case EvqConstReadOnly: return "const (read only)"; break; +#ifndef GLSLANG_WEB + case EvqSpirvStorageClass: return "spirv_storage_class"; break; +#endif + case EvqVaryingIn: return "in"; break; + case EvqVaryingOut: return "out"; break; + case EvqUniform: return "uniform"; break; + case EvqBuffer: return "buffer"; break; + case EvqShared: return "shared"; break; + case EvqIn: return "in"; break; + case EvqOut: return "out"; break; + case EvqInOut: return "inout"; break; + case EvqVertexId: return "gl_VertexId"; break; + case EvqInstanceId: return "gl_InstanceId"; break; + case EvqPosition: return "gl_Position"; break; + case EvqPointSize: return "gl_PointSize"; break; + case EvqClipVertex: return "gl_ClipVertex"; break; + case EvqFace: return "gl_FrontFacing"; break; + case EvqFragCoord: return "gl_FragCoord"; break; + case EvqPointCoord: return "gl_PointCoord"; break; + case EvqFragColor: return "fragColor"; break; + case EvqFragDepth: return "gl_FragDepth"; break; + case EvqFragStencil: return "gl_FragStencilRefARB"; break; + case EvqPayload: return "rayPayloadNV"; break; + case EvqPayloadIn: return "rayPayloadInNV"; break; + case EvqHitAttr: return "hitAttributeNV"; break; + case EvqCallableData: return "callableDataNV"; break; + case EvqCallableDataIn: return "callableDataInNV"; break; + case EvqtaskPayloadSharedEXT: return "taskPayloadSharedEXT"; break; + default: return "unknown qualifier"; + } +} + +__inline const char* GetBuiltInVariableString(TBuiltInVariable v) +{ + switch (v) { + case EbvNone: return ""; + case EbvNumWorkGroups: return "NumWorkGroups"; + case EbvWorkGroupSize: return "WorkGroupSize"; + case EbvWorkGroupId: return "WorkGroupID"; + case EbvLocalInvocationId: return "LocalInvocationID"; + case EbvGlobalInvocationId: return "GlobalInvocationID"; + case EbvLocalInvocationIndex: return "LocalInvocationIndex"; + case EbvNumSubgroups: return "NumSubgroups"; + case EbvSubgroupID: return "SubgroupID"; + case EbvSubGroupSize: return "SubGroupSize"; + case EbvSubGroupInvocation: return "SubGroupInvocation"; + case EbvSubGroupEqMask: return "SubGroupEqMask"; + case EbvSubGroupGeMask: return "SubGroupGeMask"; + case EbvSubGroupGtMask: return "SubGroupGtMask"; + case EbvSubGroupLeMask: return "SubGroupLeMask"; + case EbvSubGroupLtMask: return "SubGroupLtMask"; + case EbvSubgroupSize2: return "SubgroupSize"; + case EbvSubgroupInvocation2: return "SubgroupInvocationID"; + case EbvSubgroupEqMask2: return "SubgroupEqMask"; + case EbvSubgroupGeMask2: return "SubgroupGeMask"; + case EbvSubgroupGtMask2: return "SubgroupGtMask"; + case EbvSubgroupLeMask2: return "SubgroupLeMask"; + case EbvSubgroupLtMask2: return "SubgroupLtMask"; + case EbvVertexId: return "VertexId"; + case EbvInstanceId: return "InstanceId"; + case EbvVertexIndex: return "VertexIndex"; + case EbvInstanceIndex: return "InstanceIndex"; + case EbvBaseVertex: return "BaseVertex"; + case EbvBaseInstance: return "BaseInstance"; + case EbvDrawId: return "DrawId"; + case EbvPosition: return 
"Position"; + case EbvPointSize: return "PointSize"; + case EbvClipVertex: return "ClipVertex"; + case EbvClipDistance: return "ClipDistance"; + case EbvCullDistance: return "CullDistance"; + case EbvNormal: return "Normal"; + case EbvVertex: return "Vertex"; + case EbvMultiTexCoord0: return "MultiTexCoord0"; + case EbvMultiTexCoord1: return "MultiTexCoord1"; + case EbvMultiTexCoord2: return "MultiTexCoord2"; + case EbvMultiTexCoord3: return "MultiTexCoord3"; + case EbvMultiTexCoord4: return "MultiTexCoord4"; + case EbvMultiTexCoord5: return "MultiTexCoord5"; + case EbvMultiTexCoord6: return "MultiTexCoord6"; + case EbvMultiTexCoord7: return "MultiTexCoord7"; + case EbvFrontColor: return "FrontColor"; + case EbvBackColor: return "BackColor"; + case EbvFrontSecondaryColor: return "FrontSecondaryColor"; + case EbvBackSecondaryColor: return "BackSecondaryColor"; + case EbvTexCoord: return "TexCoord"; + case EbvFogFragCoord: return "FogFragCoord"; + case EbvInvocationId: return "InvocationID"; + case EbvPrimitiveId: return "PrimitiveID"; + case EbvLayer: return "Layer"; + case EbvViewportIndex: return "ViewportIndex"; + case EbvPatchVertices: return "PatchVertices"; + case EbvTessLevelOuter: return "TessLevelOuter"; + case EbvTessLevelInner: return "TessLevelInner"; + case EbvBoundingBox: return "BoundingBox"; + case EbvTessCoord: return "TessCoord"; + case EbvColor: return "Color"; + case EbvSecondaryColor: return "SecondaryColor"; + case EbvFace: return "Face"; + case EbvFragCoord: return "FragCoord"; + case EbvPointCoord: return "PointCoord"; + case EbvFragColor: return "FragColor"; + case EbvFragData: return "FragData"; + case EbvFragDepth: return "FragDepth"; + case EbvFragStencilRef: return "FragStencilRef"; + case EbvSampleId: return "SampleId"; + case EbvSamplePosition: return "SamplePosition"; + case EbvSampleMask: return "SampleMaskIn"; + case EbvHelperInvocation: return "HelperInvocation"; + + case EbvBaryCoordNoPersp: return "BaryCoordNoPersp"; + case EbvBaryCoordNoPerspCentroid: return "BaryCoordNoPerspCentroid"; + case EbvBaryCoordNoPerspSample: return "BaryCoordNoPerspSample"; + case EbvBaryCoordSmooth: return "BaryCoordSmooth"; + case EbvBaryCoordSmoothCentroid: return "BaryCoordSmoothCentroid"; + case EbvBaryCoordSmoothSample: return "BaryCoordSmoothSample"; + case EbvBaryCoordPullModel: return "BaryCoordPullModel"; + + case EbvViewIndex: return "ViewIndex"; + case EbvDeviceIndex: return "DeviceIndex"; + + case EbvFragSizeEXT: return "FragSizeEXT"; + case EbvFragInvocationCountEXT: return "FragInvocationCountEXT"; + + case EbvSecondaryFragDataEXT: return "SecondaryFragDataEXT"; + case EbvSecondaryFragColorEXT: return "SecondaryFragColorEXT"; + + case EbvViewportMaskNV: return "ViewportMaskNV"; + case EbvSecondaryPositionNV: return "SecondaryPositionNV"; + case EbvSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; + case EbvPositionPerViewNV: return "PositionPerViewNV"; + case EbvViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; + case EbvFragFullyCoveredNV: return "FragFullyCoveredNV"; + case EbvFragmentSizeNV: return "FragmentSizeNV"; + case EbvInvocationsPerPixelNV: return "InvocationsPerPixelNV"; + case EbvLaunchId: return "LaunchIdNV"; + case EbvLaunchSize: return "LaunchSizeNV"; + case EbvInstanceCustomIndex: return "InstanceCustomIndexNV"; + case EbvGeometryIndex: return "GeometryIndexEXT"; + case EbvWorldRayOrigin: return "WorldRayOriginNV"; + case EbvWorldRayDirection: return "WorldRayDirectionNV"; + case EbvObjectRayOrigin: return "ObjectRayOriginNV"; + 
case EbvObjectRayDirection: return "ObjectRayDirectionNV"; + case EbvRayTmin: return "ObjectRayTminNV"; + case EbvRayTmax: return "ObjectRayTmaxNV"; + case EbvHitT: return "HitTNV"; + case EbvHitKind: return "HitKindNV"; + case EbvIncomingRayFlags: return "IncomingRayFlagsNV"; + case EbvObjectToWorld: return "ObjectToWorldNV"; + case EbvWorldToObject: return "WorldToObjectNV"; + case EbvCurrentRayTimeNV: return "CurrentRayTimeNV"; + + case EbvBaryCoordEXT: + case EbvBaryCoordNV: return "BaryCoordKHR"; + case EbvBaryCoordNoPerspEXT: + case EbvBaryCoordNoPerspNV: return "BaryCoordNoPerspKHR"; + + case EbvTaskCountNV: return "TaskCountNV"; + case EbvPrimitiveCountNV: return "PrimitiveCountNV"; + case EbvPrimitiveIndicesNV: return "PrimitiveIndicesNV"; + case EbvClipDistancePerViewNV: return "ClipDistancePerViewNV"; + case EbvCullDistancePerViewNV: return "CullDistancePerViewNV"; + case EbvLayerPerViewNV: return "LayerPerViewNV"; + case EbvMeshViewCountNV: return "MeshViewCountNV"; + case EbvMeshViewIndicesNV: return "MeshViewIndicesNV"; + // GL_EXT_mesh_shader + case EbvPrimitivePointIndicesEXT: return "PrimitivePointIndicesEXT"; + case EbvPrimitiveLineIndicesEXT: return "PrimitiveLineIndicesEXT"; + case EbvPrimitiveTriangleIndicesEXT: return "PrimitiveTriangleIndicesEXT"; + case EbvCullPrimitiveEXT: return "CullPrimitiveEXT"; + + case EbvWarpsPerSM: return "WarpsPerSMNV"; + case EbvSMCount: return "SMCountNV"; + case EbvWarpID: return "WarpIDNV"; + case EbvSMID: return "SMIDNV"; + + case EbvShadingRateKHR: return "ShadingRateKHR"; + case EbvPrimitiveShadingRateKHR: return "PrimitiveShadingRateKHR"; + + default: return "unknown built-in variable"; + } +} + +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) +{ + switch (p) { + case EpqNone: return ""; break; + case EpqLow: return "lowp"; break; + case EpqMedium: return "mediump"; break; + case EpqHigh: return "highp"; break; + default: return "unknown precision qualifier"; + } +} +#endif + +__inline bool isTypeSignedInt(TBasicType type) +{ + switch (type) { + case EbtInt8: + case EbtInt16: + case EbtInt: + case EbtInt64: + return true; + default: + return false; + } +} + +__inline bool isTypeUnsignedInt(TBasicType type) +{ + switch (type) { + case EbtUint8: + case EbtUint16: + case EbtUint: + case EbtUint64: + return true; + default: + return false; + } +} + +__inline bool isTypeInt(TBasicType type) +{ + return isTypeSignedInt(type) || isTypeUnsignedInt(type); +} + +__inline bool isTypeFloat(TBasicType type) +{ + switch (type) { + case EbtFloat: + case EbtDouble: + case EbtFloat16: + return true; + default: + return false; + } +} + +__inline int getTypeRank(TBasicType type) +{ + int res = -1; + switch(type) { + case EbtInt8: + case EbtUint8: + res = 0; + break; + case EbtInt16: + case EbtUint16: + res = 1; + break; + case EbtInt: + case EbtUint: + res = 2; + break; + case EbtInt64: + case EbtUint64: + res = 3; + break; + default: + assert(false); + break; + } + return res; +} + +} // end namespace glslang + +#endif // _BASICTYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/Common.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/Common.h new file mode 100644 index 0000000..a5b41cb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/Common.h @@ -0,0 +1,340 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. 
+// Copyright (C) 2012-2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _COMMON_INCLUDED_
+#define _COMMON_INCLUDED_
+
+#include <algorithm>
+#include <cassert>
+#ifdef _MSC_VER
+#include <cfloat>
+#else
+#include <cmath>
+#endif
+#include <cstdio>
+#include <cstdlib>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#if defined(__ANDROID__) || (defined(_MSC_VER) && _MSC_VER < 1700)
+#include <sstream>
+namespace std {
+template <typename T>
+std::string to_string(const T& val) {
+  std::ostringstream os;
+  os << val;
+  return os.str();
+}
+}
+#endif
+
+#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API)
+    #include <basetsd.h>
+    #ifndef snprintf
+    #define snprintf sprintf_s
+    #endif
+    #define safe_vsprintf(buf,max,format,args) vsnprintf_s((buf), (max), (max), (format), (args))
+#elif defined (solaris)
+    #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args))
+    #include <sys/int_types.h>
+    #define UINT_PTR uintptr_t
+#else
+    #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args))
+    #include <stdint.h>
+    #define UINT_PTR uintptr_t
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+    #include <stdlib.h>
+    inline long long int strtoll (const char* str, char** endptr, int base)
+    {
+        return _strtoi64(str, endptr, base);
+    }
+    inline unsigned long long int strtoull (const char* str, char** endptr, int base)
+    {
+        return _strtoui64(str, endptr, base);
+    }
+    inline long long int atoll (const char* str)
+    {
+        return strtoll(str, NULL, 10);
+    }
+#endif
+
+#if defined(_MSC_VER)
+#define strdup _strdup
+#endif
+
+/* windows only pragma */
+#ifdef _MSC_VER
+    #pragma warning(disable : 4786) // Don't warn about too long identifiers
+    #pragma warning(disable : 4514) // unused inline method
+    #pragma warning(disable : 4201) // nameless union
+#endif
+
+#include "PoolAlloc.h"
+
+//
+// Put POOL_ALLOCATOR_NEW_DELETE in base classes to make them use this scheme.
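+// A minimal usage sketch (TFoo is a hypothetical class):
+//
+//   class TFoo {
+//   public:
+//       POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+//   };
+//
+// After this, `new TFoo` allocates from the thread's pool allocator, and
+// `delete` is a no-op because pool memory is reclaimed all at once.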
+//
+#define POOL_ALLOCATOR_NEW_DELETE(A)                                  \
+    void* operator new(size_t s) { return (A).allocate(s); }         \
+    void* operator new(size_t, void *_Where) { return (_Where); }    \
+    void operator delete(void*) { }                                  \
+    void operator delete(void *, void *) { }                         \
+    void* operator new[](size_t s) { return (A).allocate(s); }       \
+    void* operator new[](size_t, void *_Where) { return (_Where); }  \
+    void operator delete[](void*) { }                                \
+    void operator delete[](void *, void *) { }
+
+namespace glslang {
+
+    //
+    // Pool version of string.
+    //
+    typedef pool_allocator<char> TStringAllocator;
+    typedef std::basic_string<char, std::char_traits<char>, TStringAllocator> TString;
+
+} // end namespace glslang
+
+// Repackage the std::hash for use by unordered map/set with a TString key.
+namespace std {
+
+    template<> struct hash<glslang::TString> {
+        std::size_t operator()(const glslang::TString& s) const
+        {
+            const unsigned _FNV_offset_basis = 2166136261U;
+            const unsigned _FNV_prime = 16777619U;
+            unsigned _Val = _FNV_offset_basis;
+            size_t _Count = s.size();
+            const char* _First = s.c_str();
+            for (size_t _Next = 0; _Next < _Count; ++_Next)
+            {
+                _Val ^= (unsigned)_First[_Next];
+                _Val *= _FNV_prime;
+            }
+
+            return _Val;
+        }
+    };
+}
+
+namespace glslang {
+
+inline TString* NewPoolTString(const char* s)
+{
+    void* memory = GetThreadPoolAllocator().allocate(sizeof(TString));
+    return new(memory) TString(s);
+}
+
+template <class T> inline T* NewPoolObject(T*)
+{
+    return new(GetThreadPoolAllocator().allocate(sizeof(T))) T;
+}
+
+template <class T> inline T* NewPoolObject(T, int instances)
+{
+    return new(GetThreadPoolAllocator().allocate(instances * sizeof(T))) T[instances];
+}
+
+//
+// Pool allocator versions of vectors, lists, and maps
+//
+template <class T> class TVector : public std::vector<T, pool_allocator<T> > {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    typedef typename std::vector<T, pool_allocator<T> >::size_type size_type;
+    TVector() : std::vector<T, pool_allocator<T> >() {}
+    TVector(const pool_allocator<T>& a) : std::vector<T, pool_allocator<T> >(a) {}
+    TVector(size_type i) : std::vector<T, pool_allocator<T> >(i) {}
+    TVector(size_type i, const T& val) : std::vector<T, pool_allocator<T> >(i, val) {}
+};
+
+template <class T> class TList : public std::list<T, pool_allocator<T> > {
+};
+
+template <class K, class D, class CMP = std::less<K> >
+class TMap : public std::map<K, D, CMP, pool_allocator<std::pair<K const, D> > > {
+};
+
+template <class K, class D, class HASH = std::hash<K>, class PRED = std::equal_to<K> >
+class TUnorderedMap : public std::unordered_map<K, D, HASH, PRED, pool_allocator<std::pair<K const, D> > > {
+};
+
+template <class K, class CMP = std::less<K> >
+class TSet : public std::set<K, CMP, pool_allocator<K> > {
+};
+
+//
+// Persistent string memory. Should only be used for strings that survive
+// across compiles/links.
+//
+typedef std::basic_string<char> TPersistString;
+
+//
+// templatized min and max functions.
+//
+template <class T> T Min(const T a, const T b) { return a < b ? a : b; }
+template <class T> T Max(const T a, const T b) { return a > b ? a : b; }
+
+//
+// Create a TString object from an integer.
+//
+#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API)
+inline const TString String(const int i, const int base = 10)
+{
+    char text[16];     // 32 bit ints are at most 10 digits in base 10
+    _itoa_s(i, text, sizeof(text), base);
+    return text;
+}
+#else
+inline const TString String(const int i, const int /*base*/ = 10)
+{
+    char text[16];     // 32 bit ints are at most 10 digits in base 10
+
+    // we assume base 10 for all cases
+    snprintf(text, sizeof(text), "%d", i);
+
+    return text;
+}
+#endif
+
+struct TSourceLoc {
+    void init()
+    {
+        name = nullptr; string = 0; line = 0; column = 0;
+    }
+    void init(int stringNum) { init(); string = stringNum; }
+    // Returns the name if it exists. Otherwise, returns the string number.
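+    // For example (illustrative): a location whose name is "shader.vert"
+    // yields "\"shader.vert\"" (or shader.vert when quoteStringName is
+    // false), while an unnamed string number 3 yields "3".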
+    std::string getStringNameOrNum(bool quoteStringName = true) const
+    {
+        if (name != nullptr) {
+            TString qstr = quoteStringName ? ("\"" + *name + "\"") : *name;
+            std::string ret_str(qstr.c_str());
+            return ret_str;
+        }
+        return std::to_string((long long)string);
+    }
+    const char* getFilename() const
+    {
+        if (name == nullptr)
+            return nullptr;
+        return name->c_str();
+    }
+    const char* getFilenameStr() const { return name == nullptr ? "" : name->c_str(); }
+    TString* name; // descriptive name for this string, when a textual name is available, otherwise nullptr
+    int string;
+    int line;
+    int column;
+};
+
+class TPragmaTable : public TMap<TString, TString> {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+};
+
+const int MaxTokenLength = 1024;
+
+template <class T> bool IsPow2(T powerOf2)
+{
+    if (powerOf2 <= 0)
+        return false;
+
+    return (powerOf2 & (powerOf2 - 1)) == 0;
+}
+
+// Round number up to a multiple of the given powerOf2, which is not
+// an exponent, just a number that must be a power of 2.
+template <class T> void RoundToPow2(T& number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    number = (number + powerOf2 - 1) & ~(powerOf2 - 1);
+}
+
+template <class T> bool IsMultipleOfPow2(T number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    return ! (number & (powerOf2 - 1));
+}
+
+// Returns log2 of an integer power of 2.
+// T should be integral.
+template <class T> int IntLog2(T n)
+{
+    assert(IsPow2(n));
+    int result = 0;
+    while ((T(1) << result) != n) {
+        result++;
+    }
+    return result;
+}
+
+inline bool IsInfinity(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_NINF:
+    case _FPCLASS_PINF:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isinf(x);
+#endif
+}
+
+inline bool IsNan(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_SNAN:
+    case _FPCLASS_QNAN:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isnan(x);
+#endif
+}
+
+} // end namespace glslang
+
+#endif // _COMMON_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ConstantUnion.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ConstantUnion.h
new file mode 100644
index 0000000..c4ffb85
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ConstantUnion.h
@@ -0,0 +1,974 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _CONSTANT_UNION_INCLUDED_ +#define _CONSTANT_UNION_INCLUDED_ + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" + +namespace glslang { + +class TConstUnion { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TConstUnion() : iConst(0), type(EbtInt) { } + + void setI8Const(signed char i) + { + i8Const = i; + type = EbtInt8; + } + + void setU8Const(unsigned char u) + { + u8Const = u; + type = EbtUint8; + } + + void setI16Const(signed short i) + { + i16Const = i; + type = EbtInt16; + } + + void setU16Const(unsigned short u) + { + u16Const = u; + type = EbtUint16; + } + + void setIConst(int i) + { + iConst = i; + type = EbtInt; + } + + void setUConst(unsigned int u) + { + uConst = u; + type = EbtUint; + } + + void setI64Const(long long i64) + { + i64Const = i64; + type = EbtInt64; + } + + void setU64Const(unsigned long long u64) + { + u64Const = u64; + type = EbtUint64; + } + + void setDConst(double d) + { + dConst = d; + type = EbtDouble; + } + + void setBConst(bool b) + { + bConst = b; + type = EbtBool; + } + + void setSConst(const TString* s) + { + sConst = s; + type = EbtString; + } + + signed char getI8Const() const { return i8Const; } + unsigned char getU8Const() const { return u8Const; } + signed short getI16Const() const { return i16Const; } + unsigned short getU16Const() const { return u16Const; } + int getIConst() const { return iConst; } + unsigned int getUConst() const { return uConst; } + long long getI64Const() const { return i64Const; } + unsigned long long getU64Const() const { return u64Const; } + double getDConst() const { return dConst; } + bool getBConst() const { return bConst; } + const TString* getSConst() const { return sConst; } + + bool operator==(const signed char i) const + { + if (i == i8Const) + return true; + + return false; + } + + bool operator==(const unsigned char u) const + { + if (u == u8Const) + return true; + + return false; + } + + bool operator==(const signed short i) const + { + if (i == i16Const) + return true; + + return false; + } + + bool operator==(const unsigned short u) const + { + if (u == u16Const) + return true; + + return false; + } + + bool operator==(const int i) const + { + if (i == iConst) + return true; + + return false; + } + + bool operator==(const unsigned int u) const + { + if (u == uConst) + return true; + + return false; + } + + bool operator==(const long long i64) const + { + if (i64 == i64Const) + return true; + + return false; + } + + bool operator==(const unsigned long long u64) const + { + if (u64 == u64Const) + return true; + + return false; + } + + bool operator==(const double d) const + { + if (d == dConst) + return true; + + return false; + } + + bool operator==(const bool b) const + { + if (b == bConst) + return true; + + return false; + } + + bool operator==(const TConstUnion& constant) const + { + if (constant.type != type) + return false; + + switch (type) { + case EbtInt: + if (constant.iConst == iConst) + return true; + + break; + 
case EbtUint: + if (constant.uConst == uConst) + return true; + + break; + case EbtBool: + if (constant.bConst == bConst) + return true; + + break; + case EbtDouble: + if (constant.dConst == dConst) + return true; + + break; + +#ifndef GLSLANG_WEB + case EbtInt16: + if (constant.i16Const == i16Const) + return true; + + break; + case EbtUint16: + if (constant.u16Const == u16Const) + return true; + + break; + case EbtInt8: + if (constant.i8Const == i8Const) + return true; + + break; + case EbtUint8: + if (constant.u8Const == u8Const) + return true; + + break; + case EbtInt64: + if (constant.i64Const == i64Const) + return true; + + break; + case EbtUint64: + if (constant.u64Const == u64Const) + return true; + + break; +#endif + default: + assert(false && "Default missing"); + } + + return false; + } + + bool operator!=(const signed char i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned char u) const + { + return !operator==(u); + } + + bool operator!=(const signed short i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned short u) const + { + return !operator==(u); + } + + bool operator!=(const int i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned int u) const + { + return !operator==(u); + } + + bool operator!=(const long long i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned long long u) const + { + return !operator==(u); + } + + bool operator!=(const float f) const + { + return !operator==(f); + } + + bool operator!=(const bool b) const + { + return !operator==(b); + } + + bool operator!=(const TConstUnion& constant) const + { + return !operator==(constant); + } + + bool operator>(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { + case EbtInt: + if (iConst > constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst > constant.uConst) + return true; + + return false; + case EbtDouble: + if (dConst > constant.dConst) + return true; + + return false; +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const > constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const > constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const > constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const > constant.u16Const) + return true; + + return false; + case EbtInt64: + if (i64Const > constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const > constant.u64Const) + return true; + + return false; +#endif + default: + assert(false && "Default missing"); + return false; + } + } + + bool operator<(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const < constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const < constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const < constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const < constant.u16Const) + return true; + return false; + case EbtInt64: + if (i64Const < constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const < constant.u64Const) + return true; + + return false; +#endif + case EbtDouble: + if (dConst < constant.dConst) + return true; + + return false; + case EbtInt: + if (iConst < constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst < constant.uConst) + return true; + + return false; 
+        default:
+            assert(false && "Default missing");
+            return false;
+        }
+    }
+
+    TConstUnion operator+(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst + constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst + constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst + constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const + constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const + constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const + constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const + constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const + constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const + constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator-(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst - constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst - constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst - constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const - constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const - constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const - constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const - constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const - constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const - constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator*(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst * constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst * constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst * constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const * constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const * constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const * constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const * constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const * constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const * constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator%(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst % constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst % constant.uConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const % constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const % constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const % constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const % constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const %
constant.u16Const); break; + case EbtUint64: returnValue.setU64Const(u64Const % constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator>>(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const >> constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const >> constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const >> constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const >> constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const >> constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const >> constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const >> constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const >> constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const >> constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const >> constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const >> constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const >> constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst >> constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst >> constant.i8Const); break; + case EbtUint8: 
returnValue.setIConst(iConst >> constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst >> constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst >> constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst >> constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst >> constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst >> constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst >> constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst >> constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst >> constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst >> constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; +#ifndef GLSLANG_WEB + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const >> constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const >> constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const >> constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const >> constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const >> constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const >> constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator<<(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const << constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const << constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const << constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const << constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const << constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const << constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const << constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const << constant.u64Const); break; + default: 
assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const << constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const << constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const << constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const << constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const << constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const << constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const << constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const << constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const << constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const << constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const << constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const << constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const << constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const << constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const << constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const << constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const << constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const << constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const << constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const << constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const << constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const << constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const << constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const << constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const << constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const << constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const << constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const << constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const << constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const << constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const << constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const << constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const << constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const << constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const << 
constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst << constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst << constant.i8Const); break; + case EbtUint8: returnValue.setIConst(iConst << constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst << constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst << constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst << constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst << constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst << constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst << constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst << constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst << constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst << constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst & constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst & constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const & constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const & constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const & constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const & constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const & constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const & constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator|(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst | constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst | constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const | constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const | constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const | constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const | constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const | constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const | constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator^(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == 
constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst ^ constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst ^ constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const ^ constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const ^ constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const ^ constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const ^ constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const ^ constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const ^ constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator~() const + { + TConstUnion returnValue; + switch (type) { + case EbtInt: returnValue.setIConst(~iConst); break; + case EbtUint: returnValue.setUConst(~uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(~i8Const); break; + case EbtUint8: returnValue.setU8Const(~u8Const); break; + case EbtInt16: returnValue.setI16Const(~i16Const); break; + case EbtUint16: returnValue.setU16Const(~u16Const); break; + case EbtInt64: returnValue.setI64Const(~i64Const); break; + case EbtUint64: returnValue.setU64Const(~u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtBool: returnValue.setBConst(bConst && constant.bConst); break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator||(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtBool: returnValue.setBConst(bConst || constant.bConst); break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TBasicType getType() const { return type; } + +private: + union { + signed char i8Const; // used for i8vec, scalar int8s + unsigned char u8Const; // used for u8vec, scalar uint8s + signed short i16Const; // used for i16vec, scalar int16s + unsigned short u16Const; // used for u16vec, scalar uint16s + int iConst; // used for ivec, scalar ints + unsigned int uConst; // used for uvec, scalar uints + long long i64Const; // used for i64vec, scalar int64s + unsigned long long u64Const; // used for u64vec, scalar uint64s + bool bConst; // used for bvec, scalar bools + double dConst; // used for vec, dvec, mat, dmat, scalar floats and doubles + const TString* sConst; // string constant + }; + + TBasicType type; +}; + +// Encapsulate having a pointer to an array of TConstUnion, +// which only needs to be allocated if its size is going to be +// bigger than 0. +// +// One convenience is being able to use [] to go inside the array, instead +// of C++ assuming it as an array of pointers to vectors. +// +// General usage is that the size is known up front, and it is +// created once with the proper size. 
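A minimal usage sketch of the class defined next, assuming the thread's pool allocator is already installed (the backing vector is pool-allocated) and assuming a setDConst setter matching the getDConst accessor used in dot() above:

    // Size is known up front; create once with the proper size.
    glslang::TConstUnionArray arr(4);
    for (int i = 0; i < 4; ++i) {
        glslang::TConstUnion c;
        c.setDConst(0.5 * i);   // illustrative component values
        arr[i] = c;             // [] indexes straight into the array
    }
    // Smear constructor: all four components share one value.
    glslang::TConstUnion zero;
    zero.setDConst(0.0);
    glslang::TConstUnionArray smeared(4, zero);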
+// +class TConstUnionArray { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TConstUnionArray() : unionArray(nullptr) { } + virtual ~TConstUnionArray() { } + + explicit TConstUnionArray(int size) + { + if (size == 0) + unionArray = nullptr; + else + unionArray = new TConstUnionVector(size); + } + TConstUnionArray(const TConstUnionArray& a) = default; + TConstUnionArray(const TConstUnionArray& a, int start, int size) + { + unionArray = new TConstUnionVector(size); + for (int i = 0; i < size; ++i) + (*unionArray)[i] = a[start + i]; + } + + // Use this constructor for a smear operation + TConstUnionArray(int size, const TConstUnion& val) + { + unionArray = new TConstUnionVector(size, val); + } + + int size() const { return unionArray ? (int)unionArray->size() : 0; } + TConstUnion& operator[](size_t index) { return (*unionArray)[index]; } + const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; } + bool operator==(const TConstUnionArray& rhs) const + { + // this includes the case that both are unallocated + if (unionArray == rhs.unionArray) + return true; + + if (! unionArray || ! rhs.unionArray) + return false; + + return *unionArray == *rhs.unionArray; + } + bool operator!=(const TConstUnionArray& rhs) const { return ! operator==(rhs); } + + double dot(const TConstUnionArray& rhs) + { + assert(rhs.unionArray->size() == unionArray->size()); + double sum = 0.0; + + for (size_t comp = 0; comp < unionArray->size(); ++comp) + sum += (*this)[comp].getDConst() * rhs[comp].getDConst(); + + return sum; + } + + bool empty() const { return unionArray == nullptr; } + +protected: + typedef TVector<TConstUnion> TConstUnionVector; + TConstUnionVector* unionArray; +}; + +} // end namespace glslang + +#endif // _CONSTANT_UNION_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/InfoSink.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/InfoSink.h new file mode 100644 index 0000000..dceb603 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/InfoSink.h @@ -0,0 +1,144 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _INFOSINK_INCLUDED_ +#define _INFOSINK_INCLUDED_ + +#include "../Include/Common.h" +#include <cmath> + +namespace glslang { + +// +// TPrefixType is used to centralize how info log messages start. +// See below. +// +enum TPrefixType { + EPrefixNone, + EPrefixWarning, + EPrefixError, + EPrefixInternalError, + EPrefixUnimplemented, + EPrefixNote +}; + +enum TOutputStream { + ENull = 0, + EDebugger = 0x01, + EStdOut = 0x02, + EString = 0x04, +}; +// +// Encapsulate info logs for all objects that have them. +// +// The methods are a general set of tools for getting a variety of +// messages and types inserted into the log. +// +class TInfoSinkBase { +public: + TInfoSinkBase() : outputStream(4) {} + void erase() { sink.erase(); } + TInfoSinkBase& operator<<(const TPersistString& t) { append(t); return *this; } + TInfoSinkBase& operator<<(char c) { append(1, c); return *this; } + TInfoSinkBase& operator<<(const char* s) { append(s); return *this; } + TInfoSinkBase& operator<<(int n) { append(String(n)); return *this; } + TInfoSinkBase& operator<<(unsigned int n) { append(String(n)); return *this; } + TInfoSinkBase& operator<<(float n) { const int size = 40; char buf[size]; + snprintf(buf, size, (fabs(n) > 1e-8 && fabs(n) < 1e8) || n == 0.0f ?
"%f" : "%g", n); + append(buf); + return *this; } + TInfoSinkBase& operator+(const TPersistString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator<<(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const char* s) { append(s); return *this; } + const char* c_str() const { return sink.c_str(); } + void prefix(TPrefixType message) { + switch(message) { + case EPrefixNone: break; + case EPrefixWarning: append("WARNING: "); break; + case EPrefixError: append("ERROR: "); break; + case EPrefixInternalError: append("INTERNAL ERROR: "); break; + case EPrefixUnimplemented: append("UNIMPLEMENTED: "); break; + case EPrefixNote: append("NOTE: "); break; + default: append("UNKNOWN ERROR: "); break; + } + } + void location(const TSourceLoc& loc) { + const int maxSize = 24; + char locText[maxSize]; + snprintf(locText, maxSize, ":%d", loc.line); + append(loc.getStringNameOrNum(false).c_str()); + append(locText); + append(": "); + } + void message(TPrefixType message, const char* s) { + prefix(message); + append(s); + append("\n"); + } + void message(TPrefixType message, const char* s, const TSourceLoc& loc) { + prefix(message); + location(loc); + append(s); + append("\n"); + } + + void setOutputStream(int output = 4) + { + outputStream = output; + } + +protected: + void append(const char* s); + + void append(int count, char c); + void append(const TPersistString& t); + void append(const TString& t); + + void checkMem(size_t growth) { if (sink.capacity() < sink.size() + growth + 2) + sink.reserve(sink.capacity() + sink.capacity() / 2); } + void appendToStream(const char* s); + TPersistString sink; + int outputStream; +}; + +} // end namespace glslang + +class TInfoSink { +public: + glslang::TInfoSinkBase info; + glslang::TInfoSinkBase debug; +}; + +#endif // _INFOSINK_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/InitializeGlobals.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/InitializeGlobals.h new file mode 100644 index 0000000..95d0a40 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/InitializeGlobals.h @@ -0,0 +1,44 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef __INITIALIZE_GLOBALS_INCLUDED_ +#define __INITIALIZE_GLOBALS_INCLUDED_ + +namespace glslang { + +bool InitializePoolIndex(); + +} // end namespace glslang + +#endif // __INITIALIZE_GLOBALS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/PoolAlloc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/PoolAlloc.h new file mode 100644 index 0000000..1f5cac7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/PoolAlloc.h @@ -0,0 +1,318 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _POOLALLOC_INCLUDED_ +#define _POOLALLOC_INCLUDED_ + +#ifdef _DEBUG +# define GUARD_BLOCKS // define to enable guard block sanity checking +#endif + +// +// This header defines an allocator that can be used to efficiently +// allocate a large number of small requests for heap memory, with the +// intention that they are not individually deallocated, but rather +// collectively deallocated at one time. +// +// This simultaneously +// +// * Makes each individual allocation much more efficient; the +// typical allocation is trivial. +// * Completely avoids the cost of doing individual deallocation. +// * Saves the trouble of tracking down and plugging a large class of leaks. 
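A minimal sketch of that lifetime model, using the TPoolAllocator interface declared further down in this header:

    glslang::TPoolAllocator pool;
    pool.push();                  // open an allocation scope
    void* a = pool.allocate(64);  // individually trivial allocations...
    void* b = pool.allocate(128); // ...with no matching per-object frees
    // ... use a and b ...
    pool.pop();                   // everything since push() is released at once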
+// +// Individual classes can use this allocator by supplying their own +// new and delete methods. +// +// STL containers can use this allocator by using the pool_allocator +// class as the allocator (second) template argument. +// + +#include <cstddef> +#include <string.h> +#include <vector> + +namespace glslang { + +// If we are using guard blocks, we must track each individual +// allocation. If we aren't using guard blocks, these +// never get instantiated, so won't have any impact. +// + +class TAllocation { +public: + TAllocation(size_t size, unsigned char* mem, TAllocation* prev = 0) : + size(size), mem(mem), prevAlloc(prev) { + // Allocations are bracketed: + // [allocationHeader][initialGuardBlock][userData][finalGuardBlock] + // This would be cleaner with if (guardBlockSize)..., but that + // makes the compiler print warnings about 0 length memsets, + // even with the if() protecting them. +# ifdef GUARD_BLOCKS + memset(preGuard(), guardBlockBeginVal, guardBlockSize); + memset(data(), userDataFill, size); + memset(postGuard(), guardBlockEndVal, guardBlockSize); +# endif + } + + void check() const { + checkGuardBlock(preGuard(), guardBlockBeginVal, "before"); + checkGuardBlock(postGuard(), guardBlockEndVal, "after"); + } + + void checkAllocList() const; + + // Return total size needed to accommodate user buffer of 'size', + // plus our tracking data. + inline static size_t allocationSize(size_t size) { + return size + 2 * guardBlockSize + headerSize(); + } + + // Offset from surrounding buffer to get to user data buffer. + inline static unsigned char* offsetAllocation(unsigned char* m) { + return m + guardBlockSize + headerSize(); + } + +private: + void checkGuardBlock(unsigned char* blockMem, unsigned char val, const char* locText) const; + + // Find offsets to pre and post guard blocks, and user data buffer + unsigned char* preGuard() const { return mem + headerSize(); } + unsigned char* data() const { return preGuard() + guardBlockSize; } + unsigned char* postGuard() const { return data() + size; } + + size_t size; // size of the user data area + unsigned char* mem; // beginning of our allocation (pts to header) + TAllocation* prevAlloc; // prior allocation in the chain + + const static unsigned char guardBlockBeginVal; + const static unsigned char guardBlockEndVal; + const static unsigned char userDataFill; + + const static size_t guardBlockSize; +# ifdef GUARD_BLOCKS + inline static size_t headerSize() { return sizeof(TAllocation); } +# else + inline static size_t headerSize() { return 0; } +# endif +}; + +// +// There are several stacks. One is to track the pushing and popping +// of the user, and not yet implemented. The others are simply a +// repositories of free pages or used pages. +// +// Page stacks are linked together with a simple header at the beginning +// of each allocation obtained from the underlying OS. Multi-page allocations +// are returned to the OS. Individual page allocations are kept for future +// re-use. +// +// The "page size" used is not, nor must it match, the underlying OS +// page size. But, having it be about that size or equal to a set of +// pages is likely most optimal. +// +class TPoolAllocator { +public: + TPoolAllocator(int growthIncrement = 8*1024, int allocationAlignment = 16); + + // + // Don't call the destructor just to free up the memory, call pop() + // + ~TPoolAllocator(); + + // + // Call push() to establish a new place to pop memory too. Does not + // have to be called to get things started.
+ // + void push(); + + // + // Call pop() to free all memory allocated since the last call to push(), + // or if no last call to push, frees all memory since first allocation. + // + void pop(); + + // + // Call popAll() to free all memory allocated. + // + void popAll(); + + // + // Call allocate() to actually acquire memory. Returns 0 if no memory + // available, otherwise a properly aligned pointer to 'numBytes' of memory. + // + void* allocate(size_t numBytes); + + // + // There is no deallocate. The point of this class is that + // deallocation can be skipped by the user of it, as the model + // of use is to simultaneously deallocate everything at once + // by calling pop(), and to not have to solve memory leak problems. + // + +protected: + friend struct tHeader; + + struct tHeader { + tHeader(tHeader* nextPage, size_t pageCount) : +#ifdef GUARD_BLOCKS + lastAllocation(0), +#endif + nextPage(nextPage), pageCount(pageCount) { } + + ~tHeader() { +#ifdef GUARD_BLOCKS + if (lastAllocation) + lastAllocation->checkAllocList(); +#endif + } + +#ifdef GUARD_BLOCKS + TAllocation* lastAllocation; +#endif + tHeader* nextPage; + size_t pageCount; + }; + + struct tAllocState { + size_t offset; + tHeader* page; + }; + typedef std::vector<tAllocState> tAllocStack; + + // Track allocations if and only if we're using guard blocks +#ifndef GUARD_BLOCKS + void* initializeAllocation(tHeader*, unsigned char* memory, size_t) { +#else + void* initializeAllocation(tHeader* block, unsigned char* memory, size_t numBytes) { + new(memory) TAllocation(numBytes, memory, block->lastAllocation); + block->lastAllocation = reinterpret_cast<TAllocation*>(memory); +#endif + + // This is optimized entirely away if GUARD_BLOCKS is not defined. + return TAllocation::offsetAllocation(memory); + } + + size_t pageSize; // granularity of allocation from the OS + size_t alignment; // all returned allocations will be aligned at + // this granularity, which will be a power of 2 + size_t alignmentMask; + size_t headerSkip; // amount of memory to skip to make room for the + // header (basically, size of header, rounded + // up to make it aligned + size_t currentPageOffset; // next offset in top of inUseList to allocate from + tHeader* freeList; // list of popped memory + tHeader* inUseList; // list of all memory currently being used + tAllocStack stack; // stack of where to allocate from, to partition pool + + int numCalls; // just an interesting statistic + size_t totalBytes; // just an interesting statistic +private: + TPoolAllocator& operator=(const TPoolAllocator&); // don't allow assignment operator + TPoolAllocator(const TPoolAllocator&); // don't allow default copy constructor +}; + +// +// There could potentially be many pools with pops happening at +// different times. But a simple use is to have a global pop +// with everyone using the same global allocator. +// +extern TPoolAllocator& GetThreadPoolAllocator(); +void SetThreadPoolAllocator(TPoolAllocator* poolAllocator); + +// +// This STL compatible allocator is intended to be used as the allocator +// parameter to templatized STL containers, like vector and map. +// +// It will use the pools for allocation, and not +// do any deallocation, but will still do destruction.
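For example, a container type can be bound to the pool roughly like this (a sketch; glslang's own TVector alias works along these lines):

    #include <vector>
    typedef std::vector<int, glslang::pool_allocator<int> > TPoolIntVector;
    TPoolIntVector v;   // defaults to GetThreadPoolAllocator()
    v.push_back(42);    // element storage is pool-backed; reclaimed en masse by pop()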
+// +template<class T> +class pool_allocator { +public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + template<class Other> + struct rebind { + typedef pool_allocator<Other> other; + }; + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pool_allocator() : allocator(GetThreadPoolAllocator()) { } + pool_allocator(TPoolAllocator& a) : allocator(a) { } + pool_allocator(const pool_allocator<T>& p) : allocator(p.allocator) { } + + template<class Other> + pool_allocator(const pool_allocator<Other>& p) : allocator(p.getAllocator()) { } + + pointer allocate(size_type n) { + return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); } + pointer allocate(size_type n, const void*) { + return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); } + + void deallocate(void*, size_type) { } + void deallocate(pointer, size_type) { } + + pointer _Charalloc(size_t n) { + return reinterpret_cast<pointer>(getAllocator().allocate(n)); } + + void construct(pointer p, const T& val) { new ((void *)p) T(val); } + void destroy(pointer p) { p->T::~T(); } + + bool operator==(const pool_allocator& rhs) const { return &getAllocator() == &rhs.getAllocator(); } + bool operator!=(const pool_allocator& rhs) const { return &getAllocator() != &rhs.getAllocator(); } + + size_type max_size() const { return static_cast<size_type>(-1) / sizeof(T); } + size_type max_size(int size) const { return static_cast<size_type>(-1) / size; } + + TPoolAllocator& getAllocator() const { return allocator; } + + pool_allocator select_on_container_copy_construction() const { return pool_allocator{}; } + +protected: + pool_allocator& operator=(const pool_allocator&) { return *this; } + TPoolAllocator& allocator; +}; + +} // end namespace glslang + +#endif // _POOLALLOC_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ResourceLimits.h new file mode 100644 index 0000000..b36c8d6 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ResourceLimits.h @@ -0,0 +1,159 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _RESOURCE_LIMITS_INCLUDED_ +#define _RESOURCE_LIMITS_INCLUDED_ + +struct TLimits { + bool nonInductiveForLoops; + bool whileLoops; + bool doWhileLoops; + bool generalUniformIndexing; + bool generalAttributeMatrixVectorIndexing; + bool generalVaryingIndexing; + bool generalSamplerIndexing; + bool generalVariableIndexing; + bool generalConstantMatrixVectorIndexing; +}; + +struct TBuiltInResource { + int maxLights; + int maxClipPlanes; + int maxTextureUnits; + int maxTextureCoords; + int maxVertexAttribs; + int maxVertexUniformComponents; + int maxVaryingFloats; + int maxVertexTextureImageUnits; + int maxCombinedTextureImageUnits; + int maxTextureImageUnits; + int maxFragmentUniformComponents; + int maxDrawBuffers; + int maxVertexUniformVectors; + int maxVaryingVectors; + int maxFragmentUniformVectors; + int maxVertexOutputVectors; + int maxFragmentInputVectors; + int minProgramTexelOffset; + int maxProgramTexelOffset; + int maxClipDistances; + int maxComputeWorkGroupCountX; + int maxComputeWorkGroupCountY; + int maxComputeWorkGroupCountZ; + int maxComputeWorkGroupSizeX; + int maxComputeWorkGroupSizeY; + int maxComputeWorkGroupSizeZ; + int maxComputeUniformComponents; + int maxComputeTextureImageUnits; + int maxComputeImageUniforms; + int maxComputeAtomicCounters; + int maxComputeAtomicCounterBuffers; + int maxVaryingComponents; + int maxVertexOutputComponents; + int maxGeometryInputComponents; + int maxGeometryOutputComponents; + int maxFragmentInputComponents; + int maxImageUnits; + int maxCombinedImageUnitsAndFragmentOutputs; + int maxCombinedShaderOutputResources; + int maxImageSamples; + int maxVertexImageUniforms; + int maxTessControlImageUniforms; + int maxTessEvaluationImageUniforms; + int maxGeometryImageUniforms; + int maxFragmentImageUniforms; + int maxCombinedImageUniforms; + int maxGeometryTextureImageUnits; + int maxGeometryOutputVertices; + int maxGeometryTotalOutputComponents; + int maxGeometryUniformComponents; + int maxGeometryVaryingComponents; + int maxTessControlInputComponents; + int maxTessControlOutputComponents; + int maxTessControlTextureImageUnits; + int maxTessControlUniformComponents; + int maxTessControlTotalOutputComponents; + int maxTessEvaluationInputComponents; + int maxTessEvaluationOutputComponents; + int maxTessEvaluationTextureImageUnits; + int maxTessEvaluationUniformComponents; + int maxTessPatchComponents; + int maxPatchVertices; + int maxTessGenLevel; + int maxViewports; + int maxVertexAtomicCounters; + int maxTessControlAtomicCounters; + int maxTessEvaluationAtomicCounters; + int maxGeometryAtomicCounters; + int maxFragmentAtomicCounters; + int maxCombinedAtomicCounters; + int maxAtomicCounterBindings; + int maxVertexAtomicCounterBuffers; + int maxTessControlAtomicCounterBuffers; + int maxTessEvaluationAtomicCounterBuffers; + int maxGeometryAtomicCounterBuffers; + int maxFragmentAtomicCounterBuffers; + int maxCombinedAtomicCounterBuffers; + int maxAtomicCounterBufferSize; + int 
maxTransformFeedbackBuffers; + int maxTransformFeedbackInterleavedComponents; + int maxCullDistances; + int maxCombinedClipAndCullDistances; + int maxSamples; + int maxMeshOutputVerticesNV; + int maxMeshOutputPrimitivesNV; + int maxMeshWorkGroupSizeX_NV; + int maxMeshWorkGroupSizeY_NV; + int maxMeshWorkGroupSizeZ_NV; + int maxTaskWorkGroupSizeX_NV; + int maxTaskWorkGroupSizeY_NV; + int maxTaskWorkGroupSizeZ_NV; + int maxMeshViewCountNV; + int maxMeshOutputVerticesEXT; + int maxMeshOutputPrimitivesEXT; + int maxMeshWorkGroupSizeX_EXT; + int maxMeshWorkGroupSizeY_EXT; + int maxMeshWorkGroupSizeZ_EXT; + int maxTaskWorkGroupSizeX_EXT; + int maxTaskWorkGroupSizeY_EXT; + int maxTaskWorkGroupSizeZ_EXT; + int maxMeshViewCountEXT; + int maxDualSourceDrawBuffersEXT; + + TLimits limits; +}; + +#endif // _RESOURCE_LIMITS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ShHandle.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ShHandle.h new file mode 100644 index 0000000..df07bd8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/ShHandle.h @@ -0,0 +1,176 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SHHANDLE_INCLUDED_ +#define _SHHANDLE_INCLUDED_ + +// +// Machine independent part of the compiler private objects +// sent as ShHandle to the driver. +// +// This should not be included by driver code. +// + +#define SH_EXPORTING +#include "../Public/ShaderLang.h" +#include "../MachineIndependent/Versions.h" +#include "InfoSink.h" + +class TCompiler; +class TLinker; +class TUniformMap; + +// +// The base class used to back handles returned to the driver. 
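In sketch form (hypothetical usage, for illustration only), allocations can be routed to the pool a handle carries, so they share the handle's lifetime:

    TShHandleBase* handle = ConstructBindings();        // any handle-backed object
    glslang::SetThreadPoolAllocator(handle->getPool()); // route pool allocations to it
    // ... allocations now live exactly as long as the handle ...
    delete handle; // frees the pool, and with it everything allocated above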
+// +class TShHandleBase { +public: + TShHandleBase() { pool = new glslang::TPoolAllocator; } + virtual ~TShHandleBase() { delete pool; } + virtual TCompiler* getAsCompiler() { return 0; } + virtual TLinker* getAsLinker() { return 0; } + virtual TUniformMap* getAsUniformMap() { return 0; } + virtual glslang::TPoolAllocator* getPool() const { return pool; } +private: + glslang::TPoolAllocator* pool; +}; + +// +// The base class for the machine dependent linker to derive from +// for managing where uniforms live. +// +class TUniformMap : public TShHandleBase { +public: + TUniformMap() { } + virtual ~TUniformMap() { } + virtual TUniformMap* getAsUniformMap() { return this; } + virtual int getLocation(const char* name) = 0; + virtual TInfoSink& getInfoSink() { return infoSink; } + TInfoSink infoSink; +}; + +class TIntermNode; + +// +// The base class for the machine dependent compiler to derive from +// for managing object code from the compile. +// +class TCompiler : public TShHandleBase { +public: + TCompiler(EShLanguage l, TInfoSink& sink) : infoSink(sink) , language(l), haveValidObjectCode(false) { } + virtual ~TCompiler() { } + EShLanguage getLanguage() { return language; } + virtual TInfoSink& getInfoSink() { return infoSink; } + + virtual bool compile(TIntermNode* root, int version = 0, EProfile profile = ENoProfile) = 0; + + virtual TCompiler* getAsCompiler() { return this; } + virtual bool linkable() { return haveValidObjectCode; } + + TInfoSink& infoSink; +protected: + TCompiler& operator=(TCompiler&); + + EShLanguage language; + bool haveValidObjectCode; +}; + +// +// Link operations are based on a list of compile results... +// +typedef glslang::TVector<TCompiler*> TCompilerList; +typedef glslang::TVector<TShHandleBase*> THandleList; + +// +// The base class for the machine dependent linker to derive from +// to manage the resulting executable. +// + +class TLinker : public TShHandleBase { +public: + TLinker(EShExecutable e, TInfoSink& iSink) : + infoSink(iSink), + executable(e), + haveReturnableObjectCode(false), + appAttributeBindings(0), + fixedAttributeBindings(0), + excludedAttributes(0), + excludedCount(0), + uniformBindings(0) { } + virtual TLinker* getAsLinker() { return this; } + virtual ~TLinker() { } + virtual bool link(TCompilerList&, TUniformMap*) = 0; + virtual bool link(THandleList&) { return false; } + virtual void setAppAttributeBindings(const ShBindingTable* t) { appAttributeBindings = t; } + virtual void setFixedAttributeBindings(const ShBindingTable* t) { fixedAttributeBindings = t; } + virtual void getAttributeBindings(ShBindingTable const **t) const = 0; + virtual void setExcludedAttributes(const int* attributes, int count) { excludedAttributes = attributes; excludedCount = count; } + virtual ShBindingTable* getUniformBindings() const { return uniformBindings; } + virtual const void* getObjectCode() const { return 0; } // a real compiler would be returning object code here + virtual TInfoSink& getInfoSink() { return infoSink; } + TInfoSink& infoSink; +protected: + TLinker& operator=(TLinker&); + EShExecutable executable; + bool haveReturnableObjectCode; // true when objectCode is acceptable to send to driver + + const ShBindingTable* appAttributeBindings; + const ShBindingTable* fixedAttributeBindings; + const int* excludedAttributes; + int excludedCount; + ShBindingTable* uniformBindings; // created by the linker +}; + +// +// This is the interface between the machine independent code +// and the machine dependent code.
+// +// The machine dependent code should derive from the classes +// above. Then Construct*() and Delete*() will create and +// destroy the machine dependent objects, which contain the +// above machine independent information. +// +TCompiler* ConstructCompiler(EShLanguage, int); + +TShHandleBase* ConstructLinker(EShExecutable, int); +TShHandleBase* ConstructBindings(); +void DeleteLinker(TShHandleBase*); +void DeleteBindingList(TShHandleBase* bindingList); + +TUniformMap* ConstructUniformMap(); +void DeleteCompiler(TCompiler*); + +void DeleteUniformMap(TUniformMap*); + +#endif // _SHHANDLE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/SpirvIntrinsics.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/SpirvIntrinsics.h new file mode 100644 index 0000000..3c7d72c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/SpirvIntrinsics.h @@ -0,0 +1,128 @@ +// +// Copyright(C) 2021 Advanced Micro Devices, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#pragma once + +#ifndef GLSLANG_WEB + +// +// GL_EXT_spirv_intrinsics +// +#include "Common.h" + +namespace glslang { + +class TIntermTyped; +class TIntermConstantUnion; +class TType; + +// SPIR-V requirements +struct TSpirvRequirement { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // capability = [..] + TSet<TString> extensions; + // extension = [..]
+ TSet<int> capabilities; +}; + +// SPIR-V execution modes +struct TSpirvExecutionMode { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // spirv_execution_mode + TMap<TString, TVector<const TIntermConstantUnion*>> modes; + // spirv_execution_mode_id + TMap<TString, TVector<const TIntermTyped*> > modeIds; +}; + +// SPIR-V decorations +struct TSpirvDecorate { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // spirv_decorate + TMap<int, TVector<const TIntermConstantUnion*> > decorates; + // spirv_decorate_id + TMap<int, TVector<const TIntermTyped*>> decorateIds; + // spirv_decorate_string + TMap<int, TVector<const TIntermConstantUnion*> > decorateStrings; +}; + +// SPIR-V instruction +struct TSpirvInstruction { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSpirvInstruction() { set = ""; id = -1; } + + bool operator==(const TSpirvInstruction& rhs) const { return set == rhs.set && id == rhs.id; } + bool operator!=(const TSpirvInstruction& rhs) const { return !operator==(rhs); } + + // spirv_instruction + TString set; + int id; +}; + +// SPIR-V type parameter +struct TSpirvTypeParameter { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSpirvTypeParameter(const TIntermConstantUnion* arg) { constant = arg; } + + bool operator==(const TSpirvTypeParameter& rhs) const { return constant == rhs.constant; } + bool operator!=(const TSpirvTypeParameter& rhs) const { return !operator==(rhs); } + + const TIntermConstantUnion* constant; +}; + +typedef TVector<TSpirvTypeParameter> TSpirvTypeParameters; + +// SPIR-V type +struct TSpirvType { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + bool operator==(const TSpirvType& rhs) const + { + return spirvInst == rhs.spirvInst && typeParams == rhs.typeParams; + } + bool operator!=(const TSpirvType& rhs) const { return !operator==(rhs); } + + // spirv_type + TSpirvInstruction spirvInst; + TSpirvTypeParameters typeParams; +}; + +} // end namespace glslang + +#endif // GLSLANG_WEB diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/Types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/Types.h new file mode 100644 index 0000000..a6f47e8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/Types.h @@ -0,0 +1,2901 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2016 LunarG, Inc. +// Copyright (C) 2015-2016 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _TYPES_INCLUDED +#define _TYPES_INCLUDED + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" +#include "../Public/ShaderLang.h" +#include "arrays.h" +#include "SpirvIntrinsics.h" + +#include <algorithm> + +namespace glslang { + +class TIntermAggregate; + +const int GlslangMaxTypeLength = 200; // TODO: need to print block/struct one member per line, so this can stay bounded + +const char* const AnonymousPrefix = "anon@"; // for something like a block whose members can be directly accessed +inline bool IsAnonymous(const TString& name) +{ + return name.compare(0, 5, AnonymousPrefix) == 0; +} + +// +// Details within a sampler type +// +enum TSamplerDim { + EsdNone, + Esd1D, + Esd2D, + Esd3D, + EsdCube, + EsdRect, + EsdBuffer, + EsdSubpass, // goes only with non-sampled image (image is true) + EsdNumDims +}; + +struct TSampler { // misnomer now; includes images, textures without sampler, and textures with sampler + TBasicType type : 8; // type returned by sampler + TSamplerDim dim : 8; + bool arrayed : 1; + bool shadow : 1; + bool ms : 1; + bool image : 1; // image, combined should be false + bool combined : 1; // true means texture is combined with a sampler, false means texture with no sampler + bool sampler : 1; // true means a pure sampler, other fields should be clear() + +#ifdef GLSLANG_WEB + bool is1D() const { return false; } + bool isBuffer() const { return false; } + bool isRect() const { return false; } + bool isSubpass() const { return false; } + bool isCombined() const { return true; } + bool isImage() const { return false; } + bool isImageClass() const { return false; } + bool isMultiSample() const { return false; } + bool isExternal() const { return false; } + void setExternal(bool e) { } + bool isYuv() const { return false; } +#else + unsigned int vectorSize : 3; // vector return type size. + // Some languages support structures as sample results. Storing the whole structure in the + // TSampler is too large, so there is an index to a separate table. + static const unsigned structReturnIndexBits = 4; // number of index bits to use. + static const unsigned structReturnSlots = (1<<structReturnIndexBits)-1; [...] +typedef TVector<TTypeLoc> TTypeList; + +typedef TVector<TString*> TIdentifierList; + +// +// Following are a series of helper enums for managing layouts and qualifiers, +// used for TPublicType, TType, others. +// + +enum TLayoutPacking { + ElpNone, + ElpShared, // default, but different than saying nothing + ElpStd140, + ElpStd430, + ElpPacked, + ElpScalar, + ElpCount // If expanding, see bitfield width below +}; + +enum TLayoutMatrix { + ElmNone, + ElmRowMajor, + ElmColumnMajor, // default, but different than saying nothing + ElmCount // If expanding, see bitfield width below +}; + +// Union of geometry shader and tessellation shader geometry types. +// They don't go into TType, but rather have current state per shader or +// active parser type (TPublicType).
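For orientation, these enums mirror GLSL layout qualifiers; a hypothetical mapping helper (sketch only, the function name is not from this header) for the geometry enum declared just below might look like:

    #include <string>
    glslang::TLayoutGeometry geometryFromKeyword(const std::string& kw)
    {
        if (kw == "points")    return glslang::ElgPoints;
        if (kw == "lines")     return glslang::ElgLines;
        if (kw == "triangles") return glslang::ElgTriangles;
        if (kw == "quads")     return glslang::ElgQuads;
        return glslang::ElgNone; // unknown keyword
    }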
+enum TLayoutGeometry { + ElgNone, + ElgPoints, + ElgLines, + ElgLinesAdjacency, + ElgLineStrip, + ElgTriangles, + ElgTrianglesAdjacency, + ElgTriangleStrip, + ElgQuads, + ElgIsolines, +}; + +enum TVertexSpacing { + EvsNone, + EvsEqual, + EvsFractionalEven, + EvsFractionalOdd +}; + +enum TVertexOrder { + EvoNone, + EvoCw, + EvoCcw +}; + +// Note: order matters, as type of format is done by comparison. +enum TLayoutFormat { + ElfNone, + + // Float image + ElfRgba32f, + ElfRgba16f, + ElfR32f, + ElfRgba8, + ElfRgba8Snorm, + + ElfEsFloatGuard, // to help with comparisons + + ElfRg32f, + ElfRg16f, + ElfR11fG11fB10f, + ElfR16f, + ElfRgba16, + ElfRgb10A2, + ElfRg16, + ElfRg8, + ElfR16, + ElfR8, + ElfRgba16Snorm, + ElfRg16Snorm, + ElfRg8Snorm, + ElfR16Snorm, + ElfR8Snorm, + + ElfFloatGuard, // to help with comparisons + + // Int image + ElfRgba32i, + ElfRgba16i, + ElfRgba8i, + ElfR32i, + + ElfEsIntGuard, // to help with comparisons + + ElfRg32i, + ElfRg16i, + ElfRg8i, + ElfR16i, + ElfR8i, + ElfR64i, + + ElfIntGuard, // to help with comparisons + + // Uint image + ElfRgba32ui, + ElfRgba16ui, + ElfRgba8ui, + ElfR32ui, + + ElfEsUintGuard, // to help with comparisons + + ElfRg32ui, + ElfRg16ui, + ElfRgb10a2ui, + ElfRg8ui, + ElfR16ui, + ElfR8ui, + ElfR64ui, + + ElfCount +}; + +enum TLayoutDepth { + EldNone, + EldAny, + EldGreater, + EldLess, + EldUnchanged, + + EldCount +}; + +enum TLayoutStencil { + ElsNone, + ElsRefUnchangedFrontAMD, + ElsRefGreaterFrontAMD, + ElsRefLessFrontAMD, + ElsRefUnchangedBackAMD, + ElsRefGreaterBackAMD, + ElsRefLessBackAMD, + + ElsCount +}; + +enum TBlendEquationShift { + // No 'EBlendNone': + // These are used as bit-shift amounts. A mask of such shifts will have type 'int', + // and in that space, 0 means no bits set, or none. In this enum, 0 means (1 << 0), a bit is set. 
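 // Worked example (editorial sketch): a mask meaning "multiply or screen" is
 // (1 << EBlendMultiply) | (1 << EBlendScreen) == 0x3, while a mask of 0 means
 // no equations at all, even though EBlendMultiply itself is 0.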
+ EBlendMultiply, + EBlendScreen, + EBlendOverlay, + EBlendDarken, + EBlendLighten, + EBlendColordodge, + EBlendColorburn, + EBlendHardlight, + EBlendSoftlight, + EBlendDifference, + EBlendExclusion, + EBlendHslHue, + EBlendHslSaturation, + EBlendHslColor, + EBlendHslLuminosity, + EBlendAllEquations, + + EBlendCount +}; + +enum TInterlockOrdering { + EioNone, + EioPixelInterlockOrdered, + EioPixelInterlockUnordered, + EioSampleInterlockOrdered, + EioSampleInterlockUnordered, + EioShadingRateInterlockOrdered, + EioShadingRateInterlockUnordered, + + EioCount, +}; + +enum TShaderInterface +{ + // Includes both uniform blocks and buffer blocks + EsiUniform = 0, + EsiInput, + EsiOutput, + EsiNone, + + EsiCount +}; + +class TQualifier { +public: + static const int layoutNotSet = -1; + + void clear() + { + precision = EpqNone; + invariant = false; + makeTemporary(); + declaredBuiltIn = EbvNone; +#ifndef GLSLANG_WEB + noContraction = false; + nullInit = false; + spirvByReference = false; + spirvLiteral = false; +#endif + defaultBlock = false; + } + + // drop qualifiers that don't belong in a temporary variable + void makeTemporary() + { + semanticName = nullptr; + storage = EvqTemporary; + builtIn = EbvNone; + clearInterstage(); + clearMemory(); + specConstant = false; + nonUniform = false; + nullInit = false; + defaultBlock = false; + clearLayout(); +#ifndef GLSLANG_WEB + spirvStorageClass = -1; + spirvDecorate = nullptr; + spirvByReference = false; + spirvLiteral = false; +#endif + } + + void clearInterstage() + { + clearInterpolation(); +#ifndef GLSLANG_WEB + patch = false; + sample = false; +#endif + } + + void clearInterpolation() + { + centroid = false; + smooth = false; + flat = false; +#ifndef GLSLANG_WEB + nopersp = false; + explicitInterp = false; + pervertexNV = false; + perPrimitiveNV = false; + perViewNV = false; + perTaskNV = false; +#endif + pervertexEXT = false; + } + + void clearMemory() + { +#ifndef GLSLANG_WEB + coherent = false; + devicecoherent = false; + queuefamilycoherent = false; + workgroupcoherent = false; + subgroupcoherent = false; + shadercallcoherent = false; + nonprivate = false; + volatil = false; + restrict = false; + readonly = false; + writeonly = false; +#endif + } + + const char* semanticName; + TStorageQualifier storage : 6; + TBuiltInVariable builtIn : 9; + TBuiltInVariable declaredBuiltIn : 9; + static_assert(EbvLast < 256, "need to increase size of TBuiltInVariable bitfields!"); + TPrecisionQualifier precision : 3; + bool invariant : 1; // require canonical treatment for cross-shader invariance + bool centroid : 1; + bool smooth : 1; + bool flat : 1; + // having a constant_id is not sufficient: expressions have no id, but are still specConstant + bool specConstant : 1; + bool nonUniform : 1; + bool explicitOffset : 1; + bool defaultBlock : 1; // default blocks with matching names have structures merged when linking + +#ifdef GLSLANG_WEB + bool isWriteOnly() const { return false; } + bool isReadOnly() const { return false; } + bool isRestrict() const { return false; } + bool isCoherent() const { return false; } + bool isVolatile() const { return false; } + bool isSample() const { return false; } + bool isMemory() const { return false; } + bool isMemoryQualifierImageAndSSBOOnly() const { return false; } + bool bufferReferenceNeedsVulkanMemoryModel() const { return false; } + bool isInterpolation() const { return flat || smooth; } + bool isExplicitInterpolation() const { return false; } + bool isAuxiliary() const { return centroid; } + bool isPatch() const { 
return false; } + bool isNoContraction() const { return false; } + void setNoContraction() { } + bool isPervertexNV() const { return false; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() {} + bool isNullInit() const { return false; } + void setSpirvByReference() { } + bool isSpirvByReference() { return false; } + void setSpirvLiteral() { } + bool isSpirvLiteral() { return false; } +#else + bool noContraction: 1; // prevent contraction and reassociation, e.g., for 'precise' keyword, and expressions it affects + bool nopersp : 1; + bool explicitInterp : 1; + bool pervertexNV : 1; + bool pervertexEXT : 1; + bool perPrimitiveNV : 1; + bool perViewNV : 1; + bool perTaskNV : 1; + bool patch : 1; + bool sample : 1; + bool restrict : 1; + bool readonly : 1; + bool writeonly : 1; + bool coherent : 1; + bool volatil : 1; + bool devicecoherent : 1; + bool queuefamilycoherent : 1; + bool workgroupcoherent : 1; + bool subgroupcoherent : 1; + bool shadercallcoherent : 1; + bool nonprivate : 1; + bool nullInit : 1; + bool spirvByReference : 1; + bool spirvLiteral : 1; + bool isWriteOnly() const { return writeonly; } + bool isReadOnly() const { return readonly; } + bool isRestrict() const { return restrict; } + bool isCoherent() const { return coherent; } + bool isVolatile() const { return volatil; } + bool isSample() const { return sample; } + bool isMemory() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly || nonprivate; + } + bool isMemoryQualifierImageAndSSBOOnly() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly; + } + bool bufferReferenceNeedsVulkanMemoryModel() const + { + // include qualifiers that map to load/store availability/visibility/nonprivate memory access operands + return subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || nonprivate; + } + bool isInterpolation() const + { + return flat || smooth || nopersp || explicitInterp; + } + bool isExplicitInterpolation() const + { + return explicitInterp; + } + bool isAuxiliary() const + { + return centroid || patch || sample || pervertexNV || pervertexEXT; + } + bool isPatch() const { return patch; } + bool isNoContraction() const { return noContraction; } + void setNoContraction() { noContraction = true; } + bool isPervertexNV() const { return pervertexNV; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() { nullInit = true; } + bool isNullInit() const { return nullInit; } + void setSpirvByReference() { spirvByReference = true; } + bool isSpirvByReference() const { return spirvByReference; } + void setSpirvLiteral() { spirvLiteral = true; } + bool isSpirvLiteral() const { return spirvLiteral; } +#endif + + bool isPipeInput() const + { + switch (storage) { + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + return true; + default: + return false; + } + } + + bool isPipeOutput() const + { + switch (storage) { + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + bool isParamInput() const + { + switch (storage) { + case EvqIn: + case EvqInOut: + case EvqConstReadOnly: + 
return true; + default: + return false; + } + } + + bool isParamOutput() const + { + switch (storage) { + case EvqOut: + case EvqInOut: + return true; + default: + return false; + } + } + + bool isUniformOrBuffer() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + return true; + default: + return false; + } + } + + bool isUniform() const + { + switch (storage) { + case EvqUniform: + return true; + default: + return false; + } + } + + bool isIo() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + // non-built-in symbols that might link between compilation units + bool isLinkable() const + { + switch (storage) { + case EvqGlobal: + case EvqVaryingIn: + case EvqVaryingOut: + case EvqUniform: + case EvqBuffer: + case EvqShared: + return true; + default: + return false; + } + } + + TBlockStorageClass getBlockStorage() const { + if (storage == EvqUniform && !isPushConstant()) { + return EbsUniform; + } + else if (storage == EvqUniform) { + return EbsPushConstant; + } + else if (storage == EvqBuffer) { + return EbsStorageBuffer; + } + return EbsNone; + } + + void setBlockStorage(TBlockStorageClass newBacking) { +#ifndef GLSLANG_WEB + layoutPushConstant = (newBacking == EbsPushConstant); +#endif + switch (newBacking) { + case EbsUniform : + if (layoutPacking == ElpStd430) { + // std430 would not be valid + layoutPacking = ElpStd140; + } + storage = EvqUniform; + break; + case EbsStorageBuffer : + storage = EvqBuffer; + break; +#ifndef GLSLANG_WEB + case EbsPushConstant : + storage = EvqUniform; + layoutSet = TQualifier::layoutSetEnd; + layoutBinding = TQualifier::layoutBindingEnd; + break; +#endif + default: + break; + } + } + +#ifdef GLSLANG_WEB + bool isPerView() const { return false; } + bool isTaskMemory() const { return false; } + bool isArrayedIo(EShLanguage language) const { return false; } +#else + bool isPerPrimitive() const { return perPrimitiveNV; } + bool isPerView() const { return perViewNV; } + bool isTaskMemory() const { return perTaskNV; } + bool isTaskPayload() const { return storage == EvqtaskPayloadSharedEXT; } + bool isAnyPayload() const { + return storage == EvqPayload || storage == EvqPayloadIn; + } + bool isAnyCallable() const { + return storage == EvqCallableData || storage == EvqCallableDataIn; + } + + // True if this type of IO is supposed to be arrayed with extra level for per-vertex data + bool isArrayedIo(EShLanguage language) const + { + switch (language) { + case EShLangGeometry: + return isPipeInput(); + case EShLangTessControl: + return ! patch && (isPipeInput() || isPipeOutput()); + case EShLangTessEvaluation: + return ! patch && isPipeInput(); + case EShLangFragment: + return (pervertexNV || pervertexEXT) && isPipeInput(); + case EShLangMesh: + return ! 
perTaskNV && isPipeOutput(); + + default: + return false; + } + } +#endif + + // Implementing an embedded layout-qualifier class here, since C++ can't have a real class bitfield + void clearLayout() // all layout + { + clearUniformLayout(); + +#ifndef GLSLANG_WEB + layoutPushConstant = false; + layoutBufferReference = false; + layoutPassthrough = false; + layoutViewportRelative = false; + // -2048 as the default value indicating layoutSecondaryViewportRelative is not set + layoutSecondaryViewportRelativeOffset = -2048; + layoutShaderRecord = false; + layoutBufferReferenceAlign = layoutBufferReferenceAlignEnd; + layoutFormat = ElfNone; +#endif + + clearInterstageLayout(); + + layoutSpecConstantId = layoutSpecConstantIdEnd; + } + void clearInterstageLayout() + { + layoutLocation = layoutLocationEnd; + layoutComponent = layoutComponentEnd; +#ifndef GLSLANG_WEB + layoutIndex = layoutIndexEnd; + clearStreamLayout(); + clearXfbLayout(); +#endif + } + +#ifndef GLSLANG_WEB + void clearStreamLayout() + { + layoutStream = layoutStreamEnd; + } + void clearXfbLayout() + { + layoutXfbBuffer = layoutXfbBufferEnd; + layoutXfbStride = layoutXfbStrideEnd; + layoutXfbOffset = layoutXfbOffsetEnd; + } +#endif + + bool hasNonXfbLayout() const + { + return hasUniformLayout() || + hasAnyLocation() || + hasStream() || + hasFormat() || + isShaderRecord() || + isPushConstant() || + hasBufferReference(); + } + bool hasLayout() const + { + return hasNonXfbLayout() || + hasXfb(); + } + + TLayoutMatrix layoutMatrix : 3; + TLayoutPacking layoutPacking : 4; + int layoutOffset; + int layoutAlign; + + unsigned int layoutLocation : 12; + static const unsigned int layoutLocationEnd = 0xFFF; + + unsigned int layoutComponent : 3; + static const unsigned int layoutComponentEnd = 4; + + unsigned int layoutSet : 7; + static const unsigned int layoutSetEnd = 0x3F; + + unsigned int layoutBinding : 16; + static const unsigned int layoutBindingEnd = 0xFFFF; + + unsigned int layoutIndex : 8; + static const unsigned int layoutIndexEnd = 0xFF; + + unsigned int layoutStream : 8; + static const unsigned int layoutStreamEnd = 0xFF; + + unsigned int layoutXfbBuffer : 4; + static const unsigned int layoutXfbBufferEnd = 0xF; + + unsigned int layoutXfbStride : 14; + static const unsigned int layoutXfbStrideEnd = 0x3FFF; + + unsigned int layoutXfbOffset : 13; + static const unsigned int layoutXfbOffsetEnd = 0x1FFF; + + unsigned int layoutAttachment : 8; // for input_attachment_index + static const unsigned int layoutAttachmentEnd = 0XFF; + + unsigned int layoutSpecConstantId : 11; + static const unsigned int layoutSpecConstantIdEnd = 0x7FF; + +#ifndef GLSLANG_WEB + // stored as log2 of the actual alignment value + unsigned int layoutBufferReferenceAlign : 6; + static const unsigned int layoutBufferReferenceAlignEnd = 0x3F; + + TLayoutFormat layoutFormat : 8; + + bool layoutPushConstant; + bool layoutBufferReference; + bool layoutPassthrough; + bool layoutViewportRelative; + int layoutSecondaryViewportRelativeOffset; + bool layoutShaderRecord; + + // GL_EXT_spirv_intrinsics + int spirvStorageClass; + TSpirvDecorate* spirvDecorate; +#endif + + bool hasUniformLayout() const + { + return hasMatrix() || + hasPacking() || + hasOffset() || + hasBinding() || + hasSet() || + hasAlign(); + } + void clearUniformLayout() // only uniform specific + { + layoutMatrix = ElmNone; + layoutPacking = ElpNone; + layoutOffset = layoutNotSet; + layoutAlign = layoutNotSet; + + layoutSet = layoutSetEnd; + layoutBinding = layoutBindingEnd; +#ifndef GLSLANG_WEB + 
layoutAttachment = layoutAttachmentEnd; +#endif + } + + bool hasMatrix() const + { + return layoutMatrix != ElmNone; + } + bool hasPacking() const + { + return layoutPacking != ElpNone; + } + bool hasAlign() const + { + return layoutAlign != layoutNotSet; + } + bool hasAnyLocation() const + { + return hasLocation() || + hasComponent() || + hasIndex(); + } + bool hasLocation() const + { + return layoutLocation != layoutLocationEnd; + } + bool hasSet() const + { + return layoutSet != layoutSetEnd; + } + bool hasBinding() const + { + return layoutBinding != layoutBindingEnd; + } +#ifdef GLSLANG_WEB + bool hasOffset() const { return false; } + bool isNonPerspective() const { return false; } + bool hasIndex() const { return false; } + unsigned getIndex() const { return 0; } + bool hasComponent() const { return false; } + bool hasStream() const { return false; } + bool hasFormat() const { return false; } + bool hasXfb() const { return false; } + bool hasXfbBuffer() const { return false; } + bool hasXfbStride() const { return false; } + bool hasXfbOffset() const { return false; } + bool hasAttachment() const { return false; } + TLayoutFormat getFormat() const { return ElfNone; } + bool isPushConstant() const { return false; } + bool isShaderRecord() const { return false; } + bool hasBufferReference() const { return false; } + bool hasBufferReferenceAlign() const { return false; } + bool isNonUniform() const { return false; } +#else + bool hasOffset() const + { + return layoutOffset != layoutNotSet; + } + bool isNonPerspective() const { return nopersp; } + bool hasIndex() const + { + return layoutIndex != layoutIndexEnd; + } + unsigned getIndex() const { return layoutIndex; } + bool hasComponent() const + { + return layoutComponent != layoutComponentEnd; + } + bool hasStream() const + { + return layoutStream != layoutStreamEnd; + } + bool hasFormat() const + { + return layoutFormat != ElfNone; + } + bool hasXfb() const + { + return hasXfbBuffer() || + hasXfbStride() || + hasXfbOffset(); + } + bool hasXfbBuffer() const + { + return layoutXfbBuffer != layoutXfbBufferEnd; + } + bool hasXfbStride() const + { + return layoutXfbStride != layoutXfbStrideEnd; + } + bool hasXfbOffset() const + { + return layoutXfbOffset != layoutXfbOffsetEnd; + } + bool hasAttachment() const + { + return layoutAttachment != layoutAttachmentEnd; + } + TLayoutFormat getFormat() const { return layoutFormat; } + bool isPushConstant() const { return layoutPushConstant; } + bool isShaderRecord() const { return layoutShaderRecord; } + bool hasBufferReference() const { return layoutBufferReference; } + bool hasBufferReferenceAlign() const + { + return layoutBufferReferenceAlign != layoutBufferReferenceAlignEnd; + } + bool isNonUniform() const + { + return nonUniform; + } + + // GL_EXT_spirv_intrinsics + bool hasSprivDecorate() const { return spirvDecorate != nullptr; } + void setSpirvDecorate(int decoration, const TIntermAggregate* args = nullptr); + void setSpirvDecorateId(int decoration, const TIntermAggregate* args); + void setSpirvDecorateString(int decoration, const TIntermAggregate* args); + const TSpirvDecorate& getSpirvDecorate() const { assert(spirvDecorate); return *spirvDecorate; } + TSpirvDecorate& getSpirvDecorate() { assert(spirvDecorate); return *spirvDecorate; } + TString getSpirvDecorateQualifierString() const; +#endif + bool hasSpecConstantId() const + { + // Not the same thing as being a specialization constant, this + // is just whether or not it was declared with an ID. 
+ return layoutSpecConstantId != layoutSpecConstantIdEnd; + } + bool isSpecConstant() const + { + // True if type is a specialization constant, whether or not it + // had a specialization-constant ID, and false if it is not a + // true front-end constant. + return specConstant; + } + bool isFrontEndConstant() const + { + // True if the front-end knows the final constant value. + // This allows front-end constant folding. + return storage == EvqConst && ! specConstant; + } + bool isConstant() const + { + // True if is either kind of constant; specialization or regular. + return isFrontEndConstant() || isSpecConstant(); + } + void makeSpecConstant() + { + storage = EvqConst; + specConstant = true; + } + static const char* getLayoutPackingString(TLayoutPacking packing) + { + switch (packing) { + case ElpStd140: return "std140"; +#ifndef GLSLANG_WEB + case ElpPacked: return "packed"; + case ElpShared: return "shared"; + case ElpStd430: return "std430"; + case ElpScalar: return "scalar"; +#endif + default: return "none"; + } + } + static const char* getLayoutMatrixString(TLayoutMatrix m) + { + switch (m) { + case ElmColumnMajor: return "column_major"; + case ElmRowMajor: return "row_major"; + default: return "none"; + } + } +#ifdef GLSLANG_WEB + static const char* getLayoutFormatString(TLayoutFormat f) { return "none"; } +#else + static const char* getLayoutFormatString(TLayoutFormat f) + { + switch (f) { + case ElfRgba32f: return "rgba32f"; + case ElfRgba16f: return "rgba16f"; + case ElfRg32f: return "rg32f"; + case ElfRg16f: return "rg16f"; + case ElfR11fG11fB10f: return "r11f_g11f_b10f"; + case ElfR32f: return "r32f"; + case ElfR16f: return "r16f"; + case ElfRgba16: return "rgba16"; + case ElfRgb10A2: return "rgb10_a2"; + case ElfRgba8: return "rgba8"; + case ElfRg16: return "rg16"; + case ElfRg8: return "rg8"; + case ElfR16: return "r16"; + case ElfR8: return "r8"; + case ElfRgba16Snorm: return "rgba16_snorm"; + case ElfRgba8Snorm: return "rgba8_snorm"; + case ElfRg16Snorm: return "rg16_snorm"; + case ElfRg8Snorm: return "rg8_snorm"; + case ElfR16Snorm: return "r16_snorm"; + case ElfR8Snorm: return "r8_snorm"; + + case ElfRgba32i: return "rgba32i"; + case ElfRgba16i: return "rgba16i"; + case ElfRgba8i: return "rgba8i"; + case ElfRg32i: return "rg32i"; + case ElfRg16i: return "rg16i"; + case ElfRg8i: return "rg8i"; + case ElfR32i: return "r32i"; + case ElfR16i: return "r16i"; + case ElfR8i: return "r8i"; + + case ElfRgba32ui: return "rgba32ui"; + case ElfRgba16ui: return "rgba16ui"; + case ElfRgba8ui: return "rgba8ui"; + case ElfRg32ui: return "rg32ui"; + case ElfRg16ui: return "rg16ui"; + case ElfRgb10a2ui: return "rgb10_a2ui"; + case ElfRg8ui: return "rg8ui"; + case ElfR32ui: return "r32ui"; + case ElfR16ui: return "r16ui"; + case ElfR8ui: return "r8ui"; + case ElfR64ui: return "r64ui"; + case ElfR64i: return "r64i"; + default: return "none"; + } + } + static const char* getLayoutDepthString(TLayoutDepth d) + { + switch (d) { + case EldAny: return "depth_any"; + case EldGreater: return "depth_greater"; + case EldLess: return "depth_less"; + case EldUnchanged: return "depth_unchanged"; + default: return "none"; + } + } + static const char* getLayoutStencilString(TLayoutStencil s) + { + switch (s) { + case ElsRefUnchangedFrontAMD: return "stencil_ref_unchanged_front_amd"; + case ElsRefGreaterFrontAMD: return "stencil_ref_greater_front_amd"; + case ElsRefLessFrontAMD: return "stencil_ref_less_front_amd"; + case ElsRefUnchangedBackAMD: return "stencil_ref_unchanged_back_amd"; + case 
ElsRefGreaterBackAMD: return "stencil_ref_greater_back_amd";
+        case ElsRefLessBackAMD: return "stencil_ref_less_back_amd";
+        default: return "none";
+        }
+    }
+    static const char* getBlendEquationString(TBlendEquationShift e)
+    {
+        switch (e) {
+        case EBlendMultiply: return "blend_support_multiply";
+        case EBlendScreen: return "blend_support_screen";
+        case EBlendOverlay: return "blend_support_overlay";
+        case EBlendDarken: return "blend_support_darken";
+        case EBlendLighten: return "blend_support_lighten";
+        case EBlendColordodge: return "blend_support_colordodge";
+        case EBlendColorburn: return "blend_support_colorburn";
+        case EBlendHardlight: return "blend_support_hardlight";
+        case EBlendSoftlight: return "blend_support_softlight";
+        case EBlendDifference: return "blend_support_difference";
+        case EBlendExclusion: return "blend_support_exclusion";
+        case EBlendHslHue: return "blend_support_hsl_hue";
+        case EBlendHslSaturation: return "blend_support_hsl_saturation";
+        case EBlendHslColor: return "blend_support_hsl_color";
+        case EBlendHslLuminosity: return "blend_support_hsl_luminosity";
+        case EBlendAllEquations: return "blend_support_all_equations";
+        default: return "unknown";
+        }
+    }
+    static const char* getGeometryString(TLayoutGeometry geometry)
+    {
+        switch (geometry) {
+        case ElgPoints: return "points";
+        case ElgLines: return "lines";
+        case ElgLinesAdjacency: return "lines_adjacency";
+        case ElgLineStrip: return "line_strip";
+        case ElgTriangles: return "triangles";
+        case ElgTrianglesAdjacency: return "triangles_adjacency";
+        case ElgTriangleStrip: return "triangle_strip";
+        case ElgQuads: return "quads";
+        case ElgIsolines: return "isolines";
+        default: return "none";
+        }
+    }
+    static const char* getVertexSpacingString(TVertexSpacing spacing)
+    {
+        switch (spacing) {
+        case EvsEqual: return "equal_spacing";
+        case EvsFractionalEven: return "fractional_even_spacing";
+        case EvsFractionalOdd: return "fractional_odd_spacing";
+        default: return "none";
+        }
+    }
+    static const char* getVertexOrderString(TVertexOrder order)
+    {
+        switch (order) {
+        case EvoCw: return "cw";
+        case EvoCcw: return "ccw";
+        default: return "none";
+        }
+    }
+    static int mapGeometryToSize(TLayoutGeometry geometry)
+    {
+        switch (geometry) {
+        case ElgPoints: return 1;
+        case ElgLines: return 2;
+        case ElgLinesAdjacency: return 4;
+        case ElgTriangles: return 3;
+        case ElgTrianglesAdjacency: return 6;
+        default: return 0;
+        }
+    }
+    static const char* getInterlockOrderingString(TInterlockOrdering order)
+    {
+        switch (order) {
+        case EioPixelInterlockOrdered: return "pixel_interlock_ordered";
+        case EioPixelInterlockUnordered: return "pixel_interlock_unordered";
+        case EioSampleInterlockOrdered: return "sample_interlock_ordered";
+        case EioSampleInterlockUnordered: return "sample_interlock_unordered";
+        case EioShadingRateInterlockOrdered: return "shading_rate_interlock_ordered";
+        case EioShadingRateInterlockUnordered: return "shading_rate_interlock_unordered";
+        default: return "none";
+        }
+    }
+#endif
+};
+
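As an illustration only (not part of this patch), a caller might exercise the qualifier queries and the static string helpers above like this; the variable q and the chosen values are hypothetical:

    #include <cstdio>
    // Build a stage-input qualifier and report a couple of its properties.
    glslang::TQualifier q;
    q.clear();                          // resets to EvqTemporary with no layout set
    q.storage = glslang::EvqVaryingIn;  // stage input, like an 'in' variable
    q.layoutLocation = 2;               // like layout(location = 2)
    if (q.isPipeInput() && q.hasLocation())
        std::printf("in at location=%u, packing keyword '%s'\n",
                    (unsigned)q.layoutLocation,
                    glslang::TQualifier::getLayoutPackingString(glslang::ElpStd430));
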
+// Qualifiers that don't need to be kept per object.  They have shader scope, not object scope.
+// So, they will not be part of TType, TQualifier, etc.
+struct TShaderQualifiers {
+    TLayoutGeometry geometry;          // geometry/tessellation shader in/out primitives
+    bool pixelCenterInteger;           // fragment shader
+    bool originUpperLeft;              // fragment shader
+    int invocations;
+    int vertices;                      // for tessellation "vertices", geometry & mesh "max_vertices"
+    TVertexSpacing spacing;
+    TVertexOrder order;
+    bool pointMode;
+    int localSize[3];                  // compute shader
+    bool localSizeNotDefault[3];       // compute shader
+    int localSizeSpecId[3];            // compute shader specialization id for gl_WorkGroupSize
+#ifndef GLSLANG_WEB
+    bool earlyFragmentTests;           // fragment input
+    bool postDepthCoverage;            // fragment input
+    bool earlyAndLateFragmentTestsAMD; // fragment input
+    TLayoutDepth layoutDepth;
+    TLayoutStencil layoutStencil;
+    bool blendEquation;                // true if any blend equation was specified
+    int numViews;                      // multiview extensions
+    TInterlockOrdering interlockOrdering;
+    bool layoutOverrideCoverage;       // true if layout override_coverage set
+    bool layoutDerivativeGroupQuads;   // true if layout derivative_group_quadsNV set
+    bool layoutDerivativeGroupLinear;  // true if layout derivative_group_linearNV set
+    int primitives;                    // mesh shader "max_primitives"
+    bool layoutPrimitiveCulling;       // true if layout primitive_culling set
+    TLayoutDepth getDepth() const { return layoutDepth; }
+    TLayoutStencil getStencil() const { return layoutStencil; }
+#else
+    TLayoutDepth getDepth() const { return EldNone; }
+#endif
+
+    void init()
+    {
+        geometry = ElgNone;
+        originUpperLeft = false;
+        pixelCenterInteger = false;
+        invocations = TQualifier::layoutNotSet;
+        vertices = TQualifier::layoutNotSet;
+        spacing = EvsNone;
+        order = EvoNone;
+        pointMode = false;
+        localSize[0] = 1;
+        localSize[1] = 1;
+        localSize[2] = 1;
+        localSizeNotDefault[0] = false;
+        localSizeNotDefault[1] = false;
+        localSizeNotDefault[2] = false;
+        localSizeSpecId[0] = TQualifier::layoutNotSet;
+        localSizeSpecId[1] = TQualifier::layoutNotSet;
+        localSizeSpecId[2] = TQualifier::layoutNotSet;
+#ifndef GLSLANG_WEB
+        earlyFragmentTests = false;
+        earlyAndLateFragmentTestsAMD = false;
+        postDepthCoverage = false;
+        layoutDepth = EldNone;
+        layoutStencil = ElsNone;
+        blendEquation = false;
+        numViews = TQualifier::layoutNotSet;
+        layoutOverrideCoverage = false;
+        layoutDerivativeGroupQuads = false;
+        layoutDerivativeGroupLinear = false;
+        layoutPrimitiveCulling = false;
+        primitives = TQualifier::layoutNotSet;
+        interlockOrdering = EioNone;
+#endif
+    }
+
+#ifdef GLSLANG_WEB
+    bool hasBlendEquation() const { return false; }
+#else
+    bool hasBlendEquation() const { return blendEquation; }
+#endif
+
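TShaderQualifiers is value-initialized through init() rather than a constructor, and per-stage settings accumulate across compilation units through merge() below: a field overrides only when explicitly set, and an unset field never erases what another unit established. A small sketch of that contract (illustration only; both objects are hypothetical):

    #include <cassert>
    glslang::TShaderQualifiers base, overlay;
    base.init();
    overlay.init();
    overlay.localSize[0] = 64;   // e.g. layout(local_size_x = 64) seen in one unit
    overlay.pointMode = true;
    base.merge(overlay);         // 64 and pointMode land in 'base'; defaults stay put
    assert(base.localSize[0] == 64 && base.pointMode);
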
+    // Merge in characteristics from the 'src' qualifier.  They can override when
+    // set, but never erase when not set.
+    void merge(const TShaderQualifiers& src)
+    {
+        if (src.geometry != ElgNone)
+            geometry = src.geometry;
+        if (src.pixelCenterInteger)
+            pixelCenterInteger = src.pixelCenterInteger;
+        if (src.originUpperLeft)
+            originUpperLeft = src.originUpperLeft;
+        if (src.invocations != TQualifier::layoutNotSet)
+            invocations = src.invocations;
+        if (src.vertices != TQualifier::layoutNotSet)
+            vertices = src.vertices;
+        if (src.spacing != EvsNone)
+            spacing = src.spacing;
+        if (src.order != EvoNone)
+            order = src.order;
+        if (src.pointMode)
+            pointMode = true;
+        for (int i = 0; i < 3; ++i) {
+            if (src.localSize[i] > 1)
+                localSize[i] = src.localSize[i];
+        }
+        for (int i = 0; i < 3; ++i) {
+            localSizeNotDefault[i] = src.localSizeNotDefault[i] || localSizeNotDefault[i];
+        }
+        for (int i = 0; i < 3; ++i) {
+            if (src.localSizeSpecId[i] != TQualifier::layoutNotSet)
+                localSizeSpecId[i] = src.localSizeSpecId[i];
+        }
+#ifndef GLSLANG_WEB
+        if (src.earlyFragmentTests)
+            earlyFragmentTests = true;
+        if (src.earlyAndLateFragmentTestsAMD)
+            earlyAndLateFragmentTestsAMD = true;
+        if (src.postDepthCoverage)
+            postDepthCoverage = true;
+        if (src.layoutDepth)
+            layoutDepth = src.layoutDepth;
+        if (src.layoutStencil)
+            layoutStencil = src.layoutStencil;
+        if (src.blendEquation)
+            blendEquation = src.blendEquation;
+        if (src.numViews != TQualifier::layoutNotSet)
+            numViews = src.numViews;
+        if (src.layoutOverrideCoverage)
+            layoutOverrideCoverage = src.layoutOverrideCoverage;
+        if (src.layoutDerivativeGroupQuads)
+            layoutDerivativeGroupQuads = src.layoutDerivativeGroupQuads;
+        if (src.layoutDerivativeGroupLinear)
+            layoutDerivativeGroupLinear = src.layoutDerivativeGroupLinear;
+        if (src.primitives != TQualifier::layoutNotSet)
+            primitives = src.primitives;
+        if (src.interlockOrdering != EioNone)
+            interlockOrdering = src.interlockOrdering;
+        if (src.layoutPrimitiveCulling)
+            layoutPrimitiveCulling = src.layoutPrimitiveCulling;
+#endif
+    }
+};
+
+//
+// TPublicType is just temporarily used while parsing and not quite the same
+// information kept per node in TType.  Due to the bison stack, it can't have
+// types that it thinks have non-trivial constructors.  It should
+// just be used while recognizing the grammar, not anything else.
+// Once enough is known about the situation, the proper information is
+// moved into a TType, or the parse context, etc.
+// +class TPublicType { +public: + TBasicType basicType; + TSampler sampler; + TQualifier qualifier; + TShaderQualifiers shaderQualifiers; + int vectorSize : 4; + int matrixCols : 4; + int matrixRows : 4; + bool coopmat : 1; + TArraySizes* arraySizes; + const TType* userDef; + TSourceLoc loc; + TArraySizes* typeParameters; +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type directive + TSpirvType* spirvType; +#endif + +#ifdef GLSLANG_WEB + bool isCoopmat() const { return false; } +#else + bool isCoopmat() const { return coopmat; } +#endif + + void initType(const TSourceLoc& l) + { + basicType = EbtVoid; + vectorSize = 1; + matrixRows = 0; + matrixCols = 0; + arraySizes = nullptr; + userDef = nullptr; + loc = l; + typeParameters = nullptr; + coopmat = false; +#ifndef GLSLANG_WEB + spirvType = nullptr; +#endif + } + + void initQualifiers(bool global = false) + { + qualifier.clear(); + if (global) + qualifier.storage = EvqGlobal; + } + + void init(const TSourceLoc& l, bool global = false) + { + initType(l); + sampler.clear(); + initQualifiers(global); + shaderQualifiers.init(); + } + + void setVector(int s) + { + matrixRows = 0; + matrixCols = 0; + vectorSize = s; + } + + void setMatrix(int c, int r) + { + matrixRows = r; + matrixCols = c; + vectorSize = 0; + } + + bool isScalar() const + { + return matrixCols == 0 && vectorSize == 1 && arraySizes == nullptr && userDef == nullptr; + } + +#ifndef GLSLANG_WEB + // GL_EXT_spirv_intrinsics + void setSpirvType(const TSpirvInstruction& spirvInst, const TSpirvTypeParameters* typeParams = nullptr); +#endif + + // "Image" is a superset of "Subpass" + bool isImage() const { return basicType == EbtSampler && sampler.isImage(); } + bool isSubpass() const { return basicType == EbtSampler && sampler.isSubpass(); } +}; + +// +// Base class for things that have a type. 
+// +class TType { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // for "empty" type (no args) or simple scalar/vector/matrix + explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for explicit precision qualifier + TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + qualifier.precision = p; + assert(p >= EpqNone && p <= EpqHigh); + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for turning a TPublicType into a TType, using a shallow copy + explicit TType(const TPublicType& p) : + basicType(p.basicType), + vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmat(p.coopmat), + arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters) +#ifndef GLSLANG_WEB + , spirvType(p.spirvType) +#endif + { + if (basicType == EbtSampler) + sampler = p.sampler; + else + sampler.clear(); + qualifier = p.qualifier; + if (p.userDef) { + if (p.userDef->basicType == EbtReference) { + basicType = EbtReference; + referentType = p.userDef->referentType; + } else { + structure = p.userDef->getWritableStruct(); // public type is short-lived; there are no sharing issues + } + typeName = NewPoolTString(p.userDef->getTypeName().c_str()); + } + if (p.isCoopmat() && p.typeParameters && p.typeParameters->getNumDims() > 0) { + int numBits = p.typeParameters->getDimSize(0); + if (p.basicType == EbtFloat && numBits == 16) { + basicType = EbtFloat16; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtUint && numBits == 8) { + basicType = EbtUint8; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtInt && numBits == 8) { + basicType = EbtInt8; + qualifier.precision = EpqNone; + } + } + } + // for construction of sampler types + TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : + basicType(EbtSampler), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), + sampler(sampler), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + qualifier.clear(); + qualifier.storage = q; + } + // to efficiently make a dereferenced type + // without ever duplicating the outer structure that will be thrown away + // and using only shallow copy + TType(const TType& type, int derefIndex, bool rowMajor = false) + { + if (type.isArray()) { + shallowCopy(type); + if (type.getArraySizes()->getNumDims() == 1) { + arraySizes = nullptr; + } else { + // want our own copy of the array, 
so we can edit it + arraySizes = new TArraySizes; + arraySizes->copyDereferenced(*type.arraySizes); + } + } else if (type.basicType == EbtStruct || type.basicType == EbtBlock) { + // do a structure dereference + const TTypeList& memberList = *type.getStruct(); + shallowCopy(*memberList[derefIndex].type); + return; + } else { + // do a vector/matrix dereference + shallowCopy(type); + if (matrixCols > 0) { + // dereference from matrix to vector + if (rowMajor) + vectorSize = matrixCols; + else + vectorSize = matrixRows; + matrixCols = 0; + matrixRows = 0; + if (vectorSize == 1) + vector1 = true; + } else if (isVector()) { + // dereference from vector to scalar + vectorSize = 1; + vector1 = false; + } else if (isCoopMat()) { + coopmat = false; + typeParameters = nullptr; + } + } + } + // for making structures, ... + TType(TTypeList* userDef, const TString& n) : + basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + typeName = NewPoolTString(n.c_str()); + } + // For interface blocks + TType(TTypeList* userDef, const TString& n, const TQualifier& q) : + basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + typeName = NewPoolTString(n.c_str()); + } + // for block reference (first parameter must be EbtReference) + explicit TType(TBasicType t, const TType &p, const TString& n) : + basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + assert(t == EbtReference); + typeName = NewPoolTString(n.c_str()); + qualifier.clear(); + qualifier.storage = p.qualifier.storage; + referentType = p.clone(); + } + virtual ~TType() {} + + // Not for use across pool pops; it will cause multiple instances of TType to point to the same information. + // This only works if that information (like a structure's list of types) does not change and + // the instances are sharing the same pool. + void shallowCopy(const TType& copyOf) + { + basicType = copyOf.basicType; + sampler = copyOf.sampler; + qualifier = copyOf.qualifier; + vectorSize = copyOf.vectorSize; + matrixCols = copyOf.matrixCols; + matrixRows = copyOf.matrixRows; + vector1 = copyOf.vector1; + arraySizes = copyOf.arraySizes; // copying the pointer only, not the contents + fieldName = copyOf.fieldName; + typeName = copyOf.typeName; + if (isStruct()) { + structure = copyOf.structure; + } else { + referentType = copyOf.referentType; + } + typeParameters = copyOf.typeParameters; +#ifndef GLSLANG_WEB + spirvType = copyOf.spirvType; +#endif + coopmat = copyOf.isCoopMat(); + } + + // Make complete copy of the whole type graph rooted at 'copyOf'. 
+    void deepCopy(const TType& copyOf)
+    {
+        TMap<TTypeList*,TTypeList*> copied;  // to enable copying a type graph as a graph, not a tree
+        deepCopy(copyOf, copied);
+    }
+
+    // Recursively make temporary
+    void makeTemporary()
+    {
+        getQualifier().makeTemporary();
+
+        if (isStruct())
+            for (unsigned int i = 0; i < structure->size(); ++i)
+                (*structure)[i].type->makeTemporary();
+    }
+
+    TType* clone() const
+    {
+        TType *newType = new TType();
+        newType->deepCopy(*this);
+
+        return newType;
+    }
+
+    void makeVector() { vector1 = true; }
+
+    virtual void hideMember() { basicType = EbtVoid; vectorSize = 1; }
+    virtual bool hiddenMember() const { return basicType == EbtVoid; }
+
+    virtual void setFieldName(const TString& n) { fieldName = NewPoolTString(n.c_str()); }
+    virtual const TString& getTypeName() const
+    {
+        assert(typeName);
+        return *typeName;
+    }
+
+    virtual const TString& getFieldName() const
+    {
+        assert(fieldName);
+        return *fieldName;
+    }
+    TShaderInterface getShaderInterface() const
+    {
+        if (basicType != EbtBlock)
+            return EsiNone;
+
+        switch (qualifier.storage) {
+        default:
+            return EsiNone;
+        case EvqVaryingIn:
+            return EsiInput;
+        case EvqVaryingOut:
+            return EsiOutput;
+        case EvqUniform:
+        case EvqBuffer:
+            return EsiUniform;
+        }
+    }
+
+    virtual TBasicType getBasicType() const { return basicType; }
+    virtual const TSampler& getSampler() const { return sampler; }
+    virtual TSampler& getSampler() { return sampler; }
+
+    virtual TQualifier& getQualifier() { return qualifier; }
+    virtual const TQualifier& getQualifier() const { return qualifier; }
+
+    virtual int getVectorSize() const { return vectorSize; }  // returns 1 for either scalar or vector of size 1, valid for both
+    virtual int getMatrixCols() const { return matrixCols; }
+    virtual int getMatrixRows() const { return matrixRows; }
+    virtual int getOuterArraySize() const { return arraySizes->getOuterSize(); }
+    virtual TIntermTyped* getOuterArrayNode() const { return arraySizes->getOuterNode(); }
+    virtual int getCumulativeArraySize() const { return arraySizes->getCumulativeSize(); }
+#ifdef GLSLANG_WEB
+    bool isArrayOfArrays() const { return false; }
+#else
+    bool isArrayOfArrays() const { return arraySizes != nullptr && arraySizes->getNumDims() > 1; }
+#endif
+    virtual int getImplicitArraySize() const { return arraySizes->getImplicitSize(); }
+    virtual const TArraySizes* getArraySizes() const { return arraySizes; }
+    virtual TArraySizes* getArraySizes() { return arraySizes; }
+    virtual TType* getReferentType() const { return referentType; }
+    virtual const TArraySizes* getTypeParameters() const { return typeParameters; }
+    virtual TArraySizes* getTypeParameters() { return typeParameters; }
+
+    virtual bool isScalar() const { return ! isVector() && ! isMatrix() && ! isStruct() && ! isArray(); }
+    virtual bool isScalarOrVec1() const { return isScalar() || vector1; }
+    virtual bool isScalarOrVector() const { return !isMatrix() && !isStruct() && !isArray(); }
+    virtual bool isVector() const { return vectorSize > 1 || vector1; }
+    virtual bool isMatrix() const { return matrixCols ? true : false; }
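One component can mean two different things here: a true scalar, or a one-component vector (as HLSL's float1 produces), and the vector1 bit is what keeps them apart in isScalar(), isVector(), and isScalarOrVec1() above. A sketch using the scalar/vector constructor at the top of this class (illustration only, not part of the patch):

    #include <cassert>
    glslang::TType scalarF(glslang::EbtFloat, glslang::EvqTemporary, 1);
    glslang::TType vec1F(glslang::EbtFloat, glslang::EvqTemporary, 1, 0, 0, /*isVector*/true);
    // Both hold one component, but only the second reports as a vector:
    assert(scalarF.isScalar() && !scalarF.isVector());
    assert(!vec1F.isScalar() && vec1F.isVector() && vec1F.isScalarOrVec1());
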
+    virtual bool isArray() const { return arraySizes != nullptr; }
+    virtual bool isSizedArray() const { return isArray() && arraySizes->isSized(); }
+    virtual bool isUnsizedArray() const { return isArray() && !arraySizes->isSized(); }
+    virtual bool isArrayVariablyIndexed() const { assert(isArray()); return arraySizes->isVariablyIndexed(); }
+    virtual void setArrayVariablyIndexed() { assert(isArray()); arraySizes->setVariablyIndexed(); }
+    virtual void updateImplicitArraySize(int size) { assert(isArray()); arraySizes->updateImplicitSize(size); }
+    virtual bool isStruct() const { return basicType == EbtStruct || basicType == EbtBlock; }
+    virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16; }
+    virtual bool isIntegerDomain() const
+    {
+        switch (basicType) {
+        case EbtInt8:
+        case EbtUint8:
+        case EbtInt16:
+        case EbtUint16:
+        case EbtInt:
+        case EbtUint:
+        case EbtInt64:
+        case EbtUint64:
+        case EbtAtomicUint:
+            return true;
+        default:
+            break;
+        }
+        return false;
+    }
+    virtual bool isOpaque() const { return basicType == EbtSampler
+#ifndef GLSLANG_WEB
+        || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery
+#endif
+        ; }
+    virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; }
+
+    // "Image" is a superset of "Subpass"
+    virtual bool isImage() const { return basicType == EbtSampler && getSampler().isImage(); }
+    virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); }
+    virtual bool isTexture() const { return basicType == EbtSampler && getSampler().isTexture(); }
+    // Check the block-name convention of creating a block without populating its members:
+    virtual bool isUnusableName() const { return isStruct() && structure == nullptr; }
+    virtual bool isParameterized() const { return typeParameters != nullptr; }
+#ifdef GLSLANG_WEB
+    bool isAtomic() const { return false; }
+    bool isCoopMat() const { return false; }
+    bool isReference() const { return false; }
+    bool isSpirvType() const { return false; }
+#else
+    bool isAtomic() const { return basicType == EbtAtomicUint; }
+    bool isCoopMat() const { return coopmat; }
+    bool isReference() const { return getBasicType() == EbtReference; }
+    bool isSpirvType() const { return getBasicType() == EbtSpirvType; }
+#endif
+
+    // return true if this type contains any subtype which satisfies the given predicate.
+    template <typename P>
+    bool contains(P predicate) const
+    {
+        if (predicate(this))
+            return true;
+
+        const auto hasa = [predicate](const TTypeLoc& tl) { return tl.type->contains(predicate); };
+
+        return isStruct() && std::any_of(structure->begin(), structure->end(), hasa);
+    }
+
+    // Recursively checks if the type contains the given basic type
+    virtual bool containsBasicType(TBasicType checkType) const
+    {
+        return contains([checkType](const TType* t) { return t->basicType == checkType; } );
+    }
+
+    // Recursively check the structure for any arrays, needed for some error checks
+    virtual bool containsArray() const
+    {
+        return contains([](const TType* t) { return t->isArray(); } );
+    }
+
+    // Check the structure for any structures, needed for some error checks
+    virtual bool containsStructure() const
+    {
+        return contains([this](const TType* t) { return t != this && t->isStruct(); } );
+    }
+
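Every containsXxx query above is one instantiation of the contains() template: pass any predicate over const TType* and it is applied to this type and, recursively, to every struct or block member. A new query therefore needs no class change; a caller can phrase it on the spot (illustration only; 'type' is a hypothetical TType):

    // Does any (possibly nested) member use 8-bit integer storage?
    bool any8Bit = type.contains([](const glslang::TType* t) {
        return t->getBasicType() == glslang::EbtInt8 || t->getBasicType() == glslang::EbtUint8;
    });
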
+    // Recursively check the structure for any unsized arrays, needed for triggering a copyUp().
+    virtual bool containsUnsizedArray() const
+    {
+        return contains([](const TType* t) { return t->isUnsizedArray(); } );
+    }
+
+    virtual bool containsOpaque() const
+    {
+        return contains([](const TType* t) { return t->isOpaque(); } );
+    }
+
+    // Recursively checks if the type contains a built-in variable
+    virtual bool containsBuiltIn() const
+    {
+        return contains([](const TType* t) { return t->isBuiltIn(); } );
+    }
+
+    virtual bool containsNonOpaque() const
+    {
+        const auto nonOpaque = [](const TType* t) {
+            switch (t->basicType) {
+            case EbtVoid:
+            case EbtFloat:
+            case EbtDouble:
+            case EbtFloat16:
+            case EbtInt8:
+            case EbtUint8:
+            case EbtInt16:
+            case EbtUint16:
+            case EbtInt:
+            case EbtUint:
+            case EbtInt64:
+            case EbtUint64:
+            case EbtBool:
+            case EbtReference:
+                return true;
+            default:
+                return false;
+            }
+        };
+
+        return contains(nonOpaque);
+    }
+
+    virtual bool containsSpecializationSize() const
+    {
+        return contains([](const TType* t) { return t->isArray() && t->arraySizes->isOuterSpecialization(); } );
+    }
+
+#ifdef GLSLANG_WEB
+    bool containsDouble() const { return false; }
+    bool contains16BitFloat() const { return false; }
+    bool contains64BitInt() const { return false; }
+    bool contains16BitInt() const { return false; }
+    bool contains8BitInt() const { return false; }
+    bool containsCoopMat() const { return false; }
+    bool containsReference() const { return false; }
+#else
+    bool containsDouble() const
+    {
+        return containsBasicType(EbtDouble);
+    }
+    bool contains16BitFloat() const
+    {
+        return containsBasicType(EbtFloat16);
+    }
+    bool contains64BitInt() const
+    {
+        return containsBasicType(EbtInt64) || containsBasicType(EbtUint64);
+    }
+    bool contains16BitInt() const
+    {
+        return containsBasicType(EbtInt16) || containsBasicType(EbtUint16);
+    }
+    bool contains8BitInt() const
+    {
+        return containsBasicType(EbtInt8) || containsBasicType(EbtUint8);
+    }
+    bool containsCoopMat() const
+    {
+        return contains([](const TType* t) { return t->coopmat; } );
+    }
+    bool containsReference() const
+    {
+        return containsBasicType(EbtReference);
+    }
+#endif
+
+    // Array editing methods.  Array descriptors can be shared across
+    // type instances.  This allows all uses of the same array
+    // to be updated at once.  E.g., all nodes can be explicitly sized
+    // by tracking and correcting one implicit size.  Or, all nodes
+    // can get the explicit size on a redeclaration that gives size.
+    //
+    // N.B.: Don't share with the shared symbol tables (symbols are
+    // marked as isReadOnly()).  Such symbols with arrays that will be
+    // edited need to copyUp() on first use, so that
+    // A) the edits don't affect the shared symbol table, and
+    // B) the edits are shared across all users.
+    void updateArraySizes(const TType& type)
+    {
+        // For when we may already be sharing existing array descriptors,
+        // keeping the pointers the same, just updating the contents.
+        assert(arraySizes != nullptr);
+        assert(type.arraySizes != nullptr);
+        *arraySizes = *type.arraySizes;
+    }
+    void copyArraySizes(const TArraySizes& s)
+    {
+        // For setting a fresh new set of array sizes, not yet worrying about sharing.
+        arraySizes = new TArraySizes;
+        *arraySizes = s;
+    }
+    void transferArraySizes(TArraySizes* s)
+    {
+        // For setting an already allocated set of sizes that this type can use
+        // (no copy made).
+        arraySizes = s;
+    }
+    void clearArraySizes()
+    {
+        arraySizes = nullptr;
+    }
+
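The comment block above states the key invariant: TArraySizes descriptors may be shared between TType instances on purpose, which is why transferArraySizes() (adopt the pointer) and copyArraySizes() (clone) are deliberately distinct operations. A sketch of intentional sharing (illustration only; typeA/typeB are hypothetical TType objects, and it assumes the glslang pool allocator is live since TArraySizes is pool-allocated):

    glslang::TArraySizes* shared = new glslang::TArraySizes;
    shared->addInnerSize();            // one unsized (run-time) dimension for now
    typeA.transferArraySizes(shared);  // both types now reference the same descriptor
    typeB.transferArraySizes(shared);
    typeA.changeOuterArraySize(16);    // typeB.getOuterArraySize() is now 16 as well
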
+    // Add inner array sizes, to any existing sizes, via copy; the
+    // sizes passed in can still be reused for other purposes.
+    void copyArrayInnerSizes(const TArraySizes* s)
+    {
+        if (s != nullptr) {
+            if (arraySizes == nullptr)
+                copyArraySizes(*s);
+            else
+                arraySizes->addInnerSizes(*s);
+        }
+    }
+    void changeOuterArraySize(int s) { arraySizes->changeOuterSize(s); }
+
+    // Recursively make the implicit array size the explicit array size.
+    // Explicit arrays are compile-time or link-time sized, never run-time sized.
+    // Sometimes, policy calls for an array to be run-time sized even if it was
+    // never variably indexed: Don't turn a 'skipNonvariablyIndexed' array into
+    // an explicit array.
+    void adoptImplicitArraySizes(bool skipNonvariablyIndexed)
+    {
+        if (isUnsizedArray() && !(skipNonvariablyIndexed || isArrayVariablyIndexed()))
+            changeOuterArraySize(getImplicitArraySize());
+        // For multi-dim per-view arrays, set unsized inner dimension size to 1
+        if (qualifier.isPerView() && arraySizes && arraySizes->isInnerUnsized())
+            arraySizes->clearInnerUnsized();
+        if (isStruct() && structure->size() > 0) {
+            int lastMember = (int)structure->size() - 1;
+            for (int i = 0; i < lastMember; ++i)
+                (*structure)[i].type->adoptImplicitArraySizes(false);
+            // implement the "last member of an SSBO" policy
+            (*structure)[lastMember].type->adoptImplicitArraySizes(getQualifier().storage == EvqBuffer);
+        }
+    }
+
+    void updateTypeParameters(const TType& type)
+    {
+        // For when we may already be sharing existing array descriptors,
+        // keeping the pointers the same, just updating the contents.
+        assert(typeParameters != nullptr);
+        assert(type.typeParameters != nullptr);
+        *typeParameters = *type.typeParameters;
+    }
+    void copyTypeParameters(const TArraySizes& s)
+    {
+        // For setting a fresh new set of type parameters, not yet worrying about sharing.
+        typeParameters = new TArraySizes;
+        *typeParameters = s;
+    }
+    void transferTypeParameters(TArraySizes* s)
+    {
+        // For setting an already allocated set of sizes that this type can use
+        // (no copy made).
+        typeParameters = s;
+    }
+    void clearTypeParameters()
+    {
+        typeParameters = nullptr;
+    }
+
+    // Add inner array sizes, to any existing sizes, via copy; the
+    // sizes passed in can still be reused for other purposes.
+ void copyTypeParametersInnerSizes(const TArraySizes* s) + { + if (s != nullptr) { + if (typeParameters == nullptr) + copyTypeParameters(*s); + else + typeParameters->addInnerSizes(*s); + } + } + + const char* getBasicString() const + { + return TType::getBasicString(basicType); + } + + static const char* getBasicString(TBasicType t) + { + switch (t) { + case EbtFloat: return "float"; + case EbtInt: return "int"; + case EbtUint: return "uint"; + case EbtSampler: return "sampler/image"; +#ifndef GLSLANG_WEB + case EbtVoid: return "void"; + case EbtDouble: return "double"; + case EbtFloat16: return "float16_t"; + case EbtInt8: return "int8_t"; + case EbtUint8: return "uint8_t"; + case EbtInt16: return "int16_t"; + case EbtUint16: return "uint16_t"; + case EbtInt64: return "int64_t"; + case EbtUint64: return "uint64_t"; + case EbtBool: return "bool"; + case EbtAtomicUint: return "atomic_uint"; + case EbtStruct: return "structure"; + case EbtBlock: return "block"; + case EbtAccStruct: return "accelerationStructureNV"; + case EbtRayQuery: return "rayQueryEXT"; + case EbtReference: return "reference"; + case EbtString: return "string"; + case EbtSpirvType: return "spirv_type"; +#endif + default: return "unknown type"; + } + } + +#ifdef GLSLANG_WEB + TString getCompleteString() const { return ""; } + const char* getStorageQualifierString() const { return ""; } + const char* getBuiltInVariableString() const { return ""; } + const char* getPrecisionQualifierString() const { return ""; } + TString getBasicTypeString() const { return ""; } +#else + TString getCompleteString(bool syntactic = false, bool getQualifiers = true, bool getPrecision = true, + bool getType = true, TString name = "", TString structName = "") const + { + TString typeString; + + const auto appendStr = [&](const char* s) { typeString.append(s); }; + const auto appendUint = [&](unsigned int u) { typeString.append(std::to_string(u).c_str()); }; + const auto appendInt = [&](int i) { typeString.append(std::to_string(i).c_str()); }; + + if (getQualifiers) { + if (qualifier.hasSprivDecorate()) + appendStr(qualifier.getSpirvDecorateQualifierString().c_str()); + + if (qualifier.hasLayout()) { + // To reduce noise, skip this if the only layout is an xfb_buffer + // with no triggering xfb_offset. 
+ TQualifier noXfbBuffer = qualifier; + noXfbBuffer.layoutXfbBuffer = TQualifier::layoutXfbBufferEnd; + if (noXfbBuffer.hasLayout()) { + appendStr("layout("); + if (qualifier.hasAnyLocation()) { + appendStr(" location="); + appendUint(qualifier.layoutLocation); + if (qualifier.hasComponent()) { + appendStr(" component="); + appendUint(qualifier.layoutComponent); + } + if (qualifier.hasIndex()) { + appendStr(" index="); + appendUint(qualifier.layoutIndex); + } + } + if (qualifier.hasSet()) { + appendStr(" set="); + appendUint(qualifier.layoutSet); + } + if (qualifier.hasBinding()) { + appendStr(" binding="); + appendUint(qualifier.layoutBinding); + } + if (qualifier.hasStream()) { + appendStr(" stream="); + appendUint(qualifier.layoutStream); + } + if (qualifier.hasMatrix()) { + appendStr(" "); + appendStr(TQualifier::getLayoutMatrixString(qualifier.layoutMatrix)); + } + if (qualifier.hasPacking()) { + appendStr(" "); + appendStr(TQualifier::getLayoutPackingString(qualifier.layoutPacking)); + } + if (qualifier.hasOffset()) { + appendStr(" offset="); + appendInt(qualifier.layoutOffset); + } + if (qualifier.hasAlign()) { + appendStr(" align="); + appendInt(qualifier.layoutAlign); + } + if (qualifier.hasFormat()) { + appendStr(" "); + appendStr(TQualifier::getLayoutFormatString(qualifier.layoutFormat)); + } + if (qualifier.hasXfbBuffer() && qualifier.hasXfbOffset()) { + appendStr(" xfb_buffer="); + appendUint(qualifier.layoutXfbBuffer); + } + if (qualifier.hasXfbOffset()) { + appendStr(" xfb_offset="); + appendUint(qualifier.layoutXfbOffset); + } + if (qualifier.hasXfbStride()) { + appendStr(" xfb_stride="); + appendUint(qualifier.layoutXfbStride); + } + if (qualifier.hasAttachment()) { + appendStr(" input_attachment_index="); + appendUint(qualifier.layoutAttachment); + } + if (qualifier.hasSpecConstantId()) { + appendStr(" constant_id="); + appendUint(qualifier.layoutSpecConstantId); + } + if (qualifier.layoutPushConstant) + appendStr(" push_constant"); + if (qualifier.layoutBufferReference) + appendStr(" buffer_reference"); + if (qualifier.hasBufferReferenceAlign()) { + appendStr(" buffer_reference_align="); + appendUint(1u << qualifier.layoutBufferReferenceAlign); + } + + if (qualifier.layoutPassthrough) + appendStr(" passthrough"); + if (qualifier.layoutViewportRelative) + appendStr(" layoutViewportRelative"); + if (qualifier.layoutSecondaryViewportRelativeOffset != -2048) { + appendStr(" layoutSecondaryViewportRelativeOffset="); + appendInt(qualifier.layoutSecondaryViewportRelativeOffset); + } + if (qualifier.layoutShaderRecord) + appendStr(" shaderRecordNV"); + + appendStr(")"); + } + } + + if (qualifier.invariant) + appendStr(" invariant"); + if (qualifier.noContraction) + appendStr(" noContraction"); + if (qualifier.centroid) + appendStr(" centroid"); + if (qualifier.smooth) + appendStr(" smooth"); + if (qualifier.flat) + appendStr(" flat"); + if (qualifier.nopersp) + appendStr(" noperspective"); + if (qualifier.explicitInterp) + appendStr(" __explicitInterpAMD"); + if (qualifier.pervertexNV) + appendStr(" pervertexNV"); + if (qualifier.pervertexEXT) + appendStr(" pervertexEXT"); + if (qualifier.perPrimitiveNV) + appendStr(" perprimitiveNV"); + if (qualifier.perViewNV) + appendStr(" perviewNV"); + if (qualifier.perTaskNV) + appendStr(" taskNV"); + if (qualifier.patch) + appendStr(" patch"); + if (qualifier.sample) + appendStr(" sample"); + if (qualifier.coherent) + appendStr(" coherent"); + if (qualifier.devicecoherent) + appendStr(" devicecoherent"); + if 
(qualifier.queuefamilycoherent) + appendStr(" queuefamilycoherent"); + if (qualifier.workgroupcoherent) + appendStr(" workgroupcoherent"); + if (qualifier.subgroupcoherent) + appendStr(" subgroupcoherent"); + if (qualifier.shadercallcoherent) + appendStr(" shadercallcoherent"); + if (qualifier.nonprivate) + appendStr(" nonprivate"); + if (qualifier.volatil) + appendStr(" volatile"); + if (qualifier.restrict) + appendStr(" restrict"); + if (qualifier.readonly) + appendStr(" readonly"); + if (qualifier.writeonly) + appendStr(" writeonly"); + if (qualifier.specConstant) + appendStr(" specialization-constant"); + if (qualifier.nonUniform) + appendStr(" nonuniform"); + if (qualifier.isNullInit()) + appendStr(" null-init"); + if (qualifier.isSpirvByReference()) + appendStr(" spirv_by_reference"); + if (qualifier.isSpirvLiteral()) + appendStr(" spirv_literal"); + appendStr(" "); + appendStr(getStorageQualifierString()); + } + if (getType) { + if (syntactic) { + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isVector() || isMatrix()) { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("d"); + break; + case EbtInt: + appendStr("i"); + break; + case EbtUint: + appendStr("u"); + break; + case EbtBool: + appendStr("b"); + break; + case EbtFloat: + default: + break; + } + if (isVector()) { + appendStr("vec"); + appendInt(vectorSize); + } else { + appendStr("mat"); + appendInt(matrixCols); + appendStr("x"); + appendInt(matrixRows); + } + } else if (isStruct() && structure) { + appendStr(" "); + appendStr(structName.c_str()); + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString(syntactic, getQualifiers, getPrecision, getType, (*structure)[i].type->getFieldName())); + hasHiddenMember = false; + } + } + appendStr("}"); + } else { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("double"); + break; + case EbtInt: + appendStr("int"); + break; + case EbtUint: + appendStr("uint"); + break; + case EbtBool: + appendStr("bool"); + break; + case EbtFloat: + appendStr("float"); + break; + default: + appendStr("unexpected"); + break; + } + } + if (name.length() > 0) { + appendStr(" "); + appendStr(name.c_str()); + } + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr("[]"); + else { + if (size == UnsizedArraySize) { + appendStr("["); + if (i == 0) + appendInt(arraySizes->getImplicitSize()); + appendStr("]"); + } + else { + appendStr("["); + appendInt(arraySizes->getDimSize(i)); + appendStr("]"); + } + } + } + } + } + else { + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr(" runtime-sized array of"); + else { + if (size == UnsizedArraySize) { + appendStr(" unsized"); + if (i == 0) { + appendStr(" "); + appendInt(arraySizes->getImplicitSize()); + } + } + else { + appendStr(" "); + appendInt(arraySizes->getDimSize(i)); + } + appendStr("-element array of"); + } + } + } + if (isParameterized()) { + appendStr("<"); + for (int i = 0; i < (int)typeParameters->getNumDims(); ++i) { + 
appendInt(typeParameters->getDimSize(i)); + if (i != (int)typeParameters->getNumDims() - 1) + appendStr(", "); + } + appendStr(">"); + } + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isMatrix()) { + appendStr(" "); + appendInt(matrixCols); + appendStr("X"); + appendInt(matrixRows); + appendStr(" matrix of"); + } + else if (isVector()) { + appendStr(" "); + appendInt(vectorSize); + appendStr("-component vector of"); + } + + appendStr(" "); + typeString.append(getBasicTypeString()); + + if (qualifier.builtIn != EbvNone) { + appendStr(" "); + appendStr(getBuiltInVariableString()); + } + + // Add struct/block members + if (isStruct() && structure) { + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString()); + typeString.append(" "); + typeString.append((*structure)[i].type->getFieldName()); + hasHiddenMember = false; + } + } + appendStr("}"); + } + } + } + + return typeString; + } + + TString getBasicTypeString() const + { + if (basicType == EbtSampler) + return sampler.getString(); + else + return getBasicString(); + } + + const char* getStorageQualifierString() const { return GetStorageQualifierString(qualifier.storage); } + const char* getBuiltInVariableString() const { return GetBuiltInVariableString(qualifier.builtIn); } + const char* getPrecisionQualifierString() const { return GetPrecisionQualifierString(qualifier.precision); } +#endif + + const TTypeList* getStruct() const { assert(isStruct()); return structure; } + void setStruct(TTypeList* s) { assert(isStruct()); structure = s; } + TTypeList* getWritableStruct() const { assert(isStruct()); return structure; } // This should only be used when known to not be sharing with other threads + void setBasicType(const TBasicType& t) { basicType = t; } + + int computeNumComponents() const + { + int components = 0; + + if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { + for (TTypeList::const_iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) + components += ((*tl).type)->computeNumComponents(); + } else if (matrixCols) + components = matrixCols * matrixRows; + else + components = vectorSize; + + if (arraySizes != nullptr) { + components *= arraySizes->getCumulativeSize(); + } + + return components; + } + + // append this type's mangled name to the passed in 'name' + void appendMangledName(TString& name) const + { + buildMangledName(name); + name += ';' ; + } + + // These variables are inconsistently declared inside and outside of gl_PerVertex in glslang right now. + // They are declared inside of 'in gl_PerVertex', but sitting as standalone when they are 'out'puts. + bool isInconsistentGLPerVertexMember(const TString& name) const + { + if (name == "gl_SecondaryPositionNV" || + name == "gl_PositionPerViewNV") + return true; + return false; + } + + + // Do two structure types match? They could be declared independently, + // in different places, but still might satisfy the definition of matching. + // From the spec: + // + // "Structures must have the same name, sequence of type names, and + // type definitions, and member names to be considered the same type. + // This rule applies recursively for nested or embedded types." + // + // If type mismatch in structure, return member indices through lpidx and rpidx. 
+ // If matching members for either block are exhausted, return -1 for exhausted + // block and the index of the unmatched member. Otherwise return {-1,-1}. + // + bool sameStructType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + // Initialize error to general type mismatch. + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + + // Most commonly, they are both nullptr, or the same pointer to the same actual structure + // TODO: Why return true when neither type is a structure? + if ((!isStruct() && !right.isStruct()) || + (isStruct() && right.isStruct() && structure == right.structure)) + return true; + + if (!isStruct() || !right.isStruct()) + return false; + + // Structure names have to match + if (*typeName != *right.typeName) + return false; + + // There are inconsistencies with how gl_PerVertex is set up. For now ignore those as errors if they + // are known inconsistencies. + bool isGLPerVertex = *typeName == "gl_PerVertex"; + + // Both being nullptr was caught above, now they both have to be structures of the same number of elements + if (lpidx == nullptr && + (structure->size() != right.structure->size() && !isGLPerVertex)) { + return false; + } + + // Compare the names and types of all the members, which have to match + for (size_t li = 0, ri = 0; li < structure->size() || ri < right.structure->size(); ++li, ++ri) { + if (lpidx != nullptr) { + *lpidx = static_cast<int>(li); + *rpidx = static_cast<int>(ri); + } + if (li < structure->size() && ri < right.structure->size()) { + if ((*structure)[li].type->getFieldName() == (*right.structure)[ri].type->getFieldName()) { + if (*(*structure)[li].type != *(*right.structure)[ri].type) + return false; + } else { + // Skip hidden members + if ((*structure)[li].type->hiddenMember()) { + ri--; + continue; + } else if ((*right.structure)[ri].type->hiddenMember()) { + li--; + continue; + } + // If one of the members is something that's inconsistently declared, skip over it + // for now. + if (isGLPerVertex) { + if (isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) { + ri--; + continue; + } else if (isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) { + li--; + continue; + } + } else { + return false; + } + } + // If we get here, then there should only be inconsistently declared members left + } else if (li < structure->size()) { + if (!(*structure)[li].type->hiddenMember() && !isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) { + if (lpidx != nullptr) { + *rpidx = -1; + } + return false; + } + } else { + if (!(*right.structure)[ri].type->hiddenMember() && !isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) { + if (lpidx != nullptr) { + *lpidx = -1; + } + return false; + } + } + } + + return true; + } + + bool sameReferenceType(const TType& right) const + { + if (isReference() != right.isReference()) + return false; + + if (!isReference() && !right.isReference()) + return true; + + assert(referentType != nullptr); + assert(right.referentType != nullptr); + + if (referentType == right.referentType) + return true; + + return *referentType == *right.referentType; + } + + // See if two types match, in all aspects except arrayness + // If mismatch in structure members, return member indices in lpidx and rpidx.
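+ // For example, 'float[4]' and 'float[7]' agree here (the element type is float in both; + // arrayness is deliberately ignored), while 'float[4]' and 'int[4]' do not.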
+ bool sameElementType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + return basicType == right.basicType && sameElementShape(right, lpidx, rpidx); + } + + // See if two types' arrayness matches + bool sameArrayness(const TType& right) const + { + return ((arraySizes == nullptr && right.arraySizes == nullptr) || + (arraySizes != nullptr && right.arraySizes != nullptr && *arraySizes == *right.arraySizes)); + } + + // See if two types' arrayness matches in everything except their outer dimension + bool sameInnerArrayness(const TType& right) const + { + assert(arraySizes != nullptr && right.arraySizes != nullptr); + return arraySizes->sameInnerArrayness(*right.arraySizes); + } + + // See if two types' parameters match + bool sameTypeParameters(const TType& right) const + { + return ((typeParameters == nullptr && right.typeParameters == nullptr) || + (typeParameters != nullptr && right.typeParameters != nullptr && *typeParameters == *right.typeParameters)); + } + +#ifndef GLSLANG_WEB + // See if two types' SPIR-V type contents match + bool sameSpirvType(const TType& right) const + { + return ((spirvType == nullptr && right.spirvType == nullptr) || + (spirvType != nullptr && right.spirvType != nullptr && *spirvType == *right.spirvType)); + } +#endif + + // See if two types' elements match in all ways except basic type + // If mismatch in structure members, return member indices in lpidx and rpidx. + bool sameElementShape(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + return ((basicType != EbtSampler && right.basicType != EbtSampler) || sampler == right.sampler) && + vectorSize == right.vectorSize && + matrixCols == right.matrixCols && + matrixRows == right.matrixRows && + vector1 == right.vector1 && + isCoopMat() == right.isCoopMat() && + sameStructType(right, lpidx, rpidx) && + sameReferenceType(right); + } + + // See if a cooperative matrix type parameter with unspecified parameters is + // an OK function parameter + bool coopMatParameterOK(const TType& right) const + { + return isCoopMat() && right.isCoopMat() && (getBasicType() == right.getBasicType()) && + typeParameters == nullptr && right.typeParameters != nullptr; + } + + bool sameCoopMatBaseType(const TType &right) const { + bool rv = coopmat && right.coopmat; + if (getBasicType() == EbtFloat || getBasicType() == EbtFloat16) + rv = right.getBasicType() == EbtFloat || right.getBasicType() == EbtFloat16; + else if (getBasicType() == EbtUint || getBasicType() == EbtUint8) + rv = right.getBasicType() == EbtUint || right.getBasicType() == EbtUint8; + else if (getBasicType() == EbtInt || getBasicType() == EbtInt8) + rv = right.getBasicType() == EbtInt || right.getBasicType() == EbtInt8; + else + rv = false; + return rv; + } + + + // See if two types match in all ways (just the actual type, not qualification) + bool operator==(const TType& right) const + { +#ifndef GLSLANG_WEB + return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right) && sameSpirvType(right); +#else + return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right); +#endif + } + + bool operator!=(const TType& right) const + { + return ! operator==(right); + } + + unsigned int getBufferReferenceAlignment() const + { +#ifndef GLSLANG_WEB + if (getBasicType() == glslang::EbtReference) { + return getReferentType()->getQualifier().hasBufferReferenceAlign() ?
+ (1u << getReferentType()->getQualifier().layoutBufferReferenceAlign) : 16u; + } +#endif + return 0; + } + +#ifndef GLSLANG_WEB + const TSpirvType& getSpirvType() const { assert(spirvType); return *spirvType; } +#endif + +protected: + // Require consumer to pick between deep copy and shallow copy. + TType(const TType& type); + TType& operator=(const TType& type); + + // Recursively copy a type graph, while preserving the graph-like + // quality. That is, don't make more than one copy of a structure that + // gets reused multiple times in the type graph. + void deepCopy(const TType& copyOf, TMap<TTypeList*,TTypeList*>& copiedMap) + { + shallowCopy(copyOf); + +#ifndef GLSLANG_WEB + // GL_EXT_spirv_intrinsics + if (copyOf.qualifier.spirvDecorate) { + qualifier.spirvDecorate = new TSpirvDecorate; + *qualifier.spirvDecorate = *copyOf.qualifier.spirvDecorate; + } + + if (copyOf.spirvType) { + spirvType = new TSpirvType; + *spirvType = *copyOf.spirvType; + } +#endif + + if (copyOf.arraySizes) { + arraySizes = new TArraySizes; + *arraySizes = *copyOf.arraySizes; + } + + if (copyOf.typeParameters) { + typeParameters = new TArraySizes; + *typeParameters = *copyOf.typeParameters; + } + + if (copyOf.isStruct() && copyOf.structure) { + auto prevCopy = copiedMap.find(copyOf.structure); + if (prevCopy != copiedMap.end()) + structure = prevCopy->second; + else { + structure = new TTypeList; + copiedMap[copyOf.structure] = structure; + for (unsigned int i = 0; i < copyOf.structure->size(); ++i) { + TTypeLoc typeLoc; + typeLoc.loc = (*copyOf.structure)[i].loc; + typeLoc.type = new TType(); + typeLoc.type->deepCopy(*(*copyOf.structure)[i].type, copiedMap); + structure->push_back(typeLoc); + } + } + } + + if (copyOf.fieldName) + fieldName = NewPoolTString(copyOf.fieldName->c_str()); + if (copyOf.typeName) + typeName = NewPoolTString(copyOf.typeName->c_str()); + } + + + void buildMangledName(TString&) const; + + TBasicType basicType : 8; + int vectorSize : 4; // 1 means either scalar or 1-component vector; see vector1 to disambiguate. + int matrixCols : 4; + int matrixRows : 4; + bool vector1 : 1; // Backward-compatible tracking of a 1-component vector distinguished from a scalar. + // GLSL 4.5 never has a 1-component vector; so this will always be false until such + // functionality is added. + // HLSL does have 1-component vectors, so this will be true to disambiguate + // from a scalar.
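+ // Set when the type was declared as a cooperative matrix (e.g. GL_NV_cooperative_matrix); + // its dimension parameters are carried in typeParameters below.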
+ bool coopmat : 1; + TQualifier qualifier; + + TArraySizes* arraySizes; // nullptr unless an array; can be shared across types + // A type can't be both a structure (EbtStruct/EbtBlock) and a reference (EbtReference), so + // conserve space by making these a union + union { + TTypeList* structure; // invalid unless this is a struct; can be shared across types + TType *referentType; // invalid unless this is an EbtReference + }; + TString *fieldName; // for structure field names + TString *typeName; // for structure type name + TSampler sampler; + TArraySizes* typeParameters;// nullptr unless a parameterized type; can be shared across types +#ifndef GLSLANG_WEB + TSpirvType* spirvType; // SPIR-V type defined by spirv_type directive +#endif +}; + +} // end namespace glslang + +#endif // _TYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/arrays.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/arrays.h new file mode 100644 index 0000000..7f047d9 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/arrays.h @@ -0,0 +1,341 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// Implement types for tracking GLSL arrays, arrays of arrays, etc. +// + +#ifndef _ARRAYS_INCLUDED +#define _ARRAYS_INCLUDED + +#include <algorithm> + +namespace glslang { + +// This is used to mean there is no size yet (unsized), it is waiting to get a size from somewhere else. +const int UnsizedArraySize = 0; + +class TIntermTyped; +extern bool SameSpecializationConstants(TIntermTyped*, TIntermTyped*); + +// Specialization constants need both a nominal size and a node that defines +// the specialization constant being used. Array types are the same when their +// size and specialization constant nodes are the same.
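+// For example (GLSL sketch, names hypothetical): +// layout(constant_id = 5) const int N = 64; +// float data[N]; +// 'data' carries the nominal size 64 plus the node for N, so it only matches another +// array whose size comes from the same specialization constant.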
+struct TArraySize { + unsigned int size; + TIntermTyped* node; // nullptr means no specialization constant node + bool operator==(const TArraySize& rhs) const + { + if (size != rhs.size) + return false; + if (node == nullptr || rhs.node == nullptr) + return node == rhs.node; + + return SameSpecializationConstants(node, rhs.node); + } +}; + +// +// TSmallArrayVector is used as the container for the set of sizes in TArraySizes. +// It has generic-container semantics, while TArraySizes has array-of-array semantics. +// That is, TSmallArrayVector should be more focused on mechanism and TArraySizes on policy. +// +struct TSmallArrayVector { + // + // TODO: memory: TSmallArrayVector is intended to be smaller. + // Almost all arrays could be handled by two sizes each fitting + // in 16 bits, needing a real vector only in the cases where there + // are more than 3 sizes or a size needing more than 16 bits. + // + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSmallArrayVector() : sizes(nullptr) { } + virtual ~TSmallArrayVector() { dealloc(); } + + // For breaking into two non-shared copies, independently modifiable. + TSmallArrayVector& operator=(const TSmallArrayVector& from) + { + if (from.sizes == nullptr) + sizes = nullptr; + else { + alloc(); + *sizes = *from.sizes; + } + + return *this; + } + + int size() const + { + if (sizes == nullptr) + return 0; + return (int)sizes->size(); + } + + unsigned int frontSize() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().size; + } + + TIntermTyped* frontNode() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().node; + } + + void changeFront(unsigned int s) + { + assert(sizes != nullptr); + // this should only happen for implicitly sized arrays, not specialization constants + assert(sizes->front().node == nullptr); + sizes->front().size = s; + } + + void push_back(unsigned int e, TIntermTyped* n) + { + alloc(); + TArraySize pair = { e, n }; + sizes->push_back(pair); + } + + void push_back(const TSmallArrayVector& newDims) + { + alloc(); + sizes->insert(sizes->end(), newDims.sizes->begin(), newDims.sizes->end()); + } + + void pop_front() + { + assert(sizes != nullptr && sizes->size() > 0); + if (sizes->size() == 1) + dealloc(); + else + sizes->erase(sizes->begin()); + } + + // 'this' should currently not be holding anything, and copyNonFront + // will make it hold a copy of all but the first element of rhs. + // (This would be useful for making a type that is dereferenced by + // one dimension.) + void copyNonFront(const TSmallArrayVector& rhs) + { + assert(sizes == nullptr); + if (rhs.size() > 1) { + alloc(); + sizes->insert(sizes->begin(), rhs.sizes->begin() + 1, rhs.sizes->end()); + } + } + + unsigned int getDimSize(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].size; + } + + void setDimSize(int i, unsigned int size) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + assert((*sizes)[i].node == nullptr); + (*sizes)[i].size = size; + } + + TIntermTyped* getDimNode(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].node; + } + + bool operator==(const TSmallArrayVector& rhs) const + { + if (sizes == nullptr && rhs.sizes == nullptr) + return true; + if (sizes == nullptr || rhs.sizes == nullptr) + return false; + return *sizes == *rhs.sizes; + } + bool operator!=(const TSmallArrayVector& rhs) const { return ! 
operator==(rhs); } + +protected: + TSmallArrayVector(const TSmallArrayVector&); + + void alloc() + { + if (sizes == nullptr) + sizes = new TVector<TArraySize>; + } + void dealloc() + { + delete sizes; + sizes = nullptr; + } + + TVector<TArraySize>* sizes; // will either hold such a pointer, or in the future, hold the two array sizes +}; + +// +// Represent an array, or array of arrays, to arbitrary depth. This is not +// done through a hierarchy of types in a type tree, rather all contiguous arrayness +// in the type hierarchy is localized into this single cumulative object. +// +// The arrayness in TType is a pointer, so that it can be non-allocated and zero +// for the vast majority of types that are non-array types. +// +// Order Policy: these are all identical: +// - left to right order within a contiguous set of ...[..][..][..]... in the source language +// - index order 0, 1, 2, ... within the 'sizes' member below +// - outer-most to inner-most +// +struct TArraySizes { + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TArraySizes() : implicitArraySize(1), variablyIndexed(false) { } + + // For breaking into two non-shared copies, independently modifiable. + TArraySizes& operator=(const TArraySizes& from) + { + implicitArraySize = from.implicitArraySize; + variablyIndexed = from.variablyIndexed; + sizes = from.sizes; + + return *this; + } + + // translate from array-of-array semantics to container semantics + int getNumDims() const { return sizes.size(); } + int getDimSize(int dim) const { return sizes.getDimSize(dim); } + TIntermTyped* getDimNode(int dim) const { return sizes.getDimNode(dim); } + void setDimSize(int dim, int size) { sizes.setDimSize(dim, size); } + int getOuterSize() const { return sizes.frontSize(); } + TIntermTyped* getOuterNode() const { return sizes.frontNode(); } + int getCumulativeSize() const + { + int size = 1; + for (int d = 0; d < sizes.size(); ++d) { + // this only makes sense in paths that have a known array size + assert(sizes.getDimSize(d) != UnsizedArraySize); + size *= sizes.getDimSize(d); + } + return size; + } + void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); } + void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); } + void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); } + void addInnerSize(TArraySize pair) { + sizes.push_back(pair.size, pair.node); + } + void addInnerSizes(const TArraySizes& s) { sizes.push_back(s.sizes); } + void changeOuterSize(int s) { sizes.changeFront((unsigned)s); } + int getImplicitSize() const { return implicitArraySize; } + void updateImplicitSize(int s) { implicitArraySize = std::max(implicitArraySize, s); } + bool isInnerUnsized() const + { + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) + return true; + } + + return false; + } + bool clearInnerUnsized() + { + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) + setDimSize(d, 1); + } + + return false; + } + bool isInnerSpecialization() const + { + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimNode(d) != nullptr) + return true; + } + + return false; + } + bool isOuterSpecialization() + { + return sizes.getDimNode(0) != nullptr; + } + + bool hasUnsized() const { return getOuterSize() == UnsizedArraySize || isInnerUnsized(); } + bool isSized() const { return getOuterSize() != UnsizedArraySize; } + void dereference() { sizes.pop_front(); } + void copyDereferenced(const TArraySizes& rhs) + { + assert(sizes.size() == 0); + if
(rhs.sizes.size() > 1) + sizes.copyNonFront(rhs.sizes); + } + + bool sameInnerArrayness(const TArraySizes& rhs) const + { + if (sizes.size() != rhs.sizes.size()) + return false; + + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) != rhs.sizes.getDimSize(d) || + sizes.getDimNode(d) != rhs.sizes.getDimNode(d)) + return false; + } + + return true; + } + + void setVariablyIndexed() { variablyIndexed = true; } + bool isVariablyIndexed() const { return variablyIndexed; } + + bool operator==(const TArraySizes& rhs) const { return sizes == rhs.sizes; } + bool operator!=(const TArraySizes& rhs) const { return sizes != rhs.sizes; } + +protected: + TSmallArrayVector sizes; + + TArraySizes(const TArraySizes&); + + // For tracking maximum referenced compile-time constant index. + // Applies only to the outer-most dimension. Potentially becomes + // the implicit size of the array, if not variably indexed and + // otherwise legal. + int implicitArraySize; + bool variablyIndexed; // true if array is indexed with a non compile-time constant +}; + +} // end namespace glslang + +#endif // _ARRAYS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/glslang_c_interface.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/glslang_c_interface.h new file mode 100644 index 0000000..f540f26 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/glslang_c_interface.h @@ -0,0 +1,279 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**/ + +#ifndef GLSLANG_C_IFACE_H_INCLUDED +#define GLSLANG_C_IFACE_H_INCLUDED + +#include <stdbool.h> +#include <stdlib.h> + +#include "glslang_c_shader_types.h" + +typedef struct glslang_shader_s glslang_shader_t; +typedef struct glslang_program_s glslang_program_t; + +/* TLimits counterpart */ +typedef struct glslang_limits_s { + bool non_inductive_for_loops; + bool while_loops; + bool do_while_loops; + bool general_uniform_indexing; + bool general_attribute_matrix_vector_indexing; + bool general_varying_indexing; + bool general_sampler_indexing; + bool general_variable_indexing; + bool general_constant_matrix_vector_indexing; +} glslang_limits_t; + +/* TBuiltInResource counterpart */ +typedef struct glslang_resource_s { + int max_lights; + int max_clip_planes; + int max_texture_units; + int max_texture_coords; + int max_vertex_attribs; + int max_vertex_uniform_components; + int max_varying_floats; + int max_vertex_texture_image_units; + int max_combined_texture_image_units; + int max_texture_image_units; + int max_fragment_uniform_components; + int max_draw_buffers; + int max_vertex_uniform_vectors; + int max_varying_vectors; + int max_fragment_uniform_vectors; + int max_vertex_output_vectors; + int max_fragment_input_vectors; + int min_program_texel_offset; + int max_program_texel_offset; + int max_clip_distances; + int max_compute_work_group_count_x; + int max_compute_work_group_count_y; + int max_compute_work_group_count_z; + int max_compute_work_group_size_x; + int max_compute_work_group_size_y; + int max_compute_work_group_size_z; + int max_compute_uniform_components; + int max_compute_texture_image_units; + int max_compute_image_uniforms; + int max_compute_atomic_counters; + int max_compute_atomic_counter_buffers; + int max_varying_components; + int max_vertex_output_components; + int max_geometry_input_components; + int max_geometry_output_components; + int max_fragment_input_components; + int max_image_units; + int max_combined_image_units_and_fragment_outputs; + int max_combined_shader_output_resources; + int max_image_samples; + int max_vertex_image_uniforms; + int max_tess_control_image_uniforms; + int max_tess_evaluation_image_uniforms; + int max_geometry_image_uniforms; + int max_fragment_image_uniforms; + int max_combined_image_uniforms; + int max_geometry_texture_image_units; + int max_geometry_output_vertices; + int max_geometry_total_output_components; + int max_geometry_uniform_components; + int max_geometry_varying_components; + int max_tess_control_input_components; + int max_tess_control_output_components; + int max_tess_control_texture_image_units; + int max_tess_control_uniform_components; + int max_tess_control_total_output_components; + int max_tess_evaluation_input_components; + int max_tess_evaluation_output_components; + int max_tess_evaluation_texture_image_units; + int max_tess_evaluation_uniform_components; + int max_tess_patch_components; + int max_patch_vertices; + int max_tess_gen_level; + int max_viewports; + int max_vertex_atomic_counters; + int max_tess_control_atomic_counters; + int max_tess_evaluation_atomic_counters; + int max_geometry_atomic_counters; + int max_fragment_atomic_counters; + int max_combined_atomic_counters; + int max_atomic_counter_bindings; + int max_vertex_atomic_counter_buffers; + int max_tess_control_atomic_counter_buffers; + int max_tess_evaluation_atomic_counter_buffers; + int max_geometry_atomic_counter_buffers; + int max_fragment_atomic_counter_buffers; + int max_combined_atomic_counter_buffers; + int max_atomic_counter_buffer_size; + int
max_transform_feedback_buffers; + int max_transform_feedback_interleaved_components; + int max_cull_distances; + int max_combined_clip_and_cull_distances; + int max_samples; + int max_mesh_output_vertices_nv; + int max_mesh_output_primitives_nv; + int max_mesh_work_group_size_x_nv; + int max_mesh_work_group_size_y_nv; + int max_mesh_work_group_size_z_nv; + int max_task_work_group_size_x_nv; + int max_task_work_group_size_y_nv; + int max_task_work_group_size_z_nv; + int max_mesh_view_count_nv; + int max_mesh_output_vertices_ext; + int max_mesh_output_primitives_ext; + int max_mesh_work_group_size_x_ext; + int max_mesh_work_group_size_y_ext; + int max_mesh_work_group_size_z_ext; + int max_task_work_group_size_x_ext; + int max_task_work_group_size_y_ext; + int max_task_work_group_size_z_ext; + int max_mesh_view_count_ext; + int maxDualSourceDrawBuffersEXT; + + glslang_limits_t limits; +} glslang_resource_t; + +typedef struct glslang_input_s { + glslang_source_t language; + glslang_stage_t stage; + glslang_client_t client; + glslang_target_client_version_t client_version; + glslang_target_language_t target_language; + glslang_target_language_version_t target_language_version; + /** Shader source code */ + const char* code; + int default_version; + glslang_profile_t default_profile; + int force_default_version_and_profile; + int forward_compatible; + glslang_messages_t messages; + const glslang_resource_t* resource; +} glslang_input_t; + +/* Inclusion result structure allocated by C include_local/include_system callbacks */ +typedef struct glsl_include_result_s { + /* Header file name or NULL if inclusion failed */ + const char* header_name; + + /* Header contents or NULL */ + const char* header_data; + size_t header_length; + +} glsl_include_result_t; + +/* Callback for local file inclusion */ +typedef glsl_include_result_t* (*glsl_include_local_func)(void* ctx, const char* header_name, const char* includer_name, + size_t include_depth); + +/* Callback for system file inclusion */ +typedef glsl_include_result_t* (*glsl_include_system_func)(void* ctx, const char* header_name, + const char* includer_name, size_t include_depth); + +/* Callback for include result destruction */ +typedef int (*glsl_free_include_result_func)(void* ctx, glsl_include_result_t* result); + +/* Collection of callbacks for GLSL preprocessor */ +typedef struct glsl_include_callbacks_s { + glsl_include_system_func include_system; + glsl_include_local_func include_local; + glsl_free_include_result_func free_include_result; +} glsl_include_callbacks_t; + +/* SpvOptions counterpart */ +typedef struct glslang_spv_options_s { + bool generate_debug_info; + bool strip_debug_info; + bool disable_optimizer; + bool optimize_size; + bool disassemble; + bool validate; + bool emit_nonsemantic_shader_debug_info; + bool emit_nonsemantic_shader_debug_source; +} glslang_spv_options_t; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef GLSLANG_IS_SHARED_LIBRARY + #ifdef _WIN32 + #ifdef GLSLANG_EXPORTING + #define GLSLANG_EXPORT __declspec(dllexport) + #else + #define GLSLANG_EXPORT __declspec(dllimport) + #endif + #elif __GNUC__ >= 4 + #define GLSLANG_EXPORT __attribute__((visibility("default"))) + #endif +#endif // GLSLANG_IS_SHARED_LIBRARY + +#ifndef GLSLANG_EXPORT +#define GLSLANG_EXPORT +#endif + +GLSLANG_EXPORT int glslang_initialize_process(); +GLSLANG_EXPORT void glslang_finalize_process(); + +GLSLANG_EXPORT glslang_shader_t* glslang_shader_create(const glslang_input_t* input); +GLSLANG_EXPORT void 
glslang_shader_delete(glslang_shader_t* shader); +GLSLANG_EXPORT void glslang_shader_set_preamble(glslang_shader_t* shader, const char* s); +GLSLANG_EXPORT void glslang_shader_shift_binding(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base); +GLSLANG_EXPORT void glslang_shader_shift_binding_for_set(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base, unsigned int set); +GLSLANG_EXPORT void glslang_shader_set_options(glslang_shader_t* shader, int options); // glslang_shader_options_t +GLSLANG_EXPORT void glslang_shader_set_glsl_version(glslang_shader_t* shader, int version); +GLSLANG_EXPORT int glslang_shader_preprocess(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT int glslang_shader_parse(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT const char* glslang_shader_get_preprocessed_code(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_log(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_debug_log(glslang_shader_t* shader); + +GLSLANG_EXPORT glslang_program_t* glslang_program_create(); +GLSLANG_EXPORT void glslang_program_delete(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_add_shader(glslang_program_t* program, glslang_shader_t* shader); +GLSLANG_EXPORT int glslang_program_link(glslang_program_t* program, int messages); // glslang_messages_t +GLSLANG_EXPORT void glslang_program_add_source_text(glslang_program_t* program, glslang_stage_t stage, const char* text, size_t len); +GLSLANG_EXPORT void glslang_program_set_source_file(glslang_program_t* program, glslang_stage_t stage, const char* file); +GLSLANG_EXPORT int glslang_program_map_io(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_generate(glslang_program_t* program, glslang_stage_t stage); +GLSLANG_EXPORT void glslang_program_SPIRV_generate_with_options(glslang_program_t* program, glslang_stage_t stage, glslang_spv_options_t* spv_options); +GLSLANG_EXPORT size_t glslang_program_SPIRV_get_size(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_get(glslang_program_t* program, unsigned int*); +GLSLANG_EXPORT unsigned int* glslang_program_SPIRV_get_ptr(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_SPIRV_get_messages(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_log(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_debug_log(glslang_program_t* program); + +#ifdef __cplusplus +} +#endif + +#endif /* #ifdef GLSLANG_C_IFACE_INCLUDED */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/glslang_c_shader_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/glslang_c_shader_types.h new file mode 100644 index 0000000..9bc2114 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/glslang_c_shader_types.h @@ -0,0 +1,227 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**/ + +#ifndef C_SHADER_TYPES_H_INCLUDED +#define C_SHADER_TYPES_H_INCLUDED + +#define LAST_ELEMENT_MARKER(x) x + +/* EShLanguage counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX, + GLSLANG_STAGE_TESSCONTROL, + GLSLANG_STAGE_TESSEVALUATION, + GLSLANG_STAGE_GEOMETRY, + GLSLANG_STAGE_FRAGMENT, + GLSLANG_STAGE_COMPUTE, + GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_RAYGEN_NV = GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_INTERSECT_NV = GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_ANYHIT_NV = GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_CLOSESTHIT_NV = GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_MISS, + GLSLANG_STAGE_MISS_NV = GLSLANG_STAGE_MISS, + GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_CALLABLE_NV = GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_TASK, + GLSLANG_STAGE_TASK_NV = GLSLANG_STAGE_TASK, + GLSLANG_STAGE_MESH, + GLSLANG_STAGE_MESH_NV = GLSLANG_STAGE_MESH, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_COUNT), +} glslang_stage_t; // would be better as stage, but this is ancient now + +/* EShLanguageMask counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX_MASK = (1 << GLSLANG_STAGE_VERTEX), + GLSLANG_STAGE_TESSCONTROL_MASK = (1 << GLSLANG_STAGE_TESSCONTROL), + GLSLANG_STAGE_TESSEVALUATION_MASK = (1 << GLSLANG_STAGE_TESSEVALUATION), + GLSLANG_STAGE_GEOMETRY_MASK = (1 << GLSLANG_STAGE_GEOMETRY), + GLSLANG_STAGE_FRAGMENT_MASK = (1 << GLSLANG_STAGE_FRAGMENT), + GLSLANG_STAGE_COMPUTE_MASK = (1 << GLSLANG_STAGE_COMPUTE), + GLSLANG_STAGE_RAYGEN_MASK = (1 << GLSLANG_STAGE_RAYGEN), + GLSLANG_STAGE_RAYGEN_NV_MASK = GLSLANG_STAGE_RAYGEN_MASK, + GLSLANG_STAGE_INTERSECT_MASK = (1 << GLSLANG_STAGE_INTERSECT), + GLSLANG_STAGE_INTERSECT_NV_MASK = GLSLANG_STAGE_INTERSECT_MASK, + GLSLANG_STAGE_ANYHIT_MASK = (1 << GLSLANG_STAGE_ANYHIT), + GLSLANG_STAGE_ANYHIT_NV_MASK = GLSLANG_STAGE_ANYHIT_MASK, + GLSLANG_STAGE_CLOSESTHIT_MASK = (1 << GLSLANG_STAGE_CLOSESTHIT), + GLSLANG_STAGE_CLOSESTHIT_NV_MASK = GLSLANG_STAGE_CLOSESTHIT_MASK, + GLSLANG_STAGE_MISS_MASK = (1 << GLSLANG_STAGE_MISS), + GLSLANG_STAGE_MISS_NV_MASK = GLSLANG_STAGE_MISS_MASK, + GLSLANG_STAGE_CALLABLE_MASK = (1 << GLSLANG_STAGE_CALLABLE), + GLSLANG_STAGE_CALLABLE_NV_MASK = GLSLANG_STAGE_CALLABLE_MASK, + GLSLANG_STAGE_TASK_MASK = (1 << GLSLANG_STAGE_TASK), + GLSLANG_STAGE_TASK_NV_MASK = GLSLANG_STAGE_TASK_MASK, + GLSLANG_STAGE_MESH_MASK = (1 << GLSLANG_STAGE_MESH), + GLSLANG_STAGE_MESH_NV_MASK = GLSLANG_STAGE_MESH_MASK, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_MASK_COUNT), +} glslang_stage_mask_t; + +/* EShSource counterpart */ +typedef enum { + 
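+ /* NOTE: enumerant order and values must stay in sync with glslang's EShSource */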
GLSLANG_SOURCE_NONE, + GLSLANG_SOURCE_GLSL, + GLSLANG_SOURCE_HLSL, + LAST_ELEMENT_MARKER(GLSLANG_SOURCE_COUNT), +} glslang_source_t; + +/* EShClient counterpart */ +typedef enum { + GLSLANG_CLIENT_NONE, + GLSLANG_CLIENT_VULKAN, + GLSLANG_CLIENT_OPENGL, + LAST_ELEMENT_MARKER(GLSLANG_CLIENT_COUNT), +} glslang_client_t; + +/* EShTargetLanguage counterpart */ +typedef enum { + GLSLANG_TARGET_NONE, + GLSLANG_TARGET_SPV, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_COUNT), +} glslang_target_language_t; + +/* SH_TARGET_ClientVersion counterpart */ +typedef enum { + GLSLANG_TARGET_VULKAN_1_0 = (1 << 22), + GLSLANG_TARGET_VULKAN_1_1 = (1 << 22) | (1 << 12), + GLSLANG_TARGET_VULKAN_1_2 = (1 << 22) | (2 << 12), + GLSLANG_TARGET_VULKAN_1_3 = (1 << 22) | (3 << 12), + GLSLANG_TARGET_OPENGL_450 = 450, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_CLIENT_VERSION_COUNT = 5), +} glslang_target_client_version_t; + +/* SH_TARGET_LanguageVersion counterpart */ +typedef enum { + GLSLANG_TARGET_SPV_1_0 = (1 << 16), + GLSLANG_TARGET_SPV_1_1 = (1 << 16) | (1 << 8), + GLSLANG_TARGET_SPV_1_2 = (1 << 16) | (2 << 8), + GLSLANG_TARGET_SPV_1_3 = (1 << 16) | (3 << 8), + GLSLANG_TARGET_SPV_1_4 = (1 << 16) | (4 << 8), + GLSLANG_TARGET_SPV_1_5 = (1 << 16) | (5 << 8), + GLSLANG_TARGET_SPV_1_6 = (1 << 16) | (6 << 8), + LAST_ELEMENT_MARKER(GLSLANG_TARGET_LANGUAGE_VERSION_COUNT = 7), +} glslang_target_language_version_t; + +/* EShExecutable counterpart */ +typedef enum { GLSLANG_EX_VERTEX_FRAGMENT, GLSLANG_EX_FRAGMENT } glslang_executable_t; + +// EShOptimizationLevel counterpart +// This enum is not used in the current C interface, but could be added at a later date. +// GLSLANG_OPT_NONE is the current default. +typedef enum { + GLSLANG_OPT_NO_GENERATION, + GLSLANG_OPT_NONE, + GLSLANG_OPT_SIMPLE, + GLSLANG_OPT_FULL, + LAST_ELEMENT_MARKER(GLSLANG_OPT_LEVEL_COUNT), +} glslang_optimization_level_t; + +/* EShTextureSamplerTransformMode counterpart */ +typedef enum { + GLSLANG_TEX_SAMP_TRANS_KEEP, + GLSLANG_TEX_SAMP_TRANS_UPGRADE_TEXTURE_REMOVE_SAMPLER, + LAST_ELEMENT_MARKER(GLSLANG_TEX_SAMP_TRANS_COUNT), +} glslang_texture_sampler_transform_mode_t; + +/* EShMessages counterpart */ +typedef enum { + GLSLANG_MSG_DEFAULT_BIT = 0, + GLSLANG_MSG_RELAXED_ERRORS_BIT = (1 << 0), + GLSLANG_MSG_SUPPRESS_WARNINGS_BIT = (1 << 1), + GLSLANG_MSG_AST_BIT = (1 << 2), + GLSLANG_MSG_SPV_RULES_BIT = (1 << 3), + GLSLANG_MSG_VULKAN_RULES_BIT = (1 << 4), + GLSLANG_MSG_ONLY_PREPROCESSOR_BIT = (1 << 5), + GLSLANG_MSG_READ_HLSL_BIT = (1 << 6), + GLSLANG_MSG_CASCADING_ERRORS_BIT = (1 << 7), + GLSLANG_MSG_KEEP_UNCALLED_BIT = (1 << 8), + GLSLANG_MSG_HLSL_OFFSETS_BIT = (1 << 9), + GLSLANG_MSG_DEBUG_INFO_BIT = (1 << 10), + GLSLANG_MSG_HLSL_ENABLE_16BIT_TYPES_BIT = (1 << 11), + GLSLANG_MSG_HLSL_LEGALIZATION_BIT = (1 << 12), + GLSLANG_MSG_HLSL_DX9_COMPATIBLE_BIT = (1 << 13), + GLSLANG_MSG_BUILTIN_SYMBOL_TABLE_BIT = (1 << 14), + GLSLANG_MSG_ENHANCED = (1 << 15), + LAST_ELEMENT_MARKER(GLSLANG_MSG_COUNT), +} glslang_messages_t; + +/* EShReflectionOptions counterpart */ +typedef enum { + GLSLANG_REFLECTION_DEFAULT_BIT = 0, + GLSLANG_REFLECTION_STRICT_ARRAY_SUFFIX_BIT = (1 << 0), + GLSLANG_REFLECTION_BASIC_ARRAY_SUFFIX_BIT = (1 << 1), + GLSLANG_REFLECTION_INTERMEDIATE_IOO_BIT = (1 << 2), + GLSLANG_REFLECTION_SEPARATE_BUFFERS_BIT = (1 << 3), + GLSLANG_REFLECTION_ALL_BLOCK_VARIABLES_BIT = (1 << 4), + GLSLANG_REFLECTION_UNWRAP_IO_BLOCKS_BIT = (1 << 5), + GLSLANG_REFLECTION_ALL_IO_VARIABLES_BIT = (1 << 6), + GLSLANG_REFLECTION_SHARED_STD140_SSBO_BIT = (1 << 7), + 
GLSLANG_REFLECTION_SHARED_STD140_UBO_BIT = (1 << 8), + LAST_ELEMENT_MARKER(GLSLANG_REFLECTION_COUNT), +} glslang_reflection_options_t; + +/* EProfile counterpart (from Versions.h) */ +typedef enum { + GLSLANG_BAD_PROFILE = 0, + GLSLANG_NO_PROFILE = (1 << 0), + GLSLANG_CORE_PROFILE = (1 << 1), + GLSLANG_COMPATIBILITY_PROFILE = (1 << 2), + GLSLANG_ES_PROFILE = (1 << 3), + LAST_ELEMENT_MARKER(GLSLANG_PROFILE_COUNT), +} glslang_profile_t; + +/* Shader options */ +typedef enum { + GLSLANG_SHADER_DEFAULT_BIT = 0, + GLSLANG_SHADER_AUTO_MAP_BINDINGS = (1 << 0), + GLSLANG_SHADER_AUTO_MAP_LOCATIONS = (1 << 1), + GLSLANG_SHADER_VULKAN_RULES_RELAXED = (1 << 2), + LAST_ELEMENT_MARKER(GLSLANG_SHADER_COUNT), +} glslang_shader_options_t; + +/* TResourceType counterpart */ +typedef enum { + GLSLANG_RESOURCE_TYPE_SAMPLER, + GLSLANG_RESOURCE_TYPE_TEXTURE, + GLSLANG_RESOURCE_TYPE_IMAGE, + GLSLANG_RESOURCE_TYPE_UBO, + GLSLANG_RESOURCE_TYPE_SSBO, + GLSLANG_RESOURCE_TYPE_UAV, + LAST_ELEMENT_MARKER(GLSLANG_RESOURCE_TYPE_COUNT), +} glslang_resource_type_t; + +#undef LAST_ELEMENT_MARKER + +#endif diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/intermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/intermediate.h new file mode 100644 index 0000000..a024002 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Include/intermediate.h @@ -0,0 +1,1850 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2016 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// Definition of the in-memory high-level intermediate representation +// of shaders. This is the tree that the parser creates. +// +// Nodes in the tree are defined as a hierarchy of classes derived from +// TIntermNode. Each is a node in a tree.
There is no preset branching factor; +// each node can have its own type of list of children. +// + +#ifndef __INTERMEDIATE_H +#define __INTERMEDIATE_H + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + #pragma warning(disable : 4464) // relative include path contains '..' + #pragma warning(disable : 5026) // 'glslang::TIntermUnary': move constructor was implicitly defined as deleted +#endif + +#include "../Include/Common.h" +#include "../Include/Types.h" +#include "../Include/ConstantUnion.h" + +namespace glslang { + +class TIntermediate; + +// +// Operators used by the high-level (parse tree) representation. +// +enum TOperator { + EOpNull, // if in a node, should only mean a node is still being built + EOpSequence, // denotes a list of statements, or parameters, etc. + EOpScope, // Used by debugging to denote a scoped list of statements + EOpLinkerObjects, // for aggregate node of objects the linker may need, if not referenced by the rest of the AST + EOpFunctionCall, + EOpFunction, // For function definition + EOpParameters, // an aggregate listing the parameters to a function +#ifndef GLSLANG_WEB + EOpSpirvInst, +#endif + + // + // Unary operators + // + + EOpNegative, + EOpLogicalNot, + EOpVectorLogicalNot, + EOpBitwiseNot, + + EOpPostIncrement, + EOpPostDecrement, + EOpPreIncrement, + EOpPreDecrement, + + EOpCopyObject, + + EOpDeclare, // Used by debugging to force declaration of variable in correct scope + + // (u)int* -> bool + EOpConvInt8ToBool, + EOpConvUint8ToBool, + EOpConvInt16ToBool, + EOpConvUint16ToBool, + EOpConvIntToBool, + EOpConvUintToBool, + EOpConvInt64ToBool, + EOpConvUint64ToBool, + + // float* -> bool + EOpConvFloat16ToBool, + EOpConvFloatToBool, + EOpConvDoubleToBool, + + // bool -> (u)int* + EOpConvBoolToInt8, + EOpConvBoolToUint8, + EOpConvBoolToInt16, + EOpConvBoolToUint16, + EOpConvBoolToInt, + EOpConvBoolToUint, + EOpConvBoolToInt64, + EOpConvBoolToUint64, + + // bool -> float* + EOpConvBoolToFloat16, + EOpConvBoolToFloat, + EOpConvBoolToDouble, + + // int8_t -> (u)int* + EOpConvInt8ToInt16, + EOpConvInt8ToInt, + EOpConvInt8ToInt64, + EOpConvInt8ToUint8, + EOpConvInt8ToUint16, + EOpConvInt8ToUint, + EOpConvInt8ToUint64, + + // uint8_t -> (u)int* + EOpConvUint8ToInt8, + EOpConvUint8ToInt16, + EOpConvUint8ToInt, + EOpConvUint8ToInt64, + EOpConvUint8ToUint16, + EOpConvUint8ToUint, + EOpConvUint8ToUint64, + + // int8_t -> float* + EOpConvInt8ToFloat16, + EOpConvInt8ToFloat, + EOpConvInt8ToDouble, + + // uint8_t -> float* + EOpConvUint8ToFloat16, + EOpConvUint8ToFloat, + EOpConvUint8ToDouble, + + // int16_t -> (u)int* + EOpConvInt16ToInt8, + EOpConvInt16ToInt, + EOpConvInt16ToInt64, + EOpConvInt16ToUint8, + EOpConvInt16ToUint16, + EOpConvInt16ToUint, + EOpConvInt16ToUint64, + + // uint16_t -> (u)int* + EOpConvUint16ToInt8, + EOpConvUint16ToInt16, + EOpConvUint16ToInt, + EOpConvUint16ToInt64, + EOpConvUint16ToUint8, + EOpConvUint16ToUint, + EOpConvUint16ToUint64, + + // int16_t -> float* + EOpConvInt16ToFloat16, + EOpConvInt16ToFloat, + EOpConvInt16ToDouble, + + // uint16_t -> float* + EOpConvUint16ToFloat16, + EOpConvUint16ToFloat, + EOpConvUint16ToDouble, + + // int32_t -> (u)int* + EOpConvIntToInt8, + EOpConvIntToInt16, + EOpConvIntToInt64, + EOpConvIntToUint8, + EOpConvIntToUint16, + EOpConvIntToUint, + EOpConvIntToUint64, + + // uint32_t -> (u)int* + EOpConvUintToInt8, + EOpConvUintToInt16, + EOpConvUintToInt, + EOpConvUintToInt64, + EOpConvUintToUint8, + EOpConvUintToUint16, + EOpConvUintToUint64, + + // int32_t -> float* + EOpConvIntToFloat16, +
EOpConvIntToFloat, + EOpConvIntToDouble, + + // uint32_t -> float* + EOpConvUintToFloat16, + EOpConvUintToFloat, + EOpConvUintToDouble, + + // int64_t -> (u)int* + EOpConvInt64ToInt8, + EOpConvInt64ToInt16, + EOpConvInt64ToInt, + EOpConvInt64ToUint8, + EOpConvInt64ToUint16, + EOpConvInt64ToUint, + EOpConvInt64ToUint64, + + // uint64_t -> (u)int* + EOpConvUint64ToInt8, + EOpConvUint64ToInt16, + EOpConvUint64ToInt, + EOpConvUint64ToInt64, + EOpConvUint64ToUint8, + EOpConvUint64ToUint16, + EOpConvUint64ToUint, + + // int64_t -> float* + EOpConvInt64ToFloat16, + EOpConvInt64ToFloat, + EOpConvInt64ToDouble, + + // uint64_t -> float* + EOpConvUint64ToFloat16, + EOpConvUint64ToFloat, + EOpConvUint64ToDouble, + + // float16_t -> (u)int* + EOpConvFloat16ToInt8, + EOpConvFloat16ToInt16, + EOpConvFloat16ToInt, + EOpConvFloat16ToInt64, + EOpConvFloat16ToUint8, + EOpConvFloat16ToUint16, + EOpConvFloat16ToUint, + EOpConvFloat16ToUint64, + + // float16_t -> float* + EOpConvFloat16ToFloat, + EOpConvFloat16ToDouble, + + // float -> (u)int* + EOpConvFloatToInt8, + EOpConvFloatToInt16, + EOpConvFloatToInt, + EOpConvFloatToInt64, + EOpConvFloatToUint8, + EOpConvFloatToUint16, + EOpConvFloatToUint, + EOpConvFloatToUint64, + + // float -> float* + EOpConvFloatToFloat16, + EOpConvFloatToDouble, + + // float64_t -> (u)int* + EOpConvDoubleToInt8, + EOpConvDoubleToInt16, + EOpConvDoubleToInt, + EOpConvDoubleToInt64, + EOpConvDoubleToUint8, + EOpConvDoubleToUint16, + EOpConvDoubleToUint, + EOpConvDoubleToUint64, + + // float64_t -> float* + EOpConvDoubleToFloat16, + EOpConvDoubleToFloat, + + // uint64_t <-> pointer + EOpConvUint64ToPtr, + EOpConvPtrToUint64, + + // uvec2 <-> pointer + EOpConvUvec2ToPtr, + EOpConvPtrToUvec2, + + // uint64_t -> accelerationStructureEXT + EOpConvUint64ToAccStruct, + + // uvec2 -> accelerationStructureEXT + EOpConvUvec2ToAccStruct, + + // + // binary operations + // + + EOpAdd, + EOpSub, + EOpMul, + EOpDiv, + EOpMod, + EOpRightShift, + EOpLeftShift, + EOpAnd, + EOpInclusiveOr, + EOpExclusiveOr, + EOpEqual, + EOpNotEqual, + EOpVectorEqual, + EOpVectorNotEqual, + EOpLessThan, + EOpGreaterThan, + EOpLessThanEqual, + EOpGreaterThanEqual, + EOpComma, + + EOpVectorTimesScalar, + EOpVectorTimesMatrix, + EOpMatrixTimesVector, + EOpMatrixTimesScalar, + + EOpLogicalOr, + EOpLogicalXor, + EOpLogicalAnd, + + EOpIndexDirect, + EOpIndexIndirect, + EOpIndexDirectStruct, + + EOpVectorSwizzle, + + EOpMethod, + EOpScoping, + + // + // Built-in functions mapped to operators + // + + EOpRadians, + EOpDegrees, + EOpSin, + EOpCos, + EOpTan, + EOpAsin, + EOpAcos, + EOpAtan, + EOpSinh, + EOpCosh, + EOpTanh, + EOpAsinh, + EOpAcosh, + EOpAtanh, + + EOpPow, + EOpExp, + EOpLog, + EOpExp2, + EOpLog2, + EOpSqrt, + EOpInverseSqrt, + + EOpAbs, + EOpSign, + EOpFloor, + EOpTrunc, + EOpRound, + EOpRoundEven, + EOpCeil, + EOpFract, + EOpModf, + EOpMin, + EOpMax, + EOpClamp, + EOpMix, + EOpStep, + EOpSmoothStep, + + EOpIsNan, + EOpIsInf, + + EOpFma, + + EOpFrexp, + EOpLdexp, + + EOpFloatBitsToInt, + EOpFloatBitsToUint, + EOpIntBitsToFloat, + EOpUintBitsToFloat, + EOpDoubleBitsToInt64, + EOpDoubleBitsToUint64, + EOpInt64BitsToDouble, + EOpUint64BitsToDouble, + EOpFloat16BitsToInt16, + EOpFloat16BitsToUint16, + EOpInt16BitsToFloat16, + EOpUint16BitsToFloat16, + EOpPackSnorm2x16, + EOpUnpackSnorm2x16, + EOpPackUnorm2x16, + EOpUnpackUnorm2x16, + EOpPackSnorm4x8, + EOpUnpackSnorm4x8, + EOpPackUnorm4x8, + EOpUnpackUnorm4x8, + EOpPackHalf2x16, + EOpUnpackHalf2x16, + EOpPackDouble2x32, + EOpUnpackDouble2x32, + EOpPackInt2x32, +
EOpUnpackInt2x32, + EOpPackUint2x32, + EOpUnpackUint2x32, + EOpPackFloat2x16, + EOpUnpackFloat2x16, + EOpPackInt2x16, + EOpUnpackInt2x16, + EOpPackUint2x16, + EOpUnpackUint2x16, + EOpPackInt4x16, + EOpUnpackInt4x16, + EOpPackUint4x16, + EOpUnpackUint4x16, + EOpPack16, + EOpPack32, + EOpPack64, + EOpUnpack32, + EOpUnpack16, + EOpUnpack8, + + EOpLength, + EOpDistance, + EOpDot, + EOpCross, + EOpNormalize, + EOpFaceForward, + EOpReflect, + EOpRefract, + + EOpMin3, + EOpMax3, + EOpMid3, + + EOpDPdx, // Fragment only + EOpDPdy, // Fragment only + EOpFwidth, // Fragment only + EOpDPdxFine, // Fragment only + EOpDPdyFine, // Fragment only + EOpFwidthFine, // Fragment only + EOpDPdxCoarse, // Fragment only + EOpDPdyCoarse, // Fragment only + EOpFwidthCoarse, // Fragment only + + EOpInterpolateAtCentroid, // Fragment only + EOpInterpolateAtSample, // Fragment only + EOpInterpolateAtOffset, // Fragment only + EOpInterpolateAtVertex, + + EOpMatrixTimesMatrix, + EOpOuterProduct, + EOpDeterminant, + EOpMatrixInverse, + EOpTranspose, + + EOpFtransform, + + EOpNoise, + + EOpEmitVertex, // geometry only + EOpEndPrimitive, // geometry only + EOpEmitStreamVertex, // geometry only + EOpEndStreamPrimitive, // geometry only + + EOpBarrier, + EOpMemoryBarrier, + EOpMemoryBarrierAtomicCounter, + EOpMemoryBarrierBuffer, + EOpMemoryBarrierImage, + EOpMemoryBarrierShared, // compute only + EOpGroupMemoryBarrier, // compute only + + EOpBallot, + EOpReadInvocation, + EOpReadFirstInvocation, + + EOpAnyInvocation, + EOpAllInvocations, + EOpAllInvocationsEqual, + + EOpSubgroupGuardStart, + EOpSubgroupBarrier, + EOpSubgroupMemoryBarrier, + EOpSubgroupMemoryBarrierBuffer, + EOpSubgroupMemoryBarrierImage, + EOpSubgroupMemoryBarrierShared, // compute only + EOpSubgroupElect, + EOpSubgroupAll, + EOpSubgroupAny, + EOpSubgroupAllEqual, + EOpSubgroupBroadcast, + EOpSubgroupBroadcastFirst, + EOpSubgroupBallot, + EOpSubgroupInverseBallot, + EOpSubgroupBallotBitExtract, + EOpSubgroupBallotBitCount, + EOpSubgroupBallotInclusiveBitCount, + EOpSubgroupBallotExclusiveBitCount, + EOpSubgroupBallotFindLSB, + EOpSubgroupBallotFindMSB, + EOpSubgroupShuffle, + EOpSubgroupShuffleXor, + EOpSubgroupShuffleUp, + EOpSubgroupShuffleDown, + EOpSubgroupAdd, + EOpSubgroupMul, + EOpSubgroupMin, + EOpSubgroupMax, + EOpSubgroupAnd, + EOpSubgroupOr, + EOpSubgroupXor, + EOpSubgroupInclusiveAdd, + EOpSubgroupInclusiveMul, + EOpSubgroupInclusiveMin, + EOpSubgroupInclusiveMax, + EOpSubgroupInclusiveAnd, + EOpSubgroupInclusiveOr, + EOpSubgroupInclusiveXor, + EOpSubgroupExclusiveAdd, + EOpSubgroupExclusiveMul, + EOpSubgroupExclusiveMin, + EOpSubgroupExclusiveMax, + EOpSubgroupExclusiveAnd, + EOpSubgroupExclusiveOr, + EOpSubgroupExclusiveXor, + EOpSubgroupClusteredAdd, + EOpSubgroupClusteredMul, + EOpSubgroupClusteredMin, + EOpSubgroupClusteredMax, + EOpSubgroupClusteredAnd, + EOpSubgroupClusteredOr, + EOpSubgroupClusteredXor, + EOpSubgroupQuadBroadcast, + EOpSubgroupQuadSwapHorizontal, + EOpSubgroupQuadSwapVertical, + EOpSubgroupQuadSwapDiagonal, + + EOpSubgroupPartition, + EOpSubgroupPartitionedAdd, + EOpSubgroupPartitionedMul, + EOpSubgroupPartitionedMin, + EOpSubgroupPartitionedMax, + EOpSubgroupPartitionedAnd, + EOpSubgroupPartitionedOr, + EOpSubgroupPartitionedXor, + EOpSubgroupPartitionedInclusiveAdd, + EOpSubgroupPartitionedInclusiveMul, + EOpSubgroupPartitionedInclusiveMin, + EOpSubgroupPartitionedInclusiveMax, + EOpSubgroupPartitionedInclusiveAnd, + EOpSubgroupPartitionedInclusiveOr, + EOpSubgroupPartitionedInclusiveXor, + 
EOpSubgroupPartitionedExclusiveAdd, + EOpSubgroupPartitionedExclusiveMul, + EOpSubgroupPartitionedExclusiveMin, + EOpSubgroupPartitionedExclusiveMax, + EOpSubgroupPartitionedExclusiveAnd, + EOpSubgroupPartitionedExclusiveOr, + EOpSubgroupPartitionedExclusiveXor, + + EOpSubgroupGuardStop, + + EOpMinInvocations, + EOpMaxInvocations, + EOpAddInvocations, + EOpMinInvocationsNonUniform, + EOpMaxInvocationsNonUniform, + EOpAddInvocationsNonUniform, + EOpMinInvocationsInclusiveScan, + EOpMaxInvocationsInclusiveScan, + EOpAddInvocationsInclusiveScan, + EOpMinInvocationsInclusiveScanNonUniform, + EOpMaxInvocationsInclusiveScanNonUniform, + EOpAddInvocationsInclusiveScanNonUniform, + EOpMinInvocationsExclusiveScan, + EOpMaxInvocationsExclusiveScan, + EOpAddInvocationsExclusiveScan, + EOpMinInvocationsExclusiveScanNonUniform, + EOpMaxInvocationsExclusiveScanNonUniform, + EOpAddInvocationsExclusiveScanNonUniform, + EOpSwizzleInvocations, + EOpSwizzleInvocationsMasked, + EOpWriteInvocation, + EOpMbcnt, + + EOpCubeFaceIndex, + EOpCubeFaceCoord, + EOpTime, + + EOpAtomicAdd, + EOpAtomicSubtract, + EOpAtomicMin, + EOpAtomicMax, + EOpAtomicAnd, + EOpAtomicOr, + EOpAtomicXor, + EOpAtomicExchange, + EOpAtomicCompSwap, + EOpAtomicLoad, + EOpAtomicStore, + + EOpAtomicCounterIncrement, // results in pre-increment value + EOpAtomicCounterDecrement, // results in post-decrement value + EOpAtomicCounter, + EOpAtomicCounterAdd, + EOpAtomicCounterSubtract, + EOpAtomicCounterMin, + EOpAtomicCounterMax, + EOpAtomicCounterAnd, + EOpAtomicCounterOr, + EOpAtomicCounterXor, + EOpAtomicCounterExchange, + EOpAtomicCounterCompSwap, + + EOpAny, + EOpAll, + + EOpCooperativeMatrixLoad, + EOpCooperativeMatrixStore, + EOpCooperativeMatrixMulAdd, + + EOpBeginInvocationInterlock, // Fragment only + EOpEndInvocationInterlock, // Fragment only + + EOpIsHelperInvocation, + + EOpDebugPrintf, + + // + // Branch + // + + EOpKill, // Fragment only + EOpTerminateInvocation, // Fragment only + EOpDemote, // Fragment only + EOpTerminateRayKHR, // Any-hit only + EOpIgnoreIntersectionKHR, // Any-hit only + EOpReturn, + EOpBreak, + EOpContinue, + EOpCase, + EOpDefault, + + // + // Constructors + // + + EOpConstructGuardStart, + EOpConstructInt, // these first scalar forms also identify what implicit conversion is needed + EOpConstructUint, + EOpConstructInt8, + EOpConstructUint8, + EOpConstructInt16, + EOpConstructUint16, + EOpConstructInt64, + EOpConstructUint64, + EOpConstructBool, + EOpConstructFloat, + EOpConstructDouble, + // Keep vector and matrix constructors in a consistent relative order for + // TParseContext::constructBuiltIn, which converts between 8/16/32 bit + // vector constructors + EOpConstructVec2, + EOpConstructVec3, + EOpConstructVec4, + EOpConstructMat2x2, + EOpConstructMat2x3, + EOpConstructMat2x4, + EOpConstructMat3x2, + EOpConstructMat3x3, + EOpConstructMat3x4, + EOpConstructMat4x2, + EOpConstructMat4x3, + EOpConstructMat4x4, + EOpConstructDVec2, + EOpConstructDVec3, + EOpConstructDVec4, + EOpConstructBVec2, + EOpConstructBVec3, + EOpConstructBVec4, + EOpConstructI8Vec2, + EOpConstructI8Vec3, + EOpConstructI8Vec4, + EOpConstructU8Vec2, + EOpConstructU8Vec3, + EOpConstructU8Vec4, + EOpConstructI16Vec2, + EOpConstructI16Vec3, + EOpConstructI16Vec4, + EOpConstructU16Vec2, + EOpConstructU16Vec3, + EOpConstructU16Vec4, + EOpConstructIVec2, + EOpConstructIVec3, + EOpConstructIVec4, + EOpConstructUVec2, + EOpConstructUVec3, + EOpConstructUVec4, + EOpConstructI64Vec2, + EOpConstructI64Vec3, + EOpConstructI64Vec4, + 
EOpConstructU64Vec2, + EOpConstructU64Vec3, + EOpConstructU64Vec4, + EOpConstructDMat2x2, + EOpConstructDMat2x3, + EOpConstructDMat2x4, + EOpConstructDMat3x2, + EOpConstructDMat3x3, + EOpConstructDMat3x4, + EOpConstructDMat4x2, + EOpConstructDMat4x3, + EOpConstructDMat4x4, + EOpConstructIMat2x2, + EOpConstructIMat2x3, + EOpConstructIMat2x4, + EOpConstructIMat3x2, + EOpConstructIMat3x3, + EOpConstructIMat3x4, + EOpConstructIMat4x2, + EOpConstructIMat4x3, + EOpConstructIMat4x4, + EOpConstructUMat2x2, + EOpConstructUMat2x3, + EOpConstructUMat2x4, + EOpConstructUMat3x2, + EOpConstructUMat3x3, + EOpConstructUMat3x4, + EOpConstructUMat4x2, + EOpConstructUMat4x3, + EOpConstructUMat4x4, + EOpConstructBMat2x2, + EOpConstructBMat2x3, + EOpConstructBMat2x4, + EOpConstructBMat3x2, + EOpConstructBMat3x3, + EOpConstructBMat3x4, + EOpConstructBMat4x2, + EOpConstructBMat4x3, + EOpConstructBMat4x4, + EOpConstructFloat16, + EOpConstructF16Vec2, + EOpConstructF16Vec3, + EOpConstructF16Vec4, + EOpConstructF16Mat2x2, + EOpConstructF16Mat2x3, + EOpConstructF16Mat2x4, + EOpConstructF16Mat3x2, + EOpConstructF16Mat3x3, + EOpConstructF16Mat3x4, + EOpConstructF16Mat4x2, + EOpConstructF16Mat4x3, + EOpConstructF16Mat4x4, + EOpConstructStruct, + EOpConstructTextureSampler, + EOpConstructNonuniform, // expected to be transformed away, not present in final AST + EOpConstructReference, + EOpConstructCooperativeMatrix, + EOpConstructAccStruct, + EOpConstructGuardEnd, + + // + // moves + // + + EOpAssign, + EOpAddAssign, + EOpSubAssign, + EOpMulAssign, + EOpVectorTimesMatrixAssign, + EOpVectorTimesScalarAssign, + EOpMatrixTimesScalarAssign, + EOpMatrixTimesMatrixAssign, + EOpDivAssign, + EOpModAssign, + EOpAndAssign, + EOpInclusiveOrAssign, + EOpExclusiveOrAssign, + EOpLeftShiftAssign, + EOpRightShiftAssign, + + // + // Array operators + // + + // Can apply to arrays, vectors, or matrices. + // Can be decomposed to a constant at compile time, but this does not always happen, + // due to link-time effects. So, consumer can expect either a link-time sized or + // run-time sized array. 
+ EOpArrayLength, + + // + // Image operations + // + + EOpImageGuardBegin, + + EOpImageQuerySize, + EOpImageQuerySamples, + EOpImageLoad, + EOpImageStore, + EOpImageLoadLod, + EOpImageStoreLod, + EOpImageAtomicAdd, + EOpImageAtomicMin, + EOpImageAtomicMax, + EOpImageAtomicAnd, + EOpImageAtomicOr, + EOpImageAtomicXor, + EOpImageAtomicExchange, + EOpImageAtomicCompSwap, + EOpImageAtomicLoad, + EOpImageAtomicStore, + + EOpSubpassLoad, + EOpSubpassLoadMS, + EOpSparseImageLoad, + EOpSparseImageLoadLod, + + EOpImageGuardEnd, + + // + // Texture operations + // + + EOpTextureGuardBegin, + + EOpTextureQuerySize, + EOpTextureQueryLod, + EOpTextureQueryLevels, + EOpTextureQuerySamples, + + EOpSamplingGuardBegin, + + EOpTexture, + EOpTextureProj, + EOpTextureLod, + EOpTextureOffset, + EOpTextureFetch, + EOpTextureFetchOffset, + EOpTextureProjOffset, + EOpTextureLodOffset, + EOpTextureProjLod, + EOpTextureProjLodOffset, + EOpTextureGrad, + EOpTextureGradOffset, + EOpTextureProjGrad, + EOpTextureProjGradOffset, + EOpTextureGather, + EOpTextureGatherOffset, + EOpTextureGatherOffsets, + EOpTextureClamp, + EOpTextureOffsetClamp, + EOpTextureGradClamp, + EOpTextureGradOffsetClamp, + EOpTextureGatherLod, + EOpTextureGatherLodOffset, + EOpTextureGatherLodOffsets, + EOpFragmentMaskFetch, + EOpFragmentFetch, + + EOpSparseTextureGuardBegin, + + EOpSparseTexture, + EOpSparseTextureLod, + EOpSparseTextureOffset, + EOpSparseTextureFetch, + EOpSparseTextureFetchOffset, + EOpSparseTextureLodOffset, + EOpSparseTextureGrad, + EOpSparseTextureGradOffset, + EOpSparseTextureGather, + EOpSparseTextureGatherOffset, + EOpSparseTextureGatherOffsets, + EOpSparseTexelsResident, + EOpSparseTextureClamp, + EOpSparseTextureOffsetClamp, + EOpSparseTextureGradClamp, + EOpSparseTextureGradOffsetClamp, + EOpSparseTextureGatherLod, + EOpSparseTextureGatherLodOffset, + EOpSparseTextureGatherLodOffsets, + + EOpSparseTextureGuardEnd, + + EOpImageFootprintGuardBegin, + EOpImageSampleFootprintNV, + EOpImageSampleFootprintClampNV, + EOpImageSampleFootprintLodNV, + EOpImageSampleFootprintGradNV, + EOpImageSampleFootprintGradClampNV, + EOpImageFootprintGuardEnd, + EOpSamplingGuardEnd, + EOpTextureGuardEnd, + + // + // Integer operations + // + + EOpAddCarry, + EOpSubBorrow, + EOpUMulExtended, + EOpIMulExtended, + EOpBitfieldExtract, + EOpBitfieldInsert, + EOpBitFieldReverse, + EOpBitCount, + EOpFindLSB, + EOpFindMSB, + + EOpCountLeadingZeros, + EOpCountTrailingZeros, + EOpAbsDifference, + EOpAddSaturate, + EOpSubSaturate, + EOpAverage, + EOpAverageRounded, + EOpMul32x16, + + EOpTraceNV, + EOpTraceRayMotionNV, + EOpTraceKHR, + EOpReportIntersection, + EOpIgnoreIntersectionNV, + EOpTerminateRayNV, + EOpExecuteCallableNV, + EOpExecuteCallableKHR, + EOpWritePackedPrimitiveIndices4x8NV, + EOpEmitMeshTasksEXT, + EOpSetMeshOutputsEXT, + + // + // GL_EXT_ray_query operations + // + + EOpRayQueryInitialize, + EOpRayQueryTerminate, + EOpRayQueryGenerateIntersection, + EOpRayQueryConfirmIntersection, + EOpRayQueryProceed, + EOpRayQueryGetIntersectionType, + EOpRayQueryGetRayTMin, + EOpRayQueryGetRayFlags, + EOpRayQueryGetIntersectionT, + EOpRayQueryGetIntersectionInstanceCustomIndex, + EOpRayQueryGetIntersectionInstanceId, + EOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffset, + EOpRayQueryGetIntersectionGeometryIndex, + EOpRayQueryGetIntersectionPrimitiveIndex, + EOpRayQueryGetIntersectionBarycentrics, + EOpRayQueryGetIntersectionFrontFace, + EOpRayQueryGetIntersectionCandidateAABBOpaque, + EOpRayQueryGetIntersectionObjectRayDirection, + 
EOpRayQueryGetIntersectionObjectRayOrigin, + EOpRayQueryGetWorldRayDirection, + EOpRayQueryGetWorldRayOrigin, + EOpRayQueryGetIntersectionObjectToWorld, + EOpRayQueryGetIntersectionWorldToObject, + + // + // HLSL operations + // + + EOpClip, // discard if input value < 0 + EOpIsFinite, + EOpLog10, // base 10 log + EOpRcp, // 1/x + EOpSaturate, // clamp from 0 to 1 + EOpSinCos, // sin and cos in out parameters + EOpGenMul, // mul(x,y) on any of mat/vec/scalars + EOpDst, // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w + EOpInterlockedAdd, // atomic ops, but uses [optional] out arg instead of return + EOpInterlockedAnd, // ... + EOpInterlockedCompareExchange, // ... + EOpInterlockedCompareStore, // ... + EOpInterlockedExchange, // ... + EOpInterlockedMax, // ... + EOpInterlockedMin, // ... + EOpInterlockedOr, // ... + EOpInterlockedXor, // ... + EOpAllMemoryBarrierWithGroupSync, // memory barriers without non-hlsl AST equivalents + EOpDeviceMemoryBarrier, // ... + EOpDeviceMemoryBarrierWithGroupSync, // ... + EOpWorkgroupMemoryBarrier, // ... + EOpWorkgroupMemoryBarrierWithGroupSync, // ... + EOpEvaluateAttributeSnapped, // InterpolateAtOffset with int position on 16x16 grid + EOpF32tof16, // HLSL conversion: half of a PackHalf2x16 + EOpF16tof32, // HLSL conversion: half of an UnpackHalf2x16 + EOpLit, // HLSL lighting coefficient vector + EOpTextureBias, // HLSL texture bias: will be lowered to EOpTexture + EOpAsDouble, // slightly different from EOpUint64BitsToDouble + EOpD3DCOLORtoUBYTE4, // convert and swizzle 4-component color to UBYTE4 range + + EOpMethodSample, // Texture object methods. These are translated to existing + EOpMethodSampleBias, // AST methods, and exist to represent HLSL semantics until that + EOpMethodSampleCmp, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodSampleCmpLevelZero, // ... + EOpMethodSampleGrad, // ... + EOpMethodSampleLevel, // ... + EOpMethodLoad, // ... + EOpMethodGetDimensions, // ... + EOpMethodGetSamplePosition, // ... + EOpMethodGather, // ... + EOpMethodCalculateLevelOfDetail, // ... + EOpMethodCalculateLevelOfDetailUnclamped, // ... + + // Load already defined above for textures + EOpMethodLoad2, // Structure buffer object methods. These are translated to existing + EOpMethodLoad3, // AST methods, and exist to represent HLSL semantics until that + EOpMethodLoad4, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodStore, // ... + EOpMethodStore2, // ... + EOpMethodStore3, // ... + EOpMethodStore4, // ... + EOpMethodIncrementCounter, // ... + EOpMethodDecrementCounter, // ... + // EOpMethodAppend is defined for geo shaders below + EOpMethodConsume, + + // SM5 texture methods + EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about + EOpMethodGatherGreen, // translation to existing AST opcodes. They exist temporarily + EOpMethodGatherBlue, // because HLSL arguments are slightly different. + EOpMethodGatherAlpha, // ... + EOpMethodGatherCmp, // ... + EOpMethodGatherCmpRed, // ... + EOpMethodGatherCmpGreen, // ... + EOpMethodGatherCmpBlue, // ... + EOpMethodGatherCmpAlpha, // ... + + // geometry methods + EOpMethodAppend, // Geometry shader methods + EOpMethodRestartStrip, // ... + + // matrix + EOpMatrixSwizzle, // select multiple matrix components (non-column) + + // SM6 wave ops + EOpWaveGetLaneCount, // Will decompose to gl_SubgroupSize. + EOpWaveGetLaneIndex, // Will decompose to gl_SubgroupInvocationID. 
+ EOpWaveActiveCountBits, // Will decompose to subgroupBallotBitCount(subgroupBallot()). + EOpWavePrefixCountBits, // Will decompose to subgroupBallotInclusiveBitCount(subgroupBallot()). + + // Shader Clock Ops + EOpReadClockSubgroupKHR, + EOpReadClockDeviceKHR, +}; + +class TIntermTraverser; +class TIntermOperator; +class TIntermAggregate; +class TIntermUnary; +class TIntermBinary; +class TIntermConstantUnion; +class TIntermSelection; +class TIntermSwitch; +class TIntermBranch; +class TIntermTyped; +class TIntermMethod; +class TIntermSymbol; +class TIntermLoop; + +} // end namespace glslang + +// +// Base class for the tree nodes +// +// (Put outside the glslang namespace, as it's used as part of the external interface.) +// +class TIntermNode { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + + TIntermNode() { loc.init(); } + virtual const glslang::TSourceLoc& getLoc() const { return loc; } + virtual void setLoc(const glslang::TSourceLoc& l) { loc = l; } + virtual void traverse(glslang::TIntermTraverser*) = 0; + virtual glslang::TIntermTyped* getAsTyped() { return 0; } + virtual glslang::TIntermOperator* getAsOperator() { return 0; } + virtual glslang::TIntermConstantUnion* getAsConstantUnion() { return 0; } + virtual glslang::TIntermAggregate* getAsAggregate() { return 0; } + virtual glslang::TIntermUnary* getAsUnaryNode() { return 0; } + virtual glslang::TIntermBinary* getAsBinaryNode() { return 0; } + virtual glslang::TIntermSelection* getAsSelectionNode() { return 0; } + virtual glslang::TIntermSwitch* getAsSwitchNode() { return 0; } + virtual glslang::TIntermMethod* getAsMethodNode() { return 0; } + virtual glslang::TIntermSymbol* getAsSymbolNode() { return 0; } + virtual glslang::TIntermBranch* getAsBranchNode() { return 0; } + virtual glslang::TIntermLoop* getAsLoopNode() { return 0; } + + virtual const glslang::TIntermTyped* getAsTyped() const { return 0; } + virtual const glslang::TIntermOperator* getAsOperator() const { return 0; } + virtual const glslang::TIntermConstantUnion* getAsConstantUnion() const { return 0; } + virtual const glslang::TIntermAggregate* getAsAggregate() const { return 0; } + virtual const glslang::TIntermUnary* getAsUnaryNode() const { return 0; } + virtual const glslang::TIntermBinary* getAsBinaryNode() const { return 0; } + virtual const glslang::TIntermSelection* getAsSelectionNode() const { return 0; } + virtual const glslang::TIntermSwitch* getAsSwitchNode() const { return 0; } + virtual const glslang::TIntermMethod* getAsMethodNode() const { return 0; } + virtual const glslang::TIntermSymbol* getAsSymbolNode() const { return 0; } + virtual const glslang::TIntermBranch* getAsBranchNode() const { return 0; } + virtual const glslang::TIntermLoop* getAsLoopNode() const { return 0; } + virtual ~TIntermNode() { } + +protected: + TIntermNode(const TIntermNode&); + TIntermNode& operator=(const TIntermNode&); + glslang::TSourceLoc loc; +}; + +namespace glslang { + +// +// This is just to help yacc. +// +struct TIntermNodePair { + TIntermNode* node1; + TIntermNode* node2; +}; + +// +// Intermediate class for nodes that have a type. 
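+// A hypothetical usage sketch (not part of the upstream header): code that
+// holds a plain TIntermNode* reaches the typed interface through the getAs*
+// queries above, which return nullptr for nodes of another kind:
+//
+//     void printBasicType(TIntermNode* node)
+//     {
+//         if (const glslang::TIntermTyped* typed = node->getAsTyped())
+//             printf("basic type %d\n", (int)typed->getBasicType());
+//     }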
+// +class TIntermTyped : public TIntermNode { +public: + TIntermTyped(const TType& t) { type.shallowCopy(t); } + TIntermTyped(TBasicType basicType) { TType bt(basicType); type.shallowCopy(bt); } + virtual TIntermTyped* getAsTyped() { return this; } + virtual const TIntermTyped* getAsTyped() const { return this; } + virtual void setType(const TType& t) { type.shallowCopy(t); } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { return type; } + + virtual TBasicType getBasicType() const { return type.getBasicType(); } + virtual TQualifier& getQualifier() { return type.getQualifier(); } + virtual const TQualifier& getQualifier() const { return type.getQualifier(); } + virtual TArraySizes* getArraySizes() { return type.getArraySizes(); } + virtual const TArraySizes* getArraySizes() const { return type.getArraySizes(); } + virtual void propagatePrecision(TPrecisionQualifier); + virtual int getVectorSize() const { return type.getVectorSize(); } + virtual int getMatrixCols() const { return type.getMatrixCols(); } + virtual int getMatrixRows() const { return type.getMatrixRows(); } + virtual bool isMatrix() const { return type.isMatrix(); } + virtual bool isArray() const { return type.isArray(); } + virtual bool isVector() const { return type.isVector(); } + virtual bool isScalar() const { return type.isScalar(); } + virtual bool isStruct() const { return type.isStruct(); } + virtual bool isFloatingDomain() const { return type.isFloatingDomain(); } + virtual bool isIntegerDomain() const { return type.isIntegerDomain(); } + bool isAtomic() const { return type.isAtomic(); } + bool isReference() const { return type.isReference(); } + TString getCompleteString(bool enhanced = false) const { return type.getCompleteString(enhanced); } + +protected: + TIntermTyped& operator=(const TIntermTyped&); + TType type; +}; + +// +// Handle for, do-while, and while loops. 
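+// A hedged illustration (not upstream text) of how a front end maps source
+// loops onto this one node shape:
+//
+//     while (c) body;        ->  TIntermLoop(body, c, nullptr, /*testFirst*/ true)
+//     do body; while (c);    ->  TIntermLoop(body, c, nullptr, /*testFirst*/ false)
+//     for (i; c; t) body;    ->  TIntermLoop(body, c, t,       /*testFirst*/ true)
+//
+// so 'terminal' is non-null only for for-loops, and 'first' is what separates
+// do-while from the other two.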
+// +class TIntermLoop : public TIntermNode { +public: + TIntermLoop(TIntermNode* aBody, TIntermTyped* aTest, TIntermTyped* aTerminal, bool testFirst) : + body(aBody), + test(aTest), + terminal(aTerminal), + first(testFirst), + unroll(false), + dontUnroll(false), + dependency(0), + minIterations(0), + maxIterations(iterationsInfinite), + iterationMultiple(1), + peelCount(0), + partialCount(0) + { } + + virtual TIntermLoop* getAsLoopNode() { return this; } + virtual const TIntermLoop* getAsLoopNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TIntermNode* getBody() const { return body; } + TIntermTyped* getTest() const { return test; } + TIntermTyped* getTerminal() const { return terminal; } + bool testFirst() const { return first; } + + void setUnroll() { unroll = true; } + void setDontUnroll() { + dontUnroll = true; + peelCount = 0; + partialCount = 0; + } + bool getUnroll() const { return unroll; } + bool getDontUnroll() const { return dontUnroll; } + + static const unsigned int dependencyInfinite = 0xFFFFFFFF; + static const unsigned int iterationsInfinite = 0xFFFFFFFF; + void setLoopDependency(int d) { dependency = d; } + int getLoopDependency() const { return dependency; } + + void setMinIterations(unsigned int v) { minIterations = v; } + unsigned int getMinIterations() const { return minIterations; } + void setMaxIterations(unsigned int v) { maxIterations = v; } + unsigned int getMaxIterations() const { return maxIterations; } + void setIterationMultiple(unsigned int v) { iterationMultiple = v; } + unsigned int getIterationMultiple() const { return iterationMultiple; } + void setPeelCount(unsigned int v) { + peelCount = v; + dontUnroll = false; + } + unsigned int getPeelCount() const { return peelCount; } + void setPartialCount(unsigned int v) { + partialCount = v; + dontUnroll = false; + } + unsigned int getPartialCount() const { return partialCount; } + +protected: + TIntermNode* body; // code to loop over + TIntermTyped* test; // exit condition associated with loop, could be 0 for 'for' loops + TIntermTyped* terminal; // exists for for-loops + bool first; // true for while and for, not for do-while + bool unroll; // true if unroll requested + bool dontUnroll; // true if request to not unroll + unsigned int dependency; // loop dependency hint; 0 means not set or unknown + unsigned int minIterations; // as per the SPIR-V specification + unsigned int maxIterations; // as per the SPIR-V specification + unsigned int iterationMultiple; // as per the SPIR-V specification + unsigned int peelCount; // as per the SPIR-V specification + unsigned int partialCount; // as per the SPIR-V specification +}; + +// +// Handle case, break, continue, return, and kill. +// +class TIntermBranch : public TIntermNode { +public: + TIntermBranch(TOperator op, TIntermTyped* e) : + flowOp(op), + expression(e) { } + virtual TIntermBranch* getAsBranchNode() { return this; } + virtual const TIntermBranch* getAsBranchNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TOperator getFlowOp() const { return flowOp; } + TIntermTyped* getExpression() const { return expression; } + void setExpression(TIntermTyped* pExpression) { expression = pExpression; } + void updatePrecision(TPrecisionQualifier parentPrecision); +protected: + TOperator flowOp; + TIntermTyped* expression; +}; + +// +// Represent method names before seeing their calling signature +// or resolving them to operations. Just an expression as the base object +// and a textural name. 
+// +class TIntermMethod : public TIntermTyped { +public: + TIntermMethod(TIntermTyped* o, const TType& t, const TString& m) : TIntermTyped(t), object(o), method(m) { } + virtual TIntermMethod* getAsMethodNode() { return this; } + virtual const TIntermMethod* getAsMethodNode() const { return this; } + virtual const TString& getMethodName() const { return method; } + virtual TIntermTyped* getObject() const { return object; } + virtual void traverse(TIntermTraverser*); +protected: + TIntermTyped* object; + TString method; +}; + +// +// Nodes that correspond to symbols or constants in the source code. +// +class TIntermSymbol : public TIntermTyped { +public: + // if symbol is initialized as symbol(sym), the memory comes from the pool allocator of sym. If sym comes from + // per process threadPoolAllocator, then it causes increased memory usage per compile + // it is essential to use "symbol = sym" to assign to symbol + TIntermSymbol(long long i, const TString& n, const TType& t) + : TIntermTyped(t), id(i), +#ifndef GLSLANG_WEB + flattenSubset(-1), +#endif + constSubtree(nullptr) + { name = n; } + virtual long long getId() const { return id; } + virtual void changeId(long long i) { id = i; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual TIntermSymbol* getAsSymbolNode() { return this; } + virtual const TIntermSymbol* getAsSymbolNode() const { return this; } + void setConstArray(const TConstUnionArray& c) { constArray = c; } + const TConstUnionArray& getConstArray() const { return constArray; } + void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + TIntermTyped* getConstSubtree() const { return constSubtree; } +#ifndef GLSLANG_WEB + void setFlattenSubset(int subset) { flattenSubset = subset; } + virtual const TString& getAccessName() const; + + int getFlattenSubset() const { return flattenSubset; } // -1 means full object +#endif + + // This is meant for cases where a node has already been constructed, and + // later on, it becomes necessary to switch to a different symbol. 
+ virtual void switchId(long long newId) { id = newId; } + +protected: + long long id; // the unique id of the symbol this node represents +#ifndef GLSLANG_WEB + int flattenSubset; // how deeply the flattened object rooted at id has been dereferenced +#endif + TString name; // the name of the symbol this node represents + TConstUnionArray constArray; // if the symbol is a front-end compile-time constant, this is its value + TIntermTyped* constSubtree; +}; + +class TIntermConstantUnion : public TIntermTyped { +public: + TIntermConstantUnion(const TConstUnionArray& ua, const TType& t) : TIntermTyped(t), constArray(ua), literal(false) { } + const TConstUnionArray& getConstArray() const { return constArray; } + virtual TIntermConstantUnion* getAsConstantUnion() { return this; } + virtual const TIntermConstantUnion* getAsConstantUnion() const { return this; } + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* fold(TOperator, const TIntermTyped*) const; + virtual TIntermTyped* fold(TOperator, const TType&) const; + void setLiteral() { literal = true; } + void setExpression() { literal = false; } + bool isLiteral() const { return literal; } + +protected: + TIntermConstantUnion& operator=(const TIntermConstantUnion&); + + const TConstUnionArray constArray; + bool literal; // true if node represents a literal in the source code +}; + +// Represent the independent aspects of a texturing TOperator +struct TCrackedTextureOp { + bool query; + bool proj; + bool lod; + bool fetch; + bool offset; + bool offsets; + bool gather; + bool grad; + bool subpass; + bool lodClamp; + bool fragMask; +}; + +// +// Intermediate class for node types that hold operators. +// +class TIntermOperator : public TIntermTyped { +public: + virtual TIntermOperator* getAsOperator() { return this; } + virtual const TIntermOperator* getAsOperator() const { return this; } + TOperator getOp() const { return op; } + void setOp(TOperator newOp) { op = newOp; } + bool modifiesState() const; + bool isConstructor() const; + bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; } + bool isSampling() const { return op > EOpSamplingGuardBegin && op < EOpSamplingGuardEnd; } +#ifdef GLSLANG_WEB + bool isImage() const { return false; } + bool isSparseTexture() const { return false; } + bool isImageFootprint() const { return false; } + bool isSparseImage() const { return false; } + bool isSubgroup() const { return false; } +#else + bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; } + bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; } + bool isImageFootprint() const { return op > EOpImageFootprintGuardBegin && op < EOpImageFootprintGuardEnd; } + bool isSparseImage() const { return op == EOpSparseImageLoad; } + bool isSubgroup() const { return op > EOpSubgroupGuardStart && op < EOpSubgroupGuardStop; } +#endif + + void setOperationPrecision(TPrecisionQualifier p) { operationPrecision = p; } + TPrecisionQualifier getOperationPrecision() const { return operationPrecision != EpqNone ? + operationPrecision : + type.getQualifier().precision; } + TString getCompleteString() const + { + TString cs = type.getCompleteString(); + if (getOperationPrecision() != type.getQualifier().precision) { + cs += ", operation at "; + cs += GetPrecisionQualifierString(getOperationPrecision()); + } + + return cs; + } + + // Crack the op into the individual dimensions of texturing operation. 
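+ // A hypothetical consumer-side sketch (not upstream code): a back end
+ // handling, say, EOpTextureProjLodOffset can branch on the cracked flags
+ // instead of re-deriving them from the opcode:
+ //
+ //     TCrackedTextureOp cracked;
+ //     node->crackTexture(sampler, cracked);
+ //     if (cracked.proj)   { /* projective: divide by the q coordinate */ }
+ //     if (cracked.lod)    { /* an explicit LOD operand follows */ }
+ //     if (cracked.offset) { /* a texel offset operand follows */ }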
+ void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const + { + cracked.query = false; + cracked.proj = false; + cracked.lod = false; + cracked.fetch = false; + cracked.offset = false; + cracked.offsets = false; + cracked.gather = false; + cracked.grad = false; + cracked.subpass = false; + cracked.lodClamp = false; + cracked.fragMask = false; + + switch (op) { + case EOpImageQuerySize: + case EOpImageQuerySamples: + case EOpTextureQuerySize: + case EOpTextureQueryLod: + case EOpTextureQueryLevels: + case EOpTextureQuerySamples: + case EOpSparseTexelsResident: + cracked.query = true; + break; + case EOpTexture: + case EOpSparseTexture: + break; + case EOpTextureProj: + cracked.proj = true; + break; + case EOpTextureLod: + case EOpSparseTextureLod: + cracked.lod = true; + break; + case EOpTextureOffset: + case EOpSparseTextureOffset: + cracked.offset = true; + break; + case EOpTextureFetch: + case EOpSparseTextureFetch: + cracked.fetch = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureFetchOffset: + case EOpSparseTextureFetchOffset: + cracked.fetch = true; + cracked.offset = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureProjOffset: + cracked.offset = true; + cracked.proj = true; + break; + case EOpTextureLodOffset: + case EOpSparseTextureLodOffset: + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureProjLod: + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureProjLodOffset: + cracked.offset = true; + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureGrad: + case EOpSparseTextureGrad: + cracked.grad = true; + break; + case EOpTextureGradOffset: + case EOpSparseTextureGradOffset: + cracked.grad = true; + cracked.offset = true; + break; + case EOpTextureProjGrad: + cracked.grad = true; + cracked.proj = true; + break; + case EOpTextureProjGradOffset: + cracked.grad = true; + cracked.offset = true; + cracked.proj = true; + break; +#ifndef GLSLANG_WEB + case EOpTextureClamp: + case EOpSparseTextureClamp: + cracked.lodClamp = true; + break; + case EOpTextureOffsetClamp: + case EOpSparseTextureOffsetClamp: + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGradClamp: + case EOpSparseTextureGradClamp: + cracked.grad = true; + cracked.lodClamp = true; + break; + case EOpTextureGradOffsetClamp: + case EOpSparseTextureGradOffsetClamp: + cracked.grad = true; + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGather: + case EOpSparseTextureGather: + cracked.gather = true; + break; + case EOpTextureGatherOffset: + case EOpSparseTextureGatherOffset: + cracked.gather = true; + cracked.offset = true; + break; + case EOpTextureGatherOffsets: + case EOpSparseTextureGatherOffsets: + cracked.gather = true; + cracked.offsets = true; + break; + case EOpTextureGatherLod: + case EOpSparseTextureGatherLod: + cracked.gather = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffset: + case EOpSparseTextureGatherLodOffset: + cracked.gather = true; + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffsets: + case EOpSparseTextureGatherLodOffsets: + cracked.gather = true; + cracked.offsets = true; + cracked.lod = true; + break; + case EOpImageLoadLod: + case EOpImageStoreLod: + case EOpSparseImageLoadLod: + cracked.lod = true; + break; + 
case EOpFragmentMaskFetch: + cracked.subpass = sampler.dim == EsdSubpass; + cracked.fragMask = true; + break; + case EOpFragmentFetch: + cracked.subpass = sampler.dim == EsdSubpass; + cracked.fragMask = true; + break; + case EOpImageSampleFootprintNV: + break; + case EOpImageSampleFootprintClampNV: + cracked.lodClamp = true; + break; + case EOpImageSampleFootprintLodNV: + cracked.lod = true; + break; + case EOpImageSampleFootprintGradNV: + cracked.grad = true; + break; + case EOpImageSampleFootprintGradClampNV: + cracked.lodClamp = true; + cracked.grad = true; + break; + case EOpSubpassLoad: + case EOpSubpassLoadMS: + cracked.subpass = true; + break; +#endif + default: + break; + } + } + +protected: + TIntermOperator(TOperator o) : TIntermTyped(EbtFloat), op(o), operationPrecision(EpqNone) {} + TIntermOperator(TOperator o, TType& t) : TIntermTyped(t), op(o), operationPrecision(EpqNone) {} + TOperator op; + // The result precision is in the inherited TType, and is usually meant to be both + // the operation precision and the result precision. However, some more complex things, + // like built-in function calls, distinguish between the two, in which case non-EqpNone + // 'operationPrecision' overrides the result precision as far as operation precision + // is concerned. + TPrecisionQualifier operationPrecision; +}; + +// +// Nodes for all the basic binary math operators. +// +class TIntermBinary : public TIntermOperator { +public: + TIntermBinary(TOperator o) : TIntermOperator(o) {} + virtual void traverse(TIntermTraverser*); + virtual void setLeft(TIntermTyped* n) { left = n; } + virtual void setRight(TIntermTyped* n) { right = n; } + virtual TIntermTyped* getLeft() const { return left; } + virtual TIntermTyped* getRight() const { return right; } + virtual TIntermBinary* getAsBinaryNode() { return this; } + virtual const TIntermBinary* getAsBinaryNode() const { return this; } + virtual void updatePrecision(); +protected: + TIntermTyped* left; + TIntermTyped* right; +}; + +// +// Nodes for unary math operators. +// +class TIntermUnary : public TIntermOperator { +public: + TIntermUnary(TOperator o, TType& t) : TIntermOperator(o, t), operand(0) {} + TIntermUnary(TOperator o) : TIntermOperator(o), operand(0) {} + virtual void traverse(TIntermTraverser*); + virtual void setOperand(TIntermTyped* o) { operand = o; } + virtual TIntermTyped* getOperand() { return operand; } + virtual const TIntermTyped* getOperand() const { return operand; } + virtual TIntermUnary* getAsUnaryNode() { return this; } + virtual const TIntermUnary* getAsUnaryNode() const { return this; } + virtual void updatePrecision(); +#ifndef GLSLANG_WEB + void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; } + const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif +protected: + TIntermTyped* operand; +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; +#endif +}; + +typedef TVector<TIntermNode*> TIntermSequence; +typedef TVector<TStorageQualifier> TQualifierList; +// +// Nodes that operate on an arbitrary sized set of children.
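+// A minimal sketch (hypothetical, not upstream text): the children sit in a
+// flat TIntermSequence, so walking them directly is a plain iteration:
+//
+//     for (TIntermNode* child : aggregate->getSequence())
+//         child->traverse(&myTraverser);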
+// +class TIntermAggregate : public TIntermOperator { +public: + TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(nullptr) { } + TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(nullptr) { } + ~TIntermAggregate() { delete pragmaTable; } + virtual TIntermAggregate* getAsAggregate() { return this; } + virtual const TIntermAggregate* getAsAggregate() const { return this; } + virtual void updatePrecision(); + virtual void setOperator(TOperator o) { op = o; } + virtual TIntermSequence& getSequence() { return sequence; } + virtual const TIntermSequence& getSequence() const { return sequence; } + virtual void setName(const TString& n) { name = n; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual void setUserDefined() { userDefined = true; } + virtual bool isUserDefined() { return userDefined; } + virtual TQualifierList& getQualifierList() { return qualifier; } + virtual const TQualifierList& getQualifierList() const { return qualifier; } + void setOptimize(bool o) { optimize = o; } + void setDebug(bool d) { debug = d; } + bool getOptimize() const { return optimize; } + bool getDebug() const { return debug; } + void setPragmaTable(const TPragmaTable& pTable); + const TPragmaTable& getPragmaTable() const { return *pragmaTable; } +#ifndef GLSLANG_WEB + void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; } + const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif +protected: + TIntermAggregate(const TIntermAggregate&); // disallow copy constructor + TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator + TIntermSequence sequence; + TQualifierList qualifier; + TString name; + bool userDefined; // used for user defined function names + bool optimize; + bool debug; + TPragmaTable* pragmaTable; +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; +#endif +}; + +// +// For if tests. 
+// +class TIntermSelection : public TIntermTyped { +public: + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB) : + TIntermTyped(EbtVoid), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB, const TType& type) : + TIntermTyped(type), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* getCondition() const { return condition; } + virtual void setCondition(TIntermTyped* c) { condition = c; } + virtual TIntermNode* getTrueBlock() const { return trueBlock; } + virtual void setTrueBlock(TIntermTyped* tb) { trueBlock = tb; } + virtual TIntermNode* getFalseBlock() const { return falseBlock; } + virtual void setFalseBlock(TIntermTyped* fb) { falseBlock = fb; } + virtual TIntermSelection* getAsSelectionNode() { return this; } + virtual const TIntermSelection* getAsSelectionNode() const { return this; } + + void setNoShortCircuit() { shortCircuit = false; } + bool getShortCircuit() const { return shortCircuit; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermNode* trueBlock; + TIntermNode* falseBlock; + bool shortCircuit; // normally all if-then-else and all GLSL ?: short-circuit, but HLSL ?: does not + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +// +// For switch statements. Designed use is that a switch will have sequence of nodes +// that are either case/default nodes or a *single* node that represents all the code +// in between (if any) consecutive case/defaults. So, a traversal need only deal with +// 0 or 1 nodes per case/default statement. +// +class TIntermSwitch : public TIntermNode { +public: + TIntermSwitch(TIntermTyped* cond, TIntermAggregate* b) : condition(cond), body(b), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermNode* getCondition() const { return condition; } + virtual TIntermAggregate* getBody() const { return body; } + virtual TIntermSwitch* getAsSwitchNode() { return this; } + virtual const TIntermSwitch* getAsSwitchNode() const { return this; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermAggregate* body; + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +enum TVisit +{ + EvPreVisit, + EvInVisit, + EvPostVisit +}; + +// +// For traversing the tree. User should derive from this, +// put their traversal specific data in it, and then pass +// it to a Traverse method. +// +// When using this, just fill in the methods for nodes you want visited. +// Return false from a pre-visit to skip visiting that node's subtree. +// +// Explicitly set postVisit to true if you want post visiting, otherwise, +// filled in methods will only be called at pre-visit time (before processing +// the subtree). Similarly for inVisit for in-order visiting of nodes with +// multiple children. 
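+//
+// For example, a derived traverser that only counts symbol references needs a
+// single override (a hypothetical sketch, not part of this header):
+//
+//     class TSymbolCounter : public glslang::TIntermTraverser {
+//     public:
+//         TSymbolCounter() : count(0) { }
+//         void visitSymbol(glslang::TIntermSymbol*) override { ++count; }
+//         int count;
+//     };
+//
+//     TSymbolCounter counter;
+//     root->traverse(&counter);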
+// +// If you only want post-visits, explicitly turn off preVisit (and inVisit) +// and turn on postVisit. +// +// In general, for the visit*() methods, return true from interior nodes +// to have the traversal continue on to children. +// +// If you process children yourself, or don't want them processed, return false. +// +class TIntermTraverser { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + TIntermTraverser(bool preVisit = true, bool inVisit = false, bool postVisit = false, bool rightToLeft = false) : + preVisit(preVisit), + inVisit(inVisit), + postVisit(postVisit), + rightToLeft(rightToLeft), + depth(0), + maxDepth(0) { } + virtual ~TIntermTraverser() { } + + virtual void visitSymbol(TIntermSymbol*) { } + virtual void visitConstantUnion(TIntermConstantUnion*) { } + virtual bool visitBinary(TVisit, TIntermBinary*) { return true; } + virtual bool visitUnary(TVisit, TIntermUnary*) { return true; } + virtual bool visitSelection(TVisit, TIntermSelection*) { return true; } + virtual bool visitAggregate(TVisit, TIntermAggregate*) { return true; } + virtual bool visitLoop(TVisit, TIntermLoop*) { return true; } + virtual bool visitBranch(TVisit, TIntermBranch*) { return true; } + virtual bool visitSwitch(TVisit, TIntermSwitch*) { return true; } + + int getMaxDepth() const { return maxDepth; } + + void incrementDepth(TIntermNode *current) + { + depth++; + maxDepth = (std::max)(maxDepth, depth); + path.push_back(current); + } + + void decrementDepth() + { + depth--; + path.pop_back(); + } + + TIntermNode *getParentNode() + { + return path.size() == 0 ? NULL : path.back(); + } + + const bool preVisit; + const bool inVisit; + const bool postVisit; + const bool rightToLeft; + +protected: + TIntermTraverser& operator=(TIntermTraverser&); + + int depth; + int maxDepth; + + // All the nodes from root to the current node's parent during traversing. + TVector<TIntermNode *> path; +}; + +// KHR_vulkan_glsl says "Two arrays sized with specialization constants are the same type only if +// sized with the same symbol, involving no operations" +inline bool SameSpecializationConstants(TIntermTyped* node1, TIntermTyped* node2) +{ + return node1->getAsSymbolNode() && node2->getAsSymbolNode() && + node1->getAsSymbolNode()->getId() == node2->getAsSymbolNode()->getId(); +} + +} // end namespace glslang + +#endif // __INTERMEDIATE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Initialize.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Initialize.h new file mode 100644 index 0000000..ac8ec33 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Initialize.h @@ -0,0 +1,112 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013-2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd.
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _INITIALIZE_INCLUDED_ +#define _INITIALIZE_INCLUDED_ + +#include "../Include/ResourceLimits.h" +#include "../Include/Common.h" +#include "../Include/ShHandle.h" +#include "SymbolTable.h" +#include "Versions.h" + +namespace glslang { + +// +// This is made to hold parseable strings for almost all the built-in +// functions and variables for one specific combination of version +// and profile. (Some still need to be added programmatically.) +// This is a base class for language-specific derivations, which +// can be used for language independent builtins. +// +// The strings are organized by +// commonBuiltins: intersection of all stages' built-ins, processed just once +// stageBuiltins[]: anything a stage needs that's not in commonBuiltins +// +class TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltInParseables(); + virtual ~TBuiltInParseables(); + virtual void initialize(int version, EProfile, const SpvVersion& spvVersion) = 0; + virtual void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage) = 0; + virtual const TString& getCommonString() const { return commonBuiltins; } + virtual const TString& getStageString(EShLanguage language) const { return stageBuiltins[language]; } + + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable) = 0; + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources) = 0; + +protected: + TString commonBuiltins; + TString stageBuiltins[EShLangCount]; +}; + +// +// This is a GLSL specific derivation of TBuiltInParseables. To present a stable +// interface and match other similar code, it is called TBuiltIns, rather +// than TBuiltInParseablesGlsl. 
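+// (An illustrative note, not upstream text: a front end typically seeds its
+// symbol table by parsing getCommonString() once, then getStageString() for
+// the particular stage being compiled on top of it.)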
+// +class TBuiltIns : public TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltIns(); + virtual ~TBuiltIns(); + void initialize(int version, EProfile, const SpvVersion& spvVersion); + void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage); + + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable); + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources); + +protected: + void addTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion); + void relateTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage, TSymbolTable&); + void add2ndGenerationSamplingImaging(int version, EProfile profile, const SpvVersion& spvVersion); + void addSubpassSampling(TSampler, const TString& typeName, int version, EProfile profile); + void addQueryFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addImageFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addSamplingFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addGatherFunctions(TSampler, const TString& typeName, int version, EProfile profile); + + // Helpers for making textual representations of the permutations + // of texturing/imaging functions. + const char* postfixes[5]; + const char* prefixes[EbtNumTypes]; + int dimMap[EsdNumDims]; +}; + +} // end namespace glslang + +#endif // _INITIALIZE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/LiveTraverser.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/LiveTraverser.h new file mode 100644 index 0000000..9b39b59 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/LiveTraverser.h @@ -0,0 +1,168 @@ +// +// Copyright (C) 2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#pragma once + +#include "../Include/Common.h" +#include "reflection.h" +#include "localintermediate.h" + +#include "gl_types.h" + +#include <list> +#include <unordered_set> + +namespace glslang { + +// +// The traverser: mostly pass through, except +// - processing function-call nodes to push live functions onto the stack of functions to process +// - processing selection nodes to trim semantically dead code +// +// This is in the glslang namespace directly so it can be a friend of TReflection. +// This can be derived from to implement reflection database traversers or +// binding mappers: anything that wants to traverse the live subset of the tree. +// + +class TLiveTraverser : public TIntermTraverser { +public: + TLiveTraverser(const TIntermediate& i, bool traverseAll = false, + bool preVisit = true, bool inVisit = false, bool postVisit = false) : + TIntermTraverser(preVisit, inVisit, postVisit), + intermediate(i), traverseAll(traverseAll) + { } + + // + // Given a function name, find its subroot in the tree, and push it onto the stack of + // functions left to process. + // + void pushFunction(const TString& name) + { + TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence(); + for (unsigned int f = 0; f < globals.size(); ++f) { + TIntermAggregate* candidate = globals[f]->getAsAggregate(); + if (candidate && candidate->getOp() == EOpFunction && candidate->getName() == name) { + destinations.push_back(candidate); + break; + } + } + } + + void pushGlobalReference(const TString& name) + { + TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence(); + for (unsigned int f = 0; f < globals.size(); ++f) { + TIntermAggregate* candidate = globals[f]->getAsAggregate(); + if (candidate && candidate->getOp() == EOpSequence && + candidate->getSequence().size() == 1 && + candidate->getSequence()[0]->getAsBinaryNode()) { + TIntermBinary* binary = candidate->getSequence()[0]->getAsBinaryNode(); + TIntermSymbol* symbol = binary->getLeft()->getAsSymbolNode(); + if (symbol && symbol->getQualifier().storage == EvqGlobal && + symbol->getName() == name) { + destinations.push_back(candidate); + break; + } + } + } + } + + typedef std::list<TIntermAggregate*> TDestinationStack; + TDestinationStack destinations; + +protected: + // To catch which function calls are not dead, and hence which functions must be visited. + virtual bool visitAggregate(TVisit, TIntermAggregate* node) + { + if (!traverseAll) + if (node->getOp() == EOpFunctionCall) + addFunctionCall(node); + + return true; // traverse this subtree + } + + // To prune semantically dead paths.
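+ // For instance (an illustrative note, not upstream text), given a constant
+ // condition such as
+ //     if (false) f(); else g();
+ // only the else path is traversed below, so f() is never recorded as live.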
+ virtual bool visitSelection(TVisit /* visit */, TIntermSelection* node) + { + if (traverseAll) + return true; // traverse all code + + TIntermConstantUnion* constant = node->getCondition()->getAsConstantUnion(); + if (constant) { + // cull the path that is dead + if (constant->getConstArray()[0].getBConst() == true && node->getTrueBlock()) + node->getTrueBlock()->traverse(this); + if (constant->getConstArray()[0].getBConst() == false && node->getFalseBlock()) + node->getFalseBlock()->traverse(this); + + return false; // don't traverse any more, we did it all above + } else + return true; // traverse the whole subtree + } + + // Track live functions as well as uniforms, so that we don't visit dead functions + // and only visit each function once. + void addFunctionCall(TIntermAggregate* call) + { + // just use the map to ensure we process each function at most once + if (liveFunctions.find(call->getName()) == liveFunctions.end()) { + liveFunctions.insert(call->getName()); + pushFunction(call->getName()); + } + } + + void addGlobalReference(const TString& name) + { + // just use the map to ensure we process each global at most once + if (liveGlobals.find(name) == liveGlobals.end()) { + liveGlobals.insert(name); + pushGlobalReference(name); + } + } + + const TIntermediate& intermediate; + typedef std::unordered_set<TString> TLiveFunctions; + TLiveFunctions liveFunctions; + typedef std::unordered_set<TString> TLiveGlobals; + TLiveGlobals liveGlobals; + bool traverseAll; + +private: + // prevent copy & copy construct + TLiveTraverser(TLiveTraverser&); + TLiveTraverser& operator=(TLiveTraverser&); +}; + +} // namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/ParseHelper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/ParseHelper.h new file mode 100644 index 0000000..885fd90 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/ParseHelper.h @@ -0,0 +1,588 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// This header defines a two-level parse-helper hierarchy, derived from +// TParseVersions: +// - TParseContextBase: sharable across multiple parsers +// - TParseContext: GLSL specific helper +// + +#ifndef _PARSER_HELPER_INCLUDED_ +#define _PARSER_HELPER_INCLUDED_ + +#include <array> +#include <functional> + +#include "parseVersions.h" +#include "../Include/ShHandle.h" +#include "SymbolTable.h" +#include "localintermediate.h" +#include "Scan.h" +#include "attribute.h" + +namespace glslang { + +struct TPragma { + TPragma(bool o, bool d) : optimize(o), debug(d) { } + bool optimize; + bool debug; + TPragmaTable pragmaTable; +}; + +class TScanContext; +class TPpContext; + +typedef std::set<long long> TIdSetType; +typedef std::map<const TTypeList*, std::map<size_t, const TType*>> TStructRecord; + +// +// Sharable code (as well as what's in TParseVersions) across +// parse helpers. +// +class TParseContextBase : public TParseVersions { +public: + TParseContextBase(TSymbolTable& symbolTable, TIntermediate& interm, bool parsingBuiltins, int version, + EProfile profile, const SpvVersion& spvVersion, EShLanguage language, + TInfoSink& infoSink, bool forwardCompatible, EShMessages messages, + const TString* entryPoint = nullptr) + : TParseVersions(interm, version, profile, spvVersion, language, infoSink, forwardCompatible, messages), + scopeMangler("::"), + symbolTable(symbolTable), + statementNestingLevel(0), loopNestingLevel(0), structNestingLevel(0), blockNestingLevel(0), controlFlowNestingLevel(0), + currentFunctionType(nullptr), + postEntryPointReturn(false), + contextPragma(true, false), + beginInvocationInterlockCount(0), endInvocationInterlockCount(0), + parsingBuiltins(parsingBuiltins), scanContext(nullptr), ppContext(nullptr), + limits(resources.limits), + globalUniformBlock(nullptr), + globalUniformBinding(TQualifier::layoutBindingEnd), + globalUniformSet(TQualifier::layoutSetEnd), + atomicCounterBlockSet(TQualifier::layoutSetEnd) + { + if (entryPoint != nullptr) + sourceEntryPointName = *entryPoint; + } + virtual ~TParseContextBase() { } + +#if !defined(GLSLANG_WEB) || defined(GLSLANG_WEB_DEVEL) + virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); + virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); + virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); + virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); +#endif + + virtual void setLimits(const TBuiltInResource&) = 0; + + void checkIndex(const TSourceLoc&, const TType&, int& index); + + EShLanguage getLanguage() const { return language; } + void setScanContext(TScanContext* c) { scanContext = c; } + TScanContext* getScanContext() const { return scanContext; } + void setPpContext(TPpContext* c) { ppContext = c; } + TPpContext* getPpContext() const { return ppContext; } + 
+ virtual void setLineCallback(const std::function<void(int, int, bool, int, const char*)>& func) { lineCallback = func; } + virtual void setExtensionCallback(const std::function<void(int, const char*, const char*)>& func) { extensionCallback = func; } + virtual void setVersionCallback(const std::function<void(int, int, const char*)>& func) { versionCallback = func; } + virtual void setPragmaCallback(const std::function<void(int, const TVector<TString>&)>& func) { pragmaCallback = func; } + virtual void setErrorCallback(const std::function<void(int, const char*)>& func) { errorCallback = func; } + + virtual void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) = 0; + virtual bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) = 0; + virtual bool lineDirectiveShouldSetNextLine() const = 0; + virtual void handlePragma(const TSourceLoc&, const TVector<TString>&) = 0; + + virtual bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) = 0; + + virtual void notifyVersion(int line, int version, const char* type_string) + { + if (versionCallback) + versionCallback(line, version, type_string); + } + virtual void notifyErrorDirective(int line, const char* error_message) + { + if (errorCallback) + errorCallback(line, error_message); + } + virtual void notifyLineDirective(int curLineNo, int newLineNo, bool hasSource, int sourceNum, const char* sourceName) + { + if (lineCallback) + lineCallback(curLineNo, newLineNo, hasSource, sourceNum, sourceName); + } + virtual void notifyExtensionDirective(int line, const char* extension, const char* behavior) + { + if (extensionCallback) + extensionCallback(line, extension, behavior); + } + + // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL) + virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr); + + // Manage global buffer (used for backing atomic counters in GLSL when using relaxed Vulkan semantics) + virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr); + + // Potentially rename shader entry point function + void renameShaderFunction(TString*& name) const + { + // Replace the entry point name given in the shader with the real entry point name, + // if there is a substitution.
+ if (name != nullptr && *name == sourceEntryPointName && intermediate.getEntryPointName().size() > 0) + name = NewPoolTString(intermediate.getEntryPointName().c_str()); + } + + virtual bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*); + virtual void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*); + + const char* const scopeMangler; + + // Basic parsing state, easily accessible to the grammar + + TSymbolTable& symbolTable; // symbol table that goes with the current language, version, and profile + int statementNestingLevel; // 0 if outside all flow control or compound statements + int loopNestingLevel; // 0 if outside all loops + int structNestingLevel; // 0 if outside structures + int blockNestingLevel; // 0 if outside blocks + int controlFlowNestingLevel; // 0 if outside all flow control + const TType* currentFunctionType; // the return type of the function that's currently being parsed + bool functionReturnsValue; // true if a non-void function has a return + // if inside a function, true if the function is the entry point and this is after a return statement + bool postEntryPointReturn; + // case, node, case, case, node, ...; ensure only one node between cases; stack of them for nesting + TList<TIntermSequence*> switchSequenceStack; + // the statementNestingLevel the current switch statement is at, which must match the level of its case statements + TList<int> switchLevel; + struct TPragma contextPragma; + int beginInvocationInterlockCount; + int endInvocationInterlockCount; + +protected: + TParseContextBase(TParseContextBase&); + TParseContextBase& operator=(TParseContextBase&); + + const bool parsingBuiltins; // true if parsing built-in symbols/functions + TVector<TSymbol*> linkageSymbols; // will be transferred to 'linkage', after all editing is done, order preserving + TScanContext* scanContext; + TPpContext* ppContext; + TBuiltInResource resources; + TLimits& limits; + TString sourceEntryPointName; +
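// Illustrative sketch (not glslang source): a client can observe #version and
// #error directives through the setter/notify pairs declared above, assuming a
// fully constructed parse context "pc". The lambdas convert to the
// std::function callback members declared just below.
#if 0
pc.setVersionCallback([](int line, int version, const char* profile) {
    printf("#version %d %s (line %d)\n", version, profile ? profile : "", line);
});
pc.setErrorCallback([](int line, const char* message) {
    fprintf(stderr, "#error on line %d: %s\n", line, message);
});
#endif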
+ // These, if set, will be called when a line, pragma ... is preprocessed. + // They will be called with any parameters to the original directive. + std::function<void(int, int, bool, int, const char*)> lineCallback; + std::function<void(int, const TVector<TString>&)> pragmaCallback; + std::function<void(int, int, const char*)> versionCallback; + std::function<void(int, const char*, const char*)> extensionCallback; + std::function<void(int, const char*)> errorCallback; + + // see implementation for detail + const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&, + std::function<bool(const TType& from, const TType& to, TOperator op, int arg)>, + std::function<bool(const TType& from, const TType& to1, const TType& to2)>, + /* output */ bool& tie); + + virtual void parseSwizzleSelector(const TSourceLoc&, const TString&, int size, + TSwizzleSelectors<TVectorSelector>&); + + // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL) + TVariable* globalUniformBlock; // the actual block, inserted into the symbol table + unsigned int globalUniformBinding; // the block's binding number + unsigned int globalUniformSet; // the block's set number + int firstNewMember; // the index of the first member not yet inserted into the symbol table + // override this to set the language-specific name + virtual const char* getGlobalUniformBlockName() const { return ""; } + virtual void setUniformBlockDefaults(TType&) const { } + virtual void finalizeGlobalUniformBlockLayout(TVariable&) {} + + // Manage the atomic counter block (used for atomic_uints with Vulkan-Relaxed) + TMap<int, TVariable*> atomicCounterBuffers; + unsigned int atomicCounterBlockSet; + TMap<int, int> atomicCounterBlockFirstNewMember; + // override this to set the language-specific name + virtual const char* getAtomicCounterBlockName() const { return ""; } + virtual void setAtomicCounterBlockDefaults(TType&) const {} + virtual void setInvariant(const TSourceLoc&, const char*) {} + virtual void finalizeAtomicCounterBlockLayout(TVariable&) {} + bool isAtomicCounterBlock(const TSymbol& symbol) { + const TVariable* var = symbol.getAsVariable(); + if (!var) + return false; + const auto& at = atomicCounterBuffers.find(var->getType().getQualifier().layoutBinding); + return (at != atomicCounterBuffers.end() && (*at).second->getType() == var->getType()); + } + + virtual void outputMessage(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, TPrefixType prefix, + va_list args); + virtual void trackLinkage(TSymbol& symbol); + virtual void makeEditable(TSymbol*&); + virtual TVariable* getEditableVariable(const char* name); + virtual void finish(); +}; + +// +// Manage the state for when to respect precision qualifiers and when to warn about +// the defaults being different than might be expected. +// +class TPrecisionManager { +public: + TPrecisionManager() : obey(false), warn(false), explicitIntDefault(false), explicitFloatDefault(false){ } + virtual ~TPrecisionManager() {} + + void respectPrecisionQualifiers() { obey = true; } + bool respectingPrecisionQualifiers() const { return obey; } + bool shouldWarnAboutDefaults() const { return warn; } + void defaultWarningGiven() { warn = false; } + void warnAboutDefaults() { warn = true; } + void explicitIntDefaultSeen() + { + explicitIntDefault = true; + if (explicitFloatDefault) + warn = false; + } + void explicitFloatDefaultSeen() + { + explicitFloatDefault = true; + if (explicitIntDefault) + warn = false; + } + +protected: + bool obey; // respect precision qualifiers + bool warn; // need to give a warning about the defaults + bool explicitIntDefault; // user set the default for int/uint + bool explicitFloatDefault; // user set the default for float +};
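// Illustrative sketch (not glslang source): TPrecisionManager above is a small
// state machine; the one-shot warning about defaults is disarmed once both an
// explicit int default and an explicit float default have been seen.
#if 0
glslang::TPrecisionManager pm;
pm.respectPrecisionQualifiers();  // e.g. an ES profile, where qualifiers matter
pm.warnAboutDefaults();           // arm the warning
pm.explicitIntDefaultSeen();      // "precision highp int;"
pm.explicitFloatDefaultSeen();    // "precision mediump float;" disarms it
bool stillWarn = pm.shouldWarnAboutDefaults();  // now false
#endif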
+ +// +// GLSL-specific parse helper. Should have GLSL in the name, but that's +// too big of a change for comparing branches at the moment, and perhaps +// impacts downstream consumers as well. +// +class TParseContext : public TParseContextBase { +public: + TParseContext(TSymbolTable&, TIntermediate&, bool parsingBuiltins, int version, EProfile, const SpvVersion& spvVersion, EShLanguage, TInfoSink&, + bool forwardCompatible = false, EShMessages messages = EShMsgDefault, + const TString* entryPoint = nullptr); + virtual ~TParseContext(); + + bool obeyPrecisionQualifiers() const { return precisionManager.respectingPrecisionQualifiers(); } + void setPrecisionDefaults(); + + void setLimits(const TBuiltInResource&) override; + bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) override; + void parserError(const char* s); // for bison's yyerror + + virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override; + virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override; + + void reservedErrorCheck(const TSourceLoc&, const TString&); + void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) override; + bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) override; + bool lineDirectiveShouldSetNextLine() const override; + bool builtInName(const TString&); + + void handlePragma(const TSourceLoc&, const TVector<TString>&) override; + TIntermTyped* handleVariable(const TSourceLoc&, TSymbol* symbol, const TString* string); + TIntermTyped* handleBracketDereference(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index); + void handleIndexLimits(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index); + +#ifndef GLSLANG_WEB + void makeEditable(TSymbol*&) override; + void ioArrayCheck(const TSourceLoc&, const TType&, const TString& identifier); +#endif + bool isIoResizeArray(const TType&) const; + void fixIoArraySize(const TSourceLoc&, TType&); + void handleIoResizeArrayAccess(const TSourceLoc&, TIntermTyped* base); + void checkIoArraysConsistency(const TSourceLoc&, bool tailOnly = false); + int getIoArrayImplicitSize(const TQualifier&, TString* featureString = nullptr) const; + void checkIoArrayConsistency(const TSourceLoc&, int requiredSize, const char* feature, TType&, const TString&); + + TIntermTyped* handleBinaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* left, TIntermTyped* right); + TIntermTyped* handleUnaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* childNode); + TIntermTyped* handleDotDereference(const TSourceLoc&, TIntermTyped* base, const TString& field); + TIntermTyped* handleDotSwizzle(const TSourceLoc&, TIntermTyped* base, const TString& field); + void blockMemberExtensionCheck(const TSourceLoc&, const TIntermTyped* base, int member, const TString& memberName); + TFunction* handleFunctionDeclarator(const TSourceLoc&, TFunction& function, bool prototype); + TIntermAggregate* handleFunctionDefinition(const TSourceLoc&, TFunction&); + TIntermTyped* handleFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + TIntermTyped* handleBuiltInFunctionCall(TSourceLoc, TIntermNode* arguments, const TFunction& function); + void computeBuiltinPrecisions(TIntermTyped&, const TFunction&); + TIntermNode* handleReturnValue(const TSourceLoc&, TIntermTyped*); + void checkLocation(const TSourceLoc&, TOperator); + TIntermTyped* handleLengthMethod(const TSourceLoc&, TFunction*, TIntermNode*); + void addInputArgumentConversions(const TFunction&, TIntermNode*&) const; + TIntermTyped* addOutputArgumentConversions(const
TFunction&, TIntermAggregate&) const; + TIntermTyped* addAssign(const TSourceLoc&, TOperator op, TIntermTyped* left, TIntermTyped* right); + void builtInOpCheck(const TSourceLoc&, const TFunction&, TIntermOperator&); + void nonOpBuiltInCheck(const TSourceLoc&, const TFunction&, TIntermAggregate&); + void userFunctionCallCheck(const TSourceLoc&, TIntermAggregate&); + void samplerConstructorLocationCheck(const TSourceLoc&, const char* token, TIntermNode*); + TFunction* handleConstructorCall(const TSourceLoc&, const TPublicType&); + void handlePrecisionQualifier(const TSourceLoc&, TQualifier&, TPrecisionQualifier); + void checkPrecisionQualifier(const TSourceLoc&, TPrecisionQualifier); + void memorySemanticsCheck(const TSourceLoc&, const TFunction&, const TIntermOperator& callNode); + + TIntermTyped* vkRelaxedRemapFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + // returns true if the variable was remapped to something else + bool vkRelaxedRemapUniformVariable(const TSourceLoc&, TString&, const TPublicType&, TArraySizes*, TIntermTyped*, TType&); + + void assignError(const TSourceLoc&, const char* op, TString left, TString right); + void unaryOpError(const TSourceLoc&, const char* op, TString operand); + void binaryOpError(const TSourceLoc&, const char* op, TString left, TString right); + void variableCheck(TIntermTyped*& nodePtr); + bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void constantValueCheck(TIntermTyped* node, const char* token); + void integerCheck(const TIntermTyped* node, const char* token); + void globalCheck(const TSourceLoc&, const char* token); + bool constructorError(const TSourceLoc&, TIntermNode*, TFunction&, TOperator, TType&); + bool constructorTextureSamplerError(const TSourceLoc&, const TFunction&); + void arraySizeCheck(const TSourceLoc&, TIntermTyped* expr, TArraySize&, const char *sizeType); + bool arrayQualifierError(const TSourceLoc&, const TQualifier&); + bool arrayError(const TSourceLoc&, const TType&); + void arraySizeRequiredCheck(const TSourceLoc&, const TArraySizes&); + void structArrayCheck(const TSourceLoc&, const TType& structure); + void arraySizesCheck(const TSourceLoc&, const TQualifier&, TArraySizes*, const TIntermTyped* initializer, bool lastMember); + void arrayOfArrayVersionCheck(const TSourceLoc&, const TArraySizes*); + bool voidErrorCheck(const TSourceLoc&, const TString&, TBasicType); + void boolCheck(const TSourceLoc&, const TIntermTyped*); + void boolCheck(const TSourceLoc&, const TPublicType&); + void samplerCheck(const TSourceLoc&, const TType&, const TString& identifier, TIntermTyped* initializer); + void atomicUintCheck(const TSourceLoc&, const TType&, const TString& identifier); + void accStructCheck(const TSourceLoc & loc, const TType & type, const TString & identifier); + void transparentOpaqueCheck(const TSourceLoc&, const TType&, const TString& identifier); + void memberQualifierCheck(glslang::TPublicType&); + void globalQualifierFixCheck(const TSourceLoc&, TQualifier&, bool isMemberCheck = false); + void globalQualifierTypeCheck(const TSourceLoc&, const TQualifier&, const TPublicType&); + bool structQualifierErrorCheck(const TSourceLoc&, const TPublicType& pType); + void mergeQualifiers(const TSourceLoc&, TQualifier& dst, const TQualifier& src, bool force); + void setDefaultPrecision(const TSourceLoc&, TPublicType&, TPrecisionQualifier); + int computeSamplerTypeIndex(TSampler&); + TPrecisionQualifier 
getDefaultPrecision(TPublicType&); + void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&); + void parameterTypeCheck(const TSourceLoc&, TStorageQualifier qualifier, const TType& type); + bool containsFieldWithBasicType(const TType& type ,TBasicType basicType); + TSymbol* redeclareBuiltinVariable(const TSourceLoc&, const TString&, const TQualifier&, const TShaderQualifiers&); + void redeclareBuiltinBlock(const TSourceLoc&, TTypeList& typeList, const TString& blockName, const TString* instanceName, TArraySizes* arraySizes); + void paramCheckFixStorage(const TSourceLoc&, const TStorageQualifier&, TType& type); + void paramCheckFix(const TSourceLoc&, const TQualifier&, TType& type); + void nestedBlockCheck(const TSourceLoc&); + void nestedStructCheck(const TSourceLoc&); + void arrayObjectCheck(const TSourceLoc&, const TType&, const char* op); + void opaqueCheck(const TSourceLoc&, const TType&, const char* op); + void referenceCheck(const TSourceLoc&, const TType&, const char* op); + void storage16BitAssignmentCheck(const TSourceLoc&, const TType&, const char* op); + void specializationCheck(const TSourceLoc&, const TType&, const char* op); + void structTypeCheck(const TSourceLoc&, TPublicType&); + void inductiveLoopCheck(const TSourceLoc&, TIntermNode* init, TIntermLoop* loop); + void arrayLimitCheck(const TSourceLoc&, const TString&, int size); + void limitCheck(const TSourceLoc&, int value, const char* limit, const char* feature); + + void inductiveLoopBodyCheck(TIntermNode*, long long loopIndexId, TSymbolTable&); + void constantIndexExpressionCheck(TIntermNode*); + + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&); + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&, const TIntermTyped*); + void mergeObjectLayoutQualifiers(TQualifier& dest, const TQualifier& src, bool inheritOnly); + void layoutObjectCheck(const TSourceLoc&, const TSymbol&); + void layoutMemberLocationArrayCheck(const TSourceLoc&, bool memberWithLocation, TArraySizes* arraySizes); + void layoutTypeCheck(const TSourceLoc&, const TType&); + void layoutQualifierCheck(const TSourceLoc&, const TQualifier&); + void checkNoShaderLayouts(const TSourceLoc&, const TShaderQualifiers&); + void fixOffset(const TSourceLoc&, TSymbol&); + + const TFunction* findFunction(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExact(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction120(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction400(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExplicitTypes(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + void declareTypeDefaults(const TSourceLoc&, const TPublicType&); + TIntermNode* declareVariable(const TSourceLoc&, TString& identifier, const TPublicType&, TArraySizes* typeArray = 0, TIntermTyped* initializer = 0); + TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&); + TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&); + TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset); + void inheritMemoryQualifiers(const TQualifier& from, TQualifier& to); + void declareBlock(const TSourceLoc&, TTypeList& typeList, const TString* instanceName = 0, TArraySizes* arraySizes = 0); + void blockStorageRemap(const TSourceLoc&, const TString*, TQualifier&); + void 
blockStageIoCheck(const TSourceLoc&, const TQualifier&); + void blockQualifierCheck(const TSourceLoc&, const TQualifier&, bool instanceName); + void fixBlockLocations(const TSourceLoc&, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); + void fixXfbOffsets(TQualifier&, TTypeList&); + void fixBlockUniformOffsets(TQualifier&, TTypeList&); + void fixBlockUniformLayoutMatrix(TQualifier&, TTypeList*, TTypeList*); + void fixBlockUniformLayoutPacking(TQualifier&, TTypeList*, TTypeList*); + void addQualifierToExisting(const TSourceLoc&, TQualifier, const TString& identifier); + void addQualifierToExisting(const TSourceLoc&, TQualifier, TIdentifierList&); + void invariantCheck(const TSourceLoc&, const TQualifier&); + void updateStandaloneQualifierDefaults(const TSourceLoc&, const TPublicType&); + void wrapupSwitchSubsequence(TIntermAggregate* statements, TIntermNode* branchNode); + TIntermNode* addSwitch(const TSourceLoc&, TIntermTyped* expression, TIntermAggregate* body); + const TTypeList* recordStructCopy(TStructRecord&, const TType*, const TType*); + +#ifndef GLSLANG_WEB + TAttributeType attributeFromName(const TString& name) const; + TAttributes* makeAttributes(const TString& identifier) const; + TAttributes* makeAttributes(const TString& identifier, TIntermNode* node) const; + TAttributes* mergeAttributes(TAttributes*, TAttributes*) const; + + // Determine selection control from attributes + void handleSelectionAttributes(const TAttributes& attributes, TIntermNode*); + void handleSwitchAttributes(const TAttributes& attributes, TIntermNode*); + // Determine loop control from attributes + void handleLoopAttributes(const TAttributes& attributes, TIntermNode*); + // Function attributes + void handleFunctionAttributes(const TSourceLoc&, const TAttributes&); + + // GL_EXT_spirv_intrinsics + TSpirvRequirement* makeSpirvRequirement(const TSourceLoc& loc, const TString& name, + const TIntermAggregate* extensions, const TIntermAggregate* capabilities); + TSpirvRequirement* mergeSpirvRequirements(const TSourceLoc& loc, TSpirvRequirement* spirvReq1, + TSpirvRequirement* spirvReq2); + TSpirvTypeParameters* makeSpirvTypeParameters(const TSourceLoc& loc, const TIntermConstantUnion* constant); + TSpirvTypeParameters* mergeSpirvTypeParameters(TSpirvTypeParameters* spirvTypeParams1, + TSpirvTypeParameters* spirvTypeParams2); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, const TString& value); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, int value); + TSpirvInstruction* mergeSpirvInstruction(const TSourceLoc& loc, TSpirvInstruction* spirvInst1, + TSpirvInstruction* spirvInst2); +#endif + + void checkAndResizeMeshViewDim(const TSourceLoc&, TType&, bool isBlockMember); + +protected: + void nonInitConstCheck(const TSourceLoc&, TString& identifier, TType& type); + void inheritGlobalDefaults(TQualifier& dst) const; + TVariable* makeInternalVariable(const char* name, const TType&) const; + TVariable* declareNonArray(const TSourceLoc&, const TString& identifier, const TType&); + void declareArray(const TSourceLoc&, const TString& identifier, const TType&, TSymbol*&); + void checkRuntimeSizable(const TSourceLoc&, const TIntermTyped&); + bool isRuntimeLength(const TIntermTyped&) const; + TIntermNode* executeInitializer(const TSourceLoc&, TIntermTyped* initializer, TVariable* variable); + TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer); +#ifndef GLSLANG_WEB + void 
finish() override; +#endif + + virtual const char* getGlobalUniformBlockName() const override; + virtual void finalizeGlobalUniformBlockLayout(TVariable&) override; + virtual void setUniformBlockDefaults(TType& block) const override; + + virtual const char* getAtomicCounterBlockName() const override; + virtual void finalizeAtomicCounterBlockLayout(TVariable&) override; + virtual void setAtomicCounterBlockDefaults(TType& block) const override; + virtual void setInvariant(const TSourceLoc& loc, const char* builtin) override; + +public: + // + // Generally, bison productions, the scanner, and the PP need read/write access to these; just give them direct access + // + + // Current state of parsing + bool inMain; // if inside a function, true if the function is main + const TString* blockName; + TQualifier currentBlockQualifier; + TPrecisionQualifier defaultPrecision[EbtNumTypes]; + TBuiltInResource resources; + TLimits& limits; + +protected: + TParseContext(TParseContext&); + TParseContext& operator=(TParseContext&); + + static const int maxSamplerIndex = EsdNumDims * (EbtNumTypes * (2 * 2 * 2 * 2 * 2)); // see computeSamplerTypeIndex() + TPrecisionQualifier defaultSamplerPrecision[maxSamplerIndex]; + TPrecisionManager precisionManager; + TQualifier globalBufferDefaults; + TQualifier globalUniformDefaults; + TQualifier globalInputDefaults; + TQualifier globalOutputDefaults; + TQualifier globalSharedDefaults; + TString currentCaller; // name of last function body entered (not valid when at global scope) +#ifndef GLSLANG_WEB + int* atomicUintOffsets; // to become an array of the right size to hold an offset per binding point + bool anyIndexLimits; + TIdSetType inductiveLoopIds; + TVector<TIntermTyped*> needsIndexLimitationChecking; + TStructRecord matrixFixRecord; + TStructRecord packingFixRecord; + + // + // Geometry shader input arrays: + // - array sizing is based on input primitive and/or explicit size + // + // Tessellation control output arrays: + // - array sizing is based on output layout(vertices=...)
and/or explicit size + // + // Both: + // - array sizing is retroactive + // - built-in block redeclarations interact with this + // + // Design: + // - use a per-context "resize-list", a list of symbols whose array sizes + // can be fixed + // + // - the resize-list starts empty at beginning of user-shader compilation, it does + // not have built-ins in it + // + // - on built-in array use: copyUp() symbol and add it to the resize-list + // + // - on user array declaration: add it to the resize-list + // + // - on block redeclaration: copyUp() symbol and add it to the resize-list + // * note, that appropriately gives an error if redeclaring a block that + // was already used and hence already copied-up + // + // - on seeing a layout declaration that sizes the array, fix everything in the + // resize-list, giving errors for mismatch + // + // - on seeing an array size declaration, give errors on mismatch between it and previous + // array-sizing declarations + // + TVector<TSymbol*> ioArraySymbolResizeList; +#endif +}; + +} // end namespace glslang + +#endif // _PARSER_HELPER_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/RemoveTree.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/RemoveTree.h new file mode 100644 index 0000000..1ed0156 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/RemoveTree.h @@ -0,0 +1,41 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE.
+// + +#pragma once + +namespace glslang { + +void RemoveAllTreeNodes(TIntermNode*); + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Scan.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Scan.h new file mode 100644 index 0000000..24b75cf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Scan.h @@ -0,0 +1,276 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +#ifndef _GLSLANG_SCAN_INCLUDED_ +#define _GLSLANG_SCAN_INCLUDED_ + +#include "Versions.h" + +namespace glslang { + +// Use a global end-of-input character, so no translation is needed across +// layers of encapsulation. Characters are all 8 bit, and positive, so there is +// no aliasing of character 255 onto -1, for example. +const int EndOfInput = -1; + +// +// A character scanner that seamlessly, on read-only strings, reads across an +// array of strings without assuming null termination. +// +class TInputScanner { +public: + TInputScanner(int n, const char* const s[], size_t L[], const char* const* names = nullptr, + int b = 0, int f = 0, bool single = false) : + numSources(n), + // up to this point, common usage is "char*", but now we need positive 8-bit characters + sources(reinterpret_cast<const unsigned char* const *>(s)), + lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f), singleLogical(single), + endOfFileReached(false) + { + loc = new TSourceLoc[numSources]; + for (int i = 0; i < numSources; ++i) { + loc[i].init(i - stringBias); + } + if (names != nullptr) { + for (int i = 0; i < numSources; ++i) + loc[i].name = names[i] != nullptr ?
NewPoolTString(names[i]) : nullptr; + } + loc[currentSource].line = 1; + logicalSourceLoc.init(1); + logicalSourceLoc.name = loc[0].name; + } + + virtual ~TInputScanner() + { + delete [] loc; + } + + // retrieve the next character and advance one character + int get() + { + int ret = peek(); + if (ret == EndOfInput) + return ret; + ++loc[currentSource].column; + ++logicalSourceLoc.column; + if (ret == '\n') { + ++loc[currentSource].line; + ++logicalSourceLoc.line; + logicalSourceLoc.column = 0; + loc[currentSource].column = 0; + } + advance(); + + return ret; + } + + // retrieve the next character, no advance + int peek() + { + if (currentSource >= numSources) { + endOfFileReached = true; + return EndOfInput; + } + // Make sure we do not read off the end of a string. + // N.B. Sources can have a length of 0. + int sourceToRead = currentSource; + size_t charToRead = currentChar; + while(charToRead >= lengths[sourceToRead]) { + charToRead = 0; + sourceToRead += 1; + if (sourceToRead >= numSources) { + return EndOfInput; + } + } + + // Here, we care about making negative valued characters positive + return sources[sourceToRead][charToRead]; + } + + // go back one character + void unget() + { + // Do not roll back once we've reached the end of the file. + if (endOfFileReached) + return; + + if (currentChar > 0) { + --currentChar; + --loc[currentSource].column; + --logicalSourceLoc.column; + if (loc[currentSource].column < 0) { + // We've moved back past a new line. Find the + // previous newline (or start of the file) to compute + // the column count on the now current line. + size_t chIndex = currentChar; + while (chIndex > 0) { + if (sources[currentSource][chIndex] == '\n') { + break; + } + --chIndex; + } + logicalSourceLoc.column = (int)(currentChar - chIndex); + loc[currentSource].column = (int)(currentChar - chIndex); + } + } else { + do { + --currentSource; + } while (currentSource > 0 && lengths[currentSource] == 0); + if (lengths[currentSource] == 0) { + // set to 0 if we've backed up to the start of an empty string + currentChar = 0; + } else + currentChar = lengths[currentSource] - 1; + } + if (peek() == '\n') { + --loc[currentSource].line; + --logicalSourceLoc.line; + } + } + + // for #line override + void setLine(int newLine) + { + logicalSourceLoc.line = newLine; + loc[getLastValidSourceIndex()].line = newLine; + } + + // for #line override in filename based parsing + void setFile(const char* filename) + { + TString* fn_tstr = NewPoolTString(filename); + logicalSourceLoc.name = fn_tstr; + loc[getLastValidSourceIndex()].name = fn_tstr; + } + + void setFile(const char* filename, int i) + { + TString* fn_tstr = NewPoolTString(filename); + if (i == getLastValidSourceIndex()) { + logicalSourceLoc.name = fn_tstr; + } + loc[i].name = fn_tstr; + } + + void setString(int newString) + { + logicalSourceLoc.string = newString; + loc[getLastValidSourceIndex()].string = newString; + logicalSourceLoc.name = nullptr; + loc[getLastValidSourceIndex()].name = nullptr; + } + + // for #include content indentation + void setColumn(int col) + { + logicalSourceLoc.column = col; + loc[getLastValidSourceIndex()].column = col; + } + + void setEndOfInput() + { + endOfFileReached = true; + currentSource = numSources; + } + + bool atEndOfInput() const { return endOfFileReached; } + + const TSourceLoc& getSourceLoc() const + { + if (singleLogical) { + return logicalSourceLoc; + } else { + return loc[std::max(0, std::min(currentSource, numSources - finale - 1))]; + } + } + // Returns the index (starting from 0) 
of the most recent valid source string we are reading from. + int getLastValidSourceIndex() const { return std::min(currentSource, numSources - 1); } + + void consumeWhiteSpace(bool& foundNonSpaceTab); + bool consumeComment(); + void consumeWhitespaceComment(bool& foundNonSpaceTab); + bool scanVersion(int& version, EProfile& profile, bool& notFirstToken); + +protected: + + // advance one character + void advance() + { + ++currentChar; + if (currentChar >= lengths[currentSource]) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + while (currentSource < numSources && lengths[currentSource] == 0) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + } + currentChar = 0; + } + } + + int numSources; // number of strings in source + const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput + const size_t *lengths; // length of each string + int currentSource; + size_t currentChar; + + // This is for reporting what string/line an error occurred on, and can be overridden by #line. + // It remembers the last state of each source string as it is left for the next one, so unget() + // can restore that state. + TSourceLoc* loc; // an array + + int stringBias; // the first string that is the user's string number 0 + int finale; // number of internal strings after user's last string + + TSourceLoc logicalSourceLoc; + bool singleLogical; // treats the strings as a single logical string. + // locations will be reported from the first string. + + // Set to true once peek() returns EndOfFile, so that we won't roll back + // once we've reached EndOfFile. + bool endOfFileReached; +}; + +} // end namespace glslang + +#endif // _GLSLANG_SCAN_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/ScanContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/ScanContext.h new file mode 100644 index 0000000..74b2b3c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/ScanContext.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
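// Illustrative sketch (not glslang source) for the TInputScanner defined
// above: it reads across multiple non-null-terminated strings as one logical
// stream, so two 2-character buffers scan as "abcd" (<cstdio> assumed).
#if 0
const char* strings[] = { "ab", "cd" };
size_t lengths[] = { 2, 2 };
glslang::TInputScanner scanner(2, strings, lengths);
for (int c = scanner.get(); c != glslang::EndOfInput; c = scanner.get())
    putchar(c);  // prints: abcd
#endif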
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// This holds context specific to the GLSL scanner, which +// sits between the preprocessor scanner and parser. +// + +#pragma once + +#include "ParseHelper.h" + +namespace glslang { + +class TPpContext; +class TPpToken; +class TParserToken; + +class TScanContext { +public: + explicit TScanContext(TParseContextBase& pc) : + parseContext(pc), + afterType(false), afterStruct(false), + field(false), afterBuffer(false) { } + virtual ~TScanContext() { } + + static void fillInKeywordMap(); + static void deleteKeywordMap(); + + int tokenize(TPpContext*, TParserToken&); + +protected: + TScanContext(TScanContext&); + TScanContext& operator=(TScanContext&); + + int tokenizeIdentifier(); + int identifierOrType(); + int reservedWord(); + int identifierOrReserved(bool reserved); + int es30ReservedFromGLSL(int version); + int nonreservedKeyword(int esVersion, int nonEsVersion); + int precisionKeyword(); + int matNxM(); + int dMat(); + int firstGenerationImage(bool inEs310); + int secondGenerationImage(); + + TParseContextBase& parseContext; + bool afterType; // true if we've recognized a type, so can only be looking for an identifier + bool afterStruct; // true if we've recognized the STRUCT keyword, so can only be looking for an identifier + bool field; // true if we're on a field, right after a '.' + bool afterBuffer; // true if we've recognized the BUFFER keyword + TSourceLoc loc; + TParserToken* parserToken; + TPpToken* ppToken; + + const char* tokenText; + int keyword; +}; + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/SymbolTable.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/SymbolTable.h new file mode 100644 index 0000000..0d45e48 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/SymbolTable.h @@ -0,0 +1,956 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SYMBOL_TABLE_INCLUDED_ +#define _SYMBOL_TABLE_INCLUDED_ + +// +// Symbol table for parsing. Has these design characteristics: +// +// * Same symbol table can be used to compile many shaders, to preserve +// effort of creating and loading with the large numbers of built-in +// symbols. +// +// --> This requires a copy mechanism, so initial pools used to create +// the shared information can be popped. Done through "clone" +// methods. +// +// * Name mangling will be used to give each function a unique name +// so that symbol table lookups are never ambiguous. This allows +// a simpler symbol table structure. +// +// * Pushing and popping of scope, so symbol table will really be a stack +// of symbol tables. Searched from the top, with new inserts going into +// the top. +// +// * Constants: Compile time constant symbols will keep their values +// in the symbol table. The parser can substitute constants at parse +// time, including doing constant folding and constant propagation. +// +// * No temporaries: Temporaries made from operations (+, --, .xy, etc.) +// are tracked in the intermediate representation, not the symbol table. +// + +#include "../Include/Common.h" +#include "../Include/intermediate.h" +#include "../Include/InfoSink.h" + +namespace glslang { + +// +// Symbol base class. (Can build functions or variables out of these...) 
+// + +class TVariable; +class TFunction; +class TAnonMember; + +typedef TVector<const char*> TExtensionList; + +class TSymbol { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + explicit TSymbol(const TString *n) : name(n), uniqueId(0), extensions(0), writable(true) { } + virtual TSymbol* clone() const = 0; + virtual ~TSymbol() { } // rely on all symbol owned memory coming from the pool + + virtual const TString& getName() const { return *name; } + virtual void changeName(const TString* newName) { name = newName; } + virtual void addPrefix(const char* prefix) + { + TString newName(prefix); + newName.append(*name); + changeName(NewPoolTString(newName.c_str())); + } + virtual const TString& getMangledName() const { return getName(); } + virtual TFunction* getAsFunction() { return 0; } + virtual const TFunction* getAsFunction() const { return 0; } + virtual TVariable* getAsVariable() { return 0; } + virtual const TVariable* getAsVariable() const { return 0; } + virtual const TAnonMember* getAsAnonMember() const { return 0; } + virtual const TType& getType() const = 0; + virtual TType& getWritableType() = 0; + virtual void setUniqueId(long long id) { uniqueId = id; } + virtual long long getUniqueId() const { return uniqueId; } + virtual void setExtensions(int numExts, const char* const exts[]) + { + assert(extensions == 0); + assert(numExts > 0); + extensions = NewPoolObject(extensions); + for (int e = 0; e < numExts; ++e) + extensions->push_back(exts[e]); + } + virtual int getNumExtensions() const { return extensions == nullptr ? 0 : (int)extensions->size(); } + virtual const char** getExtensions() const { return extensions->data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const = 0; + void dumpExtensions(TInfoSink& infoSink) const; +#endif + + virtual bool isReadOnly() const { return ! writable; } + virtual void makeReadOnly() { writable = false; } + +protected: + explicit TSymbol(const TSymbol&); + TSymbol& operator=(const TSymbol&); + + const TString *name; + unsigned long long uniqueId; // For cross-scope comparing during code generation + + // For tracking what extensions must be present + // (don't use if correct version/profile is present). + TExtensionList* extensions; // an array of pointers to existing constant char strings + + // + // N.B.: Non-const functions that will be generally used should assert on this, + // to avoid overwriting shared symbol-table information. + // + bool writable; +};
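// Illustrative sketch (not glslang source): built-in symbol-table levels are
// shared across compiles and marked read-only, so edits follow a
// clone-before-write convention using the clone()/makeReadOnly() interface
// declared above. Assumes a TSymbol* named "s" that may live in a shared level.
#if 0
if (s->isReadOnly()) {
    glslang::TSymbol* editable = s->clone();  // pool-allocated copy
    editable->setUniqueId(s->getUniqueId());  // keep identity stable
    s = editable;                             // now safe to modify
}
#endif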
+ +// +// Variable class, meaning a symbol that's not a function. +// +// There could be a separate class hierarchy for Constant variables; +// Only one of int, bool, or float, (or none) is correct for +// any particular use, but it's easy to do this way, and doesn't +// seem worth having separate classes, and "getConst" can't simply return +// different values for different types polymorphically, so this is +// just simple and pragmatic. +// +class TVariable : public TSymbol { +public: + TVariable(const TString *name, const TType& t, bool uT = false ) + : TSymbol(name), + userType(uT), + constSubtree(nullptr), + memberExtensions(nullptr), + anonId(-1) + { type.shallowCopy(t); } + virtual TVariable* clone() const; + virtual ~TVariable() { } + + virtual TVariable* getAsVariable() { return this; } + virtual const TVariable* getAsVariable() const { return this; } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { assert(writable); return type; } + virtual bool isUserType() const { return userType; } + virtual const TConstUnionArray& getConstArray() const { return constArray; } + virtual TConstUnionArray& getWritableConstArray() { assert(writable); return constArray; } + virtual void setConstArray(const TConstUnionArray& array) { constArray = array; } + virtual void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + virtual TIntermTyped* getConstSubtree() const { return constSubtree; } + virtual void setAnonId(int i) { anonId = i; } + virtual int getAnonId() const { return anonId; } + + virtual void setMemberExtensions(int member, int numExts, const char* const exts[]) + { + assert(type.isStruct()); + assert(numExts > 0); + if (memberExtensions == nullptr) { + memberExtensions = NewPoolObject(memberExtensions); + memberExtensions->resize(type.getStruct()->size()); + } + for (int e = 0; e < numExts; ++e) + (*memberExtensions)[member].push_back(exts[e]); + } + virtual bool hasMemberExtensions() const { return memberExtensions != nullptr; } + virtual int getNumMemberExtensions(int member) const + { + return memberExtensions == nullptr ? 0 : (int)(*memberExtensions)[member].size(); + } + virtual const char** getMemberExtensions(int member) const { return (*memberExtensions)[member].data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + +protected: + explicit TVariable(const TVariable&); + TVariable& operator=(const TVariable&); + + TType type; + bool userType; + + // we are assuming that Pool Allocator will free the memory allocated to unionArray + // when this object is destroyed + + TConstUnionArray constArray; // for compile-time constant value + TIntermTyped* constSubtree; // for specialization constant computation + TVector<TExtensionList>* memberExtensions; // per-member extension list, allocated only when needed + int anonId; // the ID used for anonymous blocks: TODO: see if uniqueId could serve a dual purpose +}; + +// +// The function sub-class of symbols and the parser will need to +// share this definition of a function parameter. +// +struct TParameter { + TString *name; + TType* type; + TIntermTyped* defaultValue; + TParameter& copyParam(const TParameter& param) + { + if (param.name) + name = NewPoolTString(param.name->c_str()); + else + name = 0; + type = param.type->clone(); + defaultValue = param.defaultValue; + return *this; + } + TBuiltInVariable getDeclaredBuiltIn() const { return type->getQualifier().declaredBuiltIn; } +}; + +// +// The function sub-class of a symbol.
+// +class TFunction : public TSymbol { +public: + explicit TFunction(TOperator o) : + TSymbol(0), + op(o), + defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) { } + TFunction(const TString *name, const TType& retType, TOperator tOp = EOpNull) : + TSymbol(name), + mangledName(*name + '('), + op(tOp), + defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) + { + returnType.shallowCopy(retType); + declaredBuiltIn = retType.getQualifier().builtIn; + } + virtual TFunction* clone() const override; + virtual ~TFunction(); + + virtual TFunction* getAsFunction() override { return this; } + virtual const TFunction* getAsFunction() const override { return this; } + + // Install 'p' as the (non-'this') last parameter. + // Non-'this' parameters are reflected in both the list of parameters and the + // mangled name. + virtual void addParameter(TParameter& p) + { + assert(writable); + parameters.push_back(p); + p.type->appendMangledName(mangledName); + + if (p.defaultValue != nullptr) + defaultParamCount++; + } + + // Install 'this' as the first parameter. + // 'this' is reflected in the list of parameters, but not the mangled name. + virtual void addThisParameter(TType& type, const char* name) + { + TParameter p = { NewPoolTString(name), new TType, nullptr }; + p.type->shallowCopy(type); + parameters.insert(parameters.begin(), p); + } + + virtual void addPrefix(const char* prefix) override + { + TSymbol::addPrefix(prefix); + mangledName.insert(0, prefix); + } + + virtual void removePrefix(const TString& prefix) + { + assert(mangledName.compare(0, prefix.size(), prefix) == 0); + mangledName.erase(0, prefix.size()); + } + + virtual const TString& getMangledName() const override { return mangledName; } + virtual const TType& getType() const override { return returnType; } + virtual TBuiltInVariable getDeclaredBuiltInType() const { return declaredBuiltIn; } + virtual TType& getWritableType() override { return returnType; } + virtual void relateToOperator(TOperator o) { assert(writable); op = o; } + virtual TOperator getBuiltInOp() const { return op; } + virtual void setDefined() { assert(writable); defined = true; } + virtual bool isDefined() const { return defined; } + virtual void setPrototyped() { assert(writable); prototyped = true; } + virtual bool isPrototyped() const { return prototyped; } + virtual void setImplicitThis() { assert(writable); implicitThis = true; } + virtual bool hasImplicitThis() const { return implicitThis; } + virtual void setIllegalImplicitThis() { assert(writable); illegalImplicitThis = true; } + virtual bool hasIllegalImplicitThis() const { return illegalImplicitThis; } + + // Return total number of parameters + virtual int getParamCount() const { return static_cast<int>(parameters.size()); }
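// Illustrative sketch (not glslang source, hypothetical names and values):
// addParameter() above appends each parameter's type code to mangledName, so
// every overload gets a distinct symbol-table key that starts with "name(".
#if 0
glslang::TType returnType(glslang::EbtVoid);
glslang::TFunction fn(glslang::NewPoolTString("blend"), returnType);  // mangledName == "blend("
glslang::TParameter arg = { nullptr, new glslang::TType(glslang::EbtFloat, glslang::EvqIn, 4), nullptr };
fn.addParameter(arg);  // mangledName grows by the parameter's type code
#endif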
+ // Return number of parameters with default values. + virtual int getDefaultParamCount() const { return defaultParamCount; } + // Return number of fixed parameters (without default values) + virtual int getFixedParamCount() const { return getParamCount() - getDefaultParamCount(); } + + virtual TParameter& operator[](int i) { assert(writable); return parameters[i]; } + virtual const TParameter& operator[](int i) const { return parameters[i]; } + +#ifndef GLSLANG_WEB + virtual void setSpirvInstruction(const TSpirvInstruction& inst) + { + relateToOperator(EOpSpirvInst); + spirvInst = inst; + } + virtual const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TFunction(const TFunction&); + TFunction& operator=(const TFunction&); + + typedef TVector<TParameter> TParamList; + TParamList parameters; + TType returnType; + TBuiltInVariable declaredBuiltIn; + + TString mangledName; + TOperator op; + bool defined; + bool prototyped; + bool implicitThis; // True if this function is allowed to see all members of 'this' + bool illegalImplicitThis; // True if this function is not supposed to have access to dynamic members of 'this', + // even if it finds member variables in the symbol table. + // This is important for a static member function that has member variables in scope, + // but is not allowed to use them, or see hidden symbols instead. + int defaultParamCount; + +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; // SPIR-V instruction qualifiers +#endif +}; + +// +// Members of anonymous blocks are a kind of TSymbol. They are not hidden in +// the symbol table behind a container; rather they are visible and point to +// their anonymous container. (The anonymous container is found through the +// member, not the other way around.)
+// +class TAnonMember : public TSymbol { +public: + TAnonMember(const TString* n, unsigned int m, TVariable& a, int an) : TSymbol(n), anonContainer(a), memberNumber(m), anonId(an) { } + virtual TAnonMember* clone() const override; + virtual ~TAnonMember() { } + + virtual const TAnonMember* getAsAnonMember() const override { return this; } + virtual const TVariable& getAnonContainer() const { return anonContainer; } + virtual unsigned int getMemberNumber() const { return memberNumber; } + + virtual const TType& getType() const override + { + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual TType& getWritableType() override + { + assert(writable); + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual void setExtensions(int numExts, const char* const exts[]) override + { + anonContainer.setMemberExtensions(memberNumber, numExts, exts); + } + virtual int getNumExtensions() const override { return anonContainer.getNumMemberExtensions(memberNumber); } + virtual const char** getExtensions() const override { return anonContainer.getMemberExtensions(memberNumber); } + + virtual int getAnonId() const { return anonId; } +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TAnonMember(const TAnonMember&); + TAnonMember& operator=(const TAnonMember&); + + TVariable& anonContainer; + unsigned int memberNumber; + int anonId; +}; + +class TSymbolTableLevel { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TSymbolTableLevel() : defaultPrecision(0), anonId(0), thisLevel(false) { } + ~TSymbolTableLevel(); + + bool insert(const TString& name, TSymbol* symbol) { + return level.insert(tLevelPair(name, symbol)).second; + } + + bool insert(TSymbol& symbol, bool separateNameSpaces, const TString& forcedKeyName = TString()) + { + // + // returning true means symbol was added to the table with no semantic errors + // + const TString& name = symbol.getName(); + if (forcedKeyName.length()) { + return level.insert(tLevelPair(forcedKeyName, &symbol)).second; + } + else if (name == "") { + symbol.getAsVariable()->setAnonId(anonId++); + // An empty name means an anonymous container, exposing its members to the external scope. + // Give it a name and insert its members in the symbol table, pointing to the container. + char buf[20]; + snprintf(buf, 20, "%s%d", AnonymousPrefix, symbol.getAsVariable()->getAnonId()); + symbol.changeName(NewPoolTString(buf)); + + return insertAnonymousMembers(symbol, 0); + } else { + // Check for redefinition errors: + // - STL itself will tell us if there is a direct name collision, with name mangling, at this level + // - additionally, check for function-redefining-variable name collisions + const TString& insertName = symbol.getMangledName(); + if (symbol.getAsFunction()) { + // make sure there isn't a variable of this name + if (! separateNameSpaces && level.find(name) != level.end()) + return false; + + // insert, and whatever happens is okay + level.insert(tLevelPair(insertName, &symbol)); + + return true; + } else + return level.insert(tLevelPair(insertName, &symbol)).second; + } + } + + // Add more members to an already inserted aggregate object + bool amend(TSymbol& symbol, int firstNewMember) + { + // See insert() for comments on basic explanation of insert. + // This operates similarly, but more simply. 
+ // Only supporting amend of anonymous blocks so far. + if (IsAnonymous(symbol.getName())) + return insertAnonymousMembers(symbol, firstNewMember); + else + return false; + } + + bool insertAnonymousMembers(TSymbol& symbol, int firstMember) + { + const TTypeList& types = *symbol.getAsVariable()->getType().getStruct(); + for (unsigned int m = firstMember; m < types.size(); ++m) { + TAnonMember* member = new TAnonMember(&types[m].type->getFieldName(), m, *symbol.getAsVariable(), symbol.getAsVariable()->getAnonId()); + if (! level.insert(tLevelPair(member->getMangledName(), member)).second) + return false; + } + + return true; + } + + void retargetSymbol(const TString& from, const TString& to) { + tLevel::const_iterator fromIt = level.find(from); + tLevel::const_iterator toIt = level.find(to); + if (fromIt == level.end() || toIt == level.end()) + return; + delete fromIt->second; + level[from] = toIt->second; + retargetedSymbols.push_back({from, to}); + } + + TSymbol* find(const TString& name) const + { + tLevel::const_iterator it = level.find(name); + if (it == level.end()) + return 0; + else + return (*it).second; + } + + void findFunctionNameList(const TString& name, TVector<const TFunction*>& list) + { + size_t parenAt = name.find_first_of('('); + TString base(name, 0, parenAt + 1); + + tLevel::const_iterator begin = level.lower_bound(base); + base[parenAt] = ')'; // assume ')' is lexically after '(' + tLevel::const_iterator end = level.upper_bound(base); + for (tLevel::const_iterator it = begin; it != end; ++it) + list.push_back(it->second->getAsFunction()); + } + + // See if there is already a function in the table having the given non-function-style name. + bool hasFunctionName(const TString& name) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt != candidateName.npos && candidateName.compare(0, parenAt, name) == 0) + + return true; + } + + return false; + } + + // See if there is a variable at this level having the given non-function-style name. + // Return true if name is found, and set variable to true if the name was a variable. + bool findFunctionVariableName(const TString& name, bool& variable) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt == candidateName.npos) { + // not a mangled name + if (candidateName == name) { + // found a variable name match + variable = true; + return true; + } + } else { + // a mangled name + if (candidateName.compare(0, parenAt, name) == 0) { + // found a function name match + variable = false; + return true; + } + } + } + + return false; + } +
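// Illustrative sketch (plain C++, not glslang source) of the bracketing trick
// used by findFunctionNameList() and hasFunctionName() above: because ')'
// sorts immediately after '(' in ASCII, the key range ["foo(", "foo)")
// contains exactly the mangled overloads of "foo" and nothing else
// ("foobar(" falls outside it). <map>, <string>, and <cstdio> assumed.
#if 0
std::map<std::string, int> level = {
    { "foo(f1;", 1 }, { "foo(vf4;", 2 }, { "foobar(", 3 }  // hypothetical manglings
};
for (auto it = level.lower_bound("foo("); it != level.upper_bound("foo)"); ++it)
    printf("%s\n", it->first.c_str());  // prints only the two "foo(" entries
#endif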
+    // Use this to do a lazy 'push' of precision defaults the first time
+    // a precision statement is seen in a new scope. Leave it at 0 for
+    // when no push was needed. Thus, it is not the current defaults,
+    // it is what to restore the defaults to when popping a level.
+    void setPreviousDefaultPrecisions(const TPrecisionQualifier *p)
+    {
+        // can call multiple times at one scope, will only latch on first call,
+        // as we're tracking the previous scope's values, not the current values
+        if (defaultPrecision != 0)
+            return;
+
+        defaultPrecision = new TPrecisionQualifier[EbtNumTypes];
+        for (int t = 0; t < EbtNumTypes; ++t)
+            defaultPrecision[t] = p[t];
+    }
+
+    void getPreviousDefaultPrecisions(TPrecisionQualifier *p)
+    {
+        // can be called for table level pops that didn't set the
+        // defaults
+        if (defaultPrecision == 0 || p == 0)
+            return;
+
+        for (int t = 0; t < EbtNumTypes; ++t)
+            p[t] = defaultPrecision[t];
+    }
+
+    void relateToOperator(const char* name, TOperator op);
+    void setFunctionExtensions(const char* name, int num, const char* const extensions[]);
+#if !defined(GLSLANG_WEB)
+    void dump(TInfoSink& infoSink, bool complete = false) const;
+#endif
+    TSymbolTableLevel* clone() const;
+    void readOnly();
+
+    void setThisLevel() { thisLevel = true; }
+    bool isThisLevel() const { return thisLevel; }
+
+protected:
+    explicit TSymbolTableLevel(TSymbolTableLevel&);
+    TSymbolTableLevel& operator=(TSymbolTableLevel&);
+
+    typedef std::map<TString, TSymbol*, std::less<TString>,
+                     pool_allocator<std::pair<const TString, TSymbol*> > > tLevel;
+    typedef const tLevel::value_type tLevelPair;
+    typedef std::pair<tLevel::iterator, bool> tInsertResult;
+
+    tLevel level;  // named mappings
+    TPrecisionQualifier *defaultPrecision;
+    // pair<FromName, ToName>
+    TVector<std::pair<TString, TString>> retargetedSymbols;
+    int anonId;
+    bool thisLevel;  // true if this level of the symbol table is a structure scope containing member functions
+                     // that are supposed to see anonymous access to member variables.
+};
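The defaultPrecision member doubles as the lazy-push flag: it stays null until the first precision statement in a scope, latches the previous scope's defaults exactly once, and the pop path restores from it. A minimal sketch of that latch-once/restore behavior, with an invented Precision enum standing in for TPrecisionQualifier and a fixed count standing in for EbtNumTypes:

    #include <cstdio>

    enum Precision { PrecNone, PrecLow, PrecMedium, PrecHigh };
    const int NumTypes = 3;  // stand-in for EbtNumTypes

    struct Level {
        Precision* saved = nullptr;            // like defaultPrecision: null until first latch
        void latchOnce(const Precision cur[]) {
            if (saved != nullptr)              // second precision statement in scope: no-op
                return;
            saved = new Precision[NumTypes];
            for (int t = 0; t < NumTypes; ++t)
                saved[t] = cur[t];
        }
        void restoreOnPop(Precision cur[]) {
            if (saved == nullptr)              // scope never changed the defaults
                return;
            for (int t = 0; t < NumTypes; ++t)
                cur[t] = saved[t];
        }
    };

    int main()
    {
        Precision current[NumTypes] = { PrecHigh, PrecMedium, PrecLow };
        Level scope;
        scope.latchOnce(current);    // first precision statement in the scope
        current[0] = PrecLow;        // scope-local default change
        scope.latchOnce(current);    // already latched: ignored
        scope.restoreOnPop(current); // pop: outer defaults come back
        std::printf("%d\n", current[0]);  // prints 3 (PrecHigh)
        return 0;
    }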
+
+class TSymbolTable {
+public:
+    TSymbolTable() : uniqueId(0), noBuiltInRedeclarations(false), separateNameSpaces(false), adoptedLevels(0)
+    {
+        //
+        // This symbol table cannot be used until push() is called.
+        //
+    }
+    ~TSymbolTable()
+    {
+        // this can be called explicitly; safest to code it so it can be called multiple times
+
+        // don't deallocate levels passed in from elsewhere
+        while (table.size() > adoptedLevels)
+            pop(0);
+    }
+
+    void adoptLevels(TSymbolTable& symTable)
+    {
+        for (unsigned int level = 0; level < symTable.table.size(); ++level) {
+            table.push_back(symTable.table[level]);
+            ++adoptedLevels;
+        }
+        uniqueId = symTable.uniqueId;
+        noBuiltInRedeclarations = symTable.noBuiltInRedeclarations;
+        separateNameSpaces = symTable.separateNameSpaces;
+    }
+
+    //
+    // While level adopting is generic, the methods below enact the following
+    // convention for levels:
+    //   0: common built-ins shared across all stages, all compiles, only one copy for all symbol tables
+    //   1: per-stage built-ins, shared across all compiles, but a different copy per stage
+    //   2: built-ins specific to a compile, like resources that are context-dependent, or redeclared built-ins
+    //   3: user-shader globals
+    //
+protected:
+    static const uint32_t LevelFlagBitOffset = 56;
+    static const int globalLevel = 3;
+    static bool isSharedLevel(int level)  { return level <= 1; }            // exclude all per-compile levels
+    static bool isBuiltInLevel(int level) { return level <= 2; }            // exclude user globals
+    static bool isGlobalLevel(int level)  { return level <= globalLevel; }  // include user globals
+public:
+    bool isEmpty() { return table.size() == 0; }
+    bool atBuiltInLevel() { return isBuiltInLevel(currentLevel()); }
+    bool atGlobalLevel()  { return isGlobalLevel(currentLevel()); }
+    static bool isBuiltInSymbol(long long uniqueId) {
+        int level = static_cast<int>(uniqueId >> LevelFlagBitOffset);
+        return isBuiltInLevel(level);
+    }
+    static constexpr uint64_t uniqueIdMask = (1LL << LevelFlagBitOffset) - 1;
+    static const uint32_t MaxLevelInUniqueID = 127;
+    void setNoBuiltInRedeclarations() { noBuiltInRedeclarations = true; }
+    void setSeparateNameSpaces() { separateNameSpaces = true; }
+
+    void push()
+    {
+        table.push_back(new TSymbolTableLevel);
+        updateUniqueIdLevelFlag();
+    }
+
+    // Make a new symbol-table level to represent the scope introduced by a structure
+    // containing member functions, such that the member functions can find anonymous
+    // references to member variables.
+    //
+    // 'thisSymbol' should have a name of "" to trigger anonymous structure-member
+    // symbol finds.
+    void pushThis(TSymbol& thisSymbol)
+    {
+        assert(thisSymbol.getName().size() == 0);
+        table.push_back(new TSymbolTableLevel);
+        updateUniqueIdLevelFlag();
+        table.back()->setThisLevel();
+        insert(thisSymbol);
+    }
+
+    void pop(TPrecisionQualifier *p)
+    {
+        table[currentLevel()]->getPreviousDefaultPrecisions(p);
+        delete table.back();
+        table.pop_back();
+        updateUniqueIdLevelFlag();
+    }
+
+    //
+    // Insert a visible symbol into the symbol table so it can
+    // be found later by name.
+    //
+    // Returns false if there was a name collision.
+    //
+    bool insert(TSymbol& symbol)
+    {
+        symbol.setUniqueId(++uniqueId);
+
+        // make sure there isn't a function of this variable name
+        if (! separateNameSpaces && ! symbol.getAsFunction() && table[currentLevel()]->hasFunctionName(symbol.getName()))
+            return false;
+
+        // check for not overloading or redefining a built-in function
+        if (noBuiltInRedeclarations) {
+            if (atGlobalLevel() && currentLevel() > 0) {
+                if (table[0]->hasFunctionName(symbol.getName()))
+                    return false;
+                if (currentLevel() > 1 && table[1]->hasFunctionName(symbol.getName()))
+                    return false;
+            }
+        }
+
+        return table[currentLevel()]->insert(symbol, separateNameSpaces);
+    }
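insert() above stamps every symbol with ++uniqueId, and updateUniqueIdLevelFlag() / amendSymbolIdLevel() (further below) keep the table level in the bits above LevelFlagBitOffset, which is what lets isBuiltInSymbol() recover the level from the id alone. A minimal stand-alone sketch of that encode/decode, reusing the constants' documented values:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint32_t LevelFlagBitOffset = 56;
        const uint64_t uniqueIdMask = (1ULL << LevelFlagBitOffset) - 1;

        uint64_t id = 42;     // low 56 bits: the running counter
        uint64_t level = 2;   // a built-in level (isBuiltInLevel: level <= 2)
        id = (id & uniqueIdMask) | (level << LevelFlagBitOffset);

        int decoded = (int)(id >> LevelFlagBitOffset);
        std::printf("counter=%llu level=%d builtIn=%d\n",
                    (unsigned long long)(id & uniqueIdMask), decoded, decoded <= 2);
        return 0;  // prints: counter=42 level=2 builtIn=1
    }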
+
+    // Add more members to an already inserted aggregate object
+    bool amend(TSymbol& symbol, int firstNewMember)
+    {
+        // See insert() for comments on basic explanation of insert.
+        // This operates similarly, but more simply.
+        return table[currentLevel()]->amend(symbol, firstNewMember);
+    }
+
+    // Update the level info in symbol's unique ID to current level
+    void amendSymbolIdLevel(TSymbol& symbol)
+    {
+        // clamp level to avoid overflow
+        uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ? MaxLevelInUniqueID : currentLevel();
+        uint64_t symbolId = symbol.getUniqueId();
+        symbolId &= uniqueIdMask;
+        symbolId |= (level << LevelFlagBitOffset);
+        symbol.setUniqueId(symbolId);
+    }
+    //
+    // To allocate an internal temporary, which will need to be uniquely
+    // identified by the consumer of the AST, but never needs to be
+    // found by doing a symbol table search by name, hence is allowed an
+    // arbitrary name in the symbol with no worry of collision.
+    //
+    void makeInternalVariable(TSymbol& symbol)
+    {
+        symbol.setUniqueId(++uniqueId);
+    }
+
+    //
+    // Copy a variable or anonymous member's structure from a shared level so that
+    // it can be added (soon after return) to the symbol table where it can be
+    // modified without impacting other users of the shared table.
+    //
+    TSymbol* copyUpDeferredInsert(TSymbol* shared)
+    {
+        if (shared->getAsVariable()) {
+            TSymbol* copy = shared->clone();
+            copy->setUniqueId(shared->getUniqueId());
+            return copy;
+        } else {
+            const TAnonMember* anon = shared->getAsAnonMember();
+            assert(anon);
+            TVariable* container = anon->getAnonContainer().clone();
+            container->changeName(NewPoolTString(""));
+            container->setUniqueId(anon->getAnonContainer().getUniqueId());
+            return container;
+        }
+    }
+
+    TSymbol* copyUp(TSymbol* shared)
+    {
+        TSymbol* copy = copyUpDeferredInsert(shared);
+        table[globalLevel]->insert(*copy, separateNameSpaces);
+        if (shared->getAsVariable())
+            return copy;
+        else {
+            // return the copy of the anonymous member
+            return table[globalLevel]->find(shared->getName());
+        }
+    }
+
+    // Normal find of a symbol, that can optionally say whether the symbol was found
+    // at a built-in level or the current top-scope level.
+    TSymbol* find(const TString& name, bool* builtIn = 0, bool* currentScope = 0, int* thisDepthP = 0)
+    {
+        int level = currentLevel();
+        TSymbol* symbol;
+        int thisDepth = 0;
+        do {
+            if (table[level]->isThisLevel())
+                ++thisDepth;
+            symbol = table[level]->find(name);
+            --level;
+        } while (symbol == nullptr && level >= 0);
+        level++;
+        if (builtIn)
+            *builtIn = isBuiltInLevel(level);
+        if (currentScope)
+            *currentScope = isGlobalLevel(currentLevel()) || level == currentLevel(); // consider shared levels as "current scope" WRT user globals
+        if (thisDepthP != nullptr) {
+            if (! table[level]->isThisLevel())
+                thisDepth = 0;
+            *thisDepthP = thisDepth;
+        }
+
+        return symbol;
+    }
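find() above reports where a hit came from through its optional out-parameters. A minimal stand-alone analogue of the inner-to-outer walk, with hypothetical names and a vector of plain maps in place of the TSymbolTableLevel stack, showing how the built-in test falls out of the level index:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main()
    {
        // Levels 0..2: built-ins, 3: user globals, 4: an inner user scope,
        // mirroring the level convention documented above.
        std::vector<std::map<std::string, int>> table(5);
        table[1]["gl_FragCoord"] = 1;   // a per-stage built-in
        table[4]["x"] = 2;              // innermost user symbol

        auto find = [&](const std::string& name, bool& builtIn) -> int* {
            for (int level = (int)table.size() - 1; level >= 0; --level) {
                auto it = table[level].find(name);
                if (it != table[level].end()) {
                    builtIn = level <= 2;   // same test as isBuiltInLevel()
                    return &it->second;
                }
            }
            return nullptr;
        };

        bool builtIn = false;
        if (find("gl_FragCoord", builtIn))
            std::printf("found, builtIn=%d\n", builtIn);  // builtIn=1
        return 0;
    }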
+
+    void retargetSymbol(const TString& from, const TString& to) {
+        int level = currentLevel();
+        table[level]->retargetSymbol(from, to);
+    }
+
+    // Find a symbol, returning how many layers of nested
+    // structures-with-member-functions ('this' scopes) deep it was found.
+    TSymbol* find(const TString& name, int& thisDepth)
+    {
+        int level = currentLevel();
+        TSymbol* symbol;
+        thisDepth = 0;
+        do {
+            if (table[level]->isThisLevel())
+                ++thisDepth;
+            symbol = table[level]->find(name);
+            --level;
+        } while (symbol == 0 && level >= 0);
+
+        if (! table[level + 1]->isThisLevel())
+            thisDepth = 0;
+
+        return symbol;
+    }
+
+    bool isFunctionNameVariable(const TString& name) const
+    {
+        if (separateNameSpaces)
+            return false;
+
+        int level = currentLevel();
+        do {
+            bool variable;
+            bool found = table[level]->findFunctionVariableName(name, variable);
+            if (found)
+                return variable;
+            --level;
+        } while (level >= 0);
+
+        return false;
+    }
+
+    void findFunctionNameList(const TString& name, TVector<const TFunction*>& list, bool& builtIn)
+    {
+        // For user levels, return the set found in the first scope with a match
+        builtIn = false;
+        int level = currentLevel();
+        do {
+            table[level]->findFunctionNameList(name, list);
+            --level;
+        } while (list.empty() && level >= globalLevel);
+
+        if (! list.empty())
+            return;
+
+        // Gather across all built-in levels; they don't hide each other
+        builtIn = true;
+        do {
+            table[level]->findFunctionNameList(name, list);
+            --level;
+        } while (level >= 0);
+    }
+
+    void relateToOperator(const char* name, TOperator op)
+    {
+        for (unsigned int level = 0; level < table.size(); ++level)
+            table[level]->relateToOperator(name, op);
+    }
+
+    void setFunctionExtensions(const char* name, int num, const char* const extensions[])
+    {
+        for (unsigned int level = 0; level < table.size(); ++level)
+            table[level]->setFunctionExtensions(name, num, extensions);
+    }
+
+    void setVariableExtensions(const char* name, int numExts, const char* const extensions[])
+    {
+        TSymbol* symbol = find(TString(name));
+        if (symbol == nullptr)
+            return;
+
+        symbol->setExtensions(numExts, extensions);
+    }
+
+    void setVariableExtensions(const char* blockName, const char* name, int numExts, const char* const extensions[])
+    {
+        TSymbol* symbol = find(TString(blockName));
+        if (symbol == nullptr)
+            return;
+        TVariable* variable = symbol->getAsVariable();
+        assert(variable != nullptr);
+
+        const TTypeList& structure = *variable->getAsVariable()->getType().getStruct();
+        for (int member = 0; member < (int)structure.size(); ++member) {
+            if (structure[member].type->getFieldName().compare(name) == 0) {
+                variable->setMemberExtensions(member, numExts, extensions);
+                return;
+            }
+        }
+    }
+
+    long long getMaxSymbolId() { return uniqueId; }
+#if !defined(GLSLANG_WEB)
+    void dump(TInfoSink& infoSink, bool complete = false) const;
+#endif
+    void copyTable(const TSymbolTable& copyOf);
+
+    void setPreviousDefaultPrecisions(TPrecisionQualifier *p) { table[currentLevel()]->setPreviousDefaultPrecisions(p); }
+
+    void readOnly()
+    {
+        for (unsigned int level = 0; level < table.size(); ++level)
+            table[level]->readOnly();
+    }
+
+    // Add current level in the high-bits of unique id
+    void updateUniqueIdLevelFlag() {
+        // clamp level to avoid overflow
+        uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ? MaxLevelInUniqueID : currentLevel();
+        uniqueId &= uniqueIdMask;
+        uniqueId |= (level << LevelFlagBitOffset);
+    }
+
+    void overwriteUniqueId(long long id)
+    {
+        uniqueId = id;
+        updateUniqueIdLevelFlag();
+    }
+
+protected:
+    TSymbolTable(TSymbolTable&);
+    TSymbolTable& operator=(TSymbolTable&);
+
+    int currentLevel() const { return static_cast<int>(table.size()) - 1; }
+
+    std::vector<TSymbolTableLevel*> table;
+    long long uniqueId;     // for unique identification in code generation
+    bool noBuiltInRedeclarations;
+    bool separateNameSpaces;
+    unsigned int adoptedLevels;
+};
+
+} // end namespace glslang
+
+#endif // _SYMBOL_TABLE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Versions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Versions.h
new file mode 100644
index 0000000..f06abdd
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/Versions.h
@@ -0,0 +1,359 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2013 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+// Copyright (C) 2015-2018 Google, Inc.
+// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _VERSIONS_INCLUDED_
+#define _VERSIONS_INCLUDED_
+
+#define LAST_ELEMENT_MARKER(x) x
+
+//
+// Help manage multiple profiles, versions, extensions etc.
+//
+
+//
+// Profiles are set up for masking operations, so queries can be done on multiple
+// profiles at the same time.
+//
+// Don't maintain an ordinal set of enums (0,1,2,3...) to avoid all possible
+// defects from mixing the two different forms.
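Because each profile occupies its own bit, one mask test can cover several profiles at once, which is the point of the comment above. A minimal sketch with a stand-alone copy of the profile bits (renamed here purely for illustration):

    #include <cstdio>

    enum EProfileBits : unsigned {   // stand-alone copy of the bit layout below
        ENoProfileBit            = 1 << 0,
        ECoreProfileBit          = 1 << 1,
        ECompatibilityProfileBit = 1 << 2,
        EEsProfileBit            = 1 << 3,
    };

    int main()
    {
        unsigned current = ECoreProfileBit;
        // One test covers multiple profiles at the same time:
        if (current & (ECoreProfileBit | ECompatibilityProfileBit))
            std::printf("desktop profile\n");
        if (!(current & EEsProfileBit))
            std::printf("not an ES profile\n");
        return 0;
    }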
+//
+typedef enum : unsigned {
+    EBadProfile           = 0,
+    ENoProfile            = (1 << 0), // only for desktop, before profiles showed up
+    ECoreProfile          = (1 << 1),
+    ECompatibilityProfile = (1 << 2),
+    EEsProfile            = (1 << 3),
+    LAST_ELEMENT_MARKER(EProfileCount),
+} EProfile;
+
+namespace glslang {
+
+//
+// Map from profile enum to externally readable text name.
+//
+inline const char* ProfileName(EProfile profile)
+{
+    switch (profile) {
+    case ENoProfile:            return "none";
+    case ECoreProfile:          return "core";
+    case ECompatibilityProfile: return "compatibility";
+    case EEsProfile:            return "es";
+    default:                    return "unknown profile";
+    }
+}
+
+//
+// What source rules, validation rules, target language, etc. are needed or
+// desired for SPIR-V?
+//
+// 0 means a target or rule set is not enabled (ignore rules from that entity).
+// Non-0 means to apply semantic rules arising from that version of its rule set.
+// The union of all requested rule sets will be applied.
+//
+struct SpvVersion {
+    SpvVersion() : spv(0), vulkanGlsl(0), vulkan(0), openGl(0), vulkanRelaxed(false) {}
+    unsigned int spv;   // the version of SPIR-V to target, as defined by "word 1" of the SPIR-V binary header
+    int vulkanGlsl;     // the version of GLSL semantics for Vulkan, from GL_KHR_vulkan_glsl, for "#define VULKAN XXX"
+    int vulkan;         // the version of Vulkan, for which SPIR-V execution environment rules to use
+    int openGl;         // the version of GLSL semantics for OpenGL, from GL_ARB_gl_spirv, for "#define GL_SPIRV XXX"
+    bool vulkanRelaxed; // relax changes to GLSL for Vulkan, allowing some GL-specific constructs to be compiled to the Vulkan SPIR-V target
+};
+
+//
+// The behaviors from the GLSL "#extension extension_name : behavior"
+//
+typedef enum {
+    EBhMissing = 0,
+    EBhRequire,
+    EBhEnable,
+    EBhWarn,
+    EBhDisable,
+    EBhDisablePartial // use as initial state of an extension that is only partially implemented
+} TExtensionBehavior;
+
+//
+// Symbolic names for extensions. Strings may be directly used when calling the
+// functions, but better to have the compiler do spelling checks.
+// +const char* const E_GL_OES_texture_3D = "GL_OES_texture_3D"; +const char* const E_GL_OES_standard_derivatives = "GL_OES_standard_derivatives"; +const char* const E_GL_EXT_frag_depth = "GL_EXT_frag_depth"; +const char* const E_GL_OES_EGL_image_external = "GL_OES_EGL_image_external"; +const char* const E_GL_OES_EGL_image_external_essl3 = "GL_OES_EGL_image_external_essl3"; +const char* const E_GL_EXT_YUV_target = "GL_EXT_YUV_target"; +const char* const E_GL_EXT_shader_texture_lod = "GL_EXT_shader_texture_lod"; +const char* const E_GL_EXT_shadow_samplers = "GL_EXT_shadow_samplers"; + +const char* const E_GL_ARB_texture_rectangle = "GL_ARB_texture_rectangle"; +const char* const E_GL_3DL_array_objects = "GL_3DL_array_objects"; +const char* const E_GL_ARB_shading_language_420pack = "GL_ARB_shading_language_420pack"; +const char* const E_GL_ARB_texture_gather = "GL_ARB_texture_gather"; +const char* const E_GL_ARB_gpu_shader5 = "GL_ARB_gpu_shader5"; +const char* const E_GL_ARB_separate_shader_objects = "GL_ARB_separate_shader_objects"; +const char* const E_GL_ARB_compute_shader = "GL_ARB_compute_shader"; +const char* const E_GL_ARB_tessellation_shader = "GL_ARB_tessellation_shader"; +const char* const E_GL_ARB_enhanced_layouts = "GL_ARB_enhanced_layouts"; +const char* const E_GL_ARB_texture_cube_map_array = "GL_ARB_texture_cube_map_array"; +const char* const E_GL_ARB_texture_multisample = "GL_ARB_texture_multisample"; +const char* const E_GL_ARB_shader_texture_lod = "GL_ARB_shader_texture_lod"; +const char* const E_GL_ARB_explicit_attrib_location = "GL_ARB_explicit_attrib_location"; +const char* const E_GL_ARB_explicit_uniform_location = "GL_ARB_explicit_uniform_location"; +const char* const E_GL_ARB_shader_image_load_store = "GL_ARB_shader_image_load_store"; +const char* const E_GL_ARB_shader_atomic_counters = "GL_ARB_shader_atomic_counters"; +const char* const E_GL_ARB_shader_atomic_counter_ops = "GL_ARB_shader_atomic_counter_ops"; +const char* const E_GL_ARB_shader_draw_parameters = "GL_ARB_shader_draw_parameters"; +const char* const E_GL_ARB_shader_group_vote = "GL_ARB_shader_group_vote"; +const char* const E_GL_ARB_derivative_control = "GL_ARB_derivative_control"; +const char* const E_GL_ARB_shader_texture_image_samples = "GL_ARB_shader_texture_image_samples"; +const char* const E_GL_ARB_viewport_array = "GL_ARB_viewport_array"; +const char* const E_GL_ARB_gpu_shader_int64 = "GL_ARB_gpu_shader_int64"; +const char* const E_GL_ARB_gpu_shader_fp64 = "GL_ARB_gpu_shader_fp64"; +const char* const E_GL_ARB_shader_ballot = "GL_ARB_shader_ballot"; +const char* const E_GL_ARB_sparse_texture2 = "GL_ARB_sparse_texture2"; +const char* const E_GL_ARB_sparse_texture_clamp = "GL_ARB_sparse_texture_clamp"; +const char* const E_GL_ARB_shader_stencil_export = "GL_ARB_shader_stencil_export"; +// const char* const E_GL_ARB_cull_distance = "GL_ARB_cull_distance"; // present for 4.5, but need extension control over block members +const char* const E_GL_ARB_post_depth_coverage = "GL_ARB_post_depth_coverage"; +const char* const E_GL_ARB_shader_viewport_layer_array = "GL_ARB_shader_viewport_layer_array"; +const char* const E_GL_ARB_fragment_shader_interlock = "GL_ARB_fragment_shader_interlock"; +const char* const E_GL_ARB_shader_clock = "GL_ARB_shader_clock"; +const char* const E_GL_ARB_uniform_buffer_object = "GL_ARB_uniform_buffer_object"; +const char* const E_GL_ARB_sample_shading = "GL_ARB_sample_shading"; +const char* const E_GL_ARB_shader_bit_encoding = "GL_ARB_shader_bit_encoding"; +const char* const 
E_GL_ARB_shader_image_size = "GL_ARB_shader_image_size"; +const char* const E_GL_ARB_shader_storage_buffer_object = "GL_ARB_shader_storage_buffer_object"; +const char* const E_GL_ARB_shading_language_packing = "GL_ARB_shading_language_packing"; +const char* const E_GL_ARB_texture_query_lod = "GL_ARB_texture_query_lod"; +const char* const E_GL_ARB_vertex_attrib_64bit = "GL_ARB_vertex_attrib_64bit"; +const char* const E_GL_ARB_draw_instanced = "GL_ARB_draw_instanced"; +const char* const E_GL_ARB_fragment_coord_conventions = "GL_ARB_fragment_coord_conventions"; + +const char* const E_GL_KHR_shader_subgroup_basic = "GL_KHR_shader_subgroup_basic"; +const char* const E_GL_KHR_shader_subgroup_vote = "GL_KHR_shader_subgroup_vote"; +const char* const E_GL_KHR_shader_subgroup_arithmetic = "GL_KHR_shader_subgroup_arithmetic"; +const char* const E_GL_KHR_shader_subgroup_ballot = "GL_KHR_shader_subgroup_ballot"; +const char* const E_GL_KHR_shader_subgroup_shuffle = "GL_KHR_shader_subgroup_shuffle"; +const char* const E_GL_KHR_shader_subgroup_shuffle_relative = "GL_KHR_shader_subgroup_shuffle_relative"; +const char* const E_GL_KHR_shader_subgroup_clustered = "GL_KHR_shader_subgroup_clustered"; +const char* const E_GL_KHR_shader_subgroup_quad = "GL_KHR_shader_subgroup_quad"; +const char* const E_GL_KHR_memory_scope_semantics = "GL_KHR_memory_scope_semantics"; + +const char* const E_GL_EXT_shader_atomic_int64 = "GL_EXT_shader_atomic_int64"; + +const char* const E_GL_EXT_shader_non_constant_global_initializers = "GL_EXT_shader_non_constant_global_initializers"; +const char* const E_GL_EXT_shader_image_load_formatted = "GL_EXT_shader_image_load_formatted"; + +const char* const E_GL_EXT_shader_16bit_storage = "GL_EXT_shader_16bit_storage"; +const char* const E_GL_EXT_shader_8bit_storage = "GL_EXT_shader_8bit_storage"; + + +// EXT extensions +const char* const E_GL_EXT_device_group = "GL_EXT_device_group"; +const char* const E_GL_EXT_multiview = "GL_EXT_multiview"; +const char* const E_GL_EXT_post_depth_coverage = "GL_EXT_post_depth_coverage"; +const char* const E_GL_EXT_control_flow_attributes = "GL_EXT_control_flow_attributes"; +const char* const E_GL_EXT_nonuniform_qualifier = "GL_EXT_nonuniform_qualifier"; +const char* const E_GL_EXT_samplerless_texture_functions = "GL_EXT_samplerless_texture_functions"; +const char* const E_GL_EXT_scalar_block_layout = "GL_EXT_scalar_block_layout"; +const char* const E_GL_EXT_fragment_invocation_density = "GL_EXT_fragment_invocation_density"; +const char* const E_GL_EXT_buffer_reference = "GL_EXT_buffer_reference"; +const char* const E_GL_EXT_buffer_reference2 = "GL_EXT_buffer_reference2"; +const char* const E_GL_EXT_buffer_reference_uvec2 = "GL_EXT_buffer_reference_uvec2"; +const char* const E_GL_EXT_demote_to_helper_invocation = "GL_EXT_demote_to_helper_invocation"; +const char* const E_GL_EXT_shader_realtime_clock = "GL_EXT_shader_realtime_clock"; +const char* const E_GL_EXT_debug_printf = "GL_EXT_debug_printf"; +const char* const E_GL_EXT_ray_tracing = "GL_EXT_ray_tracing"; +const char* const E_GL_EXT_ray_query = "GL_EXT_ray_query"; +const char* const E_GL_EXT_ray_flags_primitive_culling = "GL_EXT_ray_flags_primitive_culling"; +const char* const E_GL_EXT_ray_cull_mask = "GL_EXT_ray_cull_mask"; +const char* const E_GL_EXT_blend_func_extended = "GL_EXT_blend_func_extended"; +const char* const E_GL_EXT_shader_implicit_conversions = "GL_EXT_shader_implicit_conversions"; +const char* const E_GL_EXT_fragment_shading_rate = "GL_EXT_fragment_shading_rate"; +const char* const 
+E_GL_EXT_shader_image_int64 = "GL_EXT_shader_image_int64";
+const char* const E_GL_EXT_null_initializer = "GL_EXT_null_initializer";
+const char* const E_GL_EXT_shared_memory_block = "GL_EXT_shared_memory_block";
+const char* const E_GL_EXT_subgroup_uniform_control_flow = "GL_EXT_subgroup_uniform_control_flow";
+const char* const E_GL_EXT_spirv_intrinsics = "GL_EXT_spirv_intrinsics";
+const char* const E_GL_EXT_fragment_shader_barycentric = "GL_EXT_fragment_shader_barycentric";
+const char* const E_GL_EXT_mesh_shader = "GL_EXT_mesh_shader";
+const char* const E_GL_EXT_opacity_micromap = "GL_EXT_opacity_micromap";
+
+// Array of extensions to cover both extensions providing the above post-depth-coverage capability.
+const char* const post_depth_coverageEXTs[] = { E_GL_ARB_post_depth_coverage, E_GL_EXT_post_depth_coverage };
+const int Num_post_depth_coverageEXTs = sizeof(post_depth_coverageEXTs) / sizeof(post_depth_coverageEXTs[0]);
+
+// Array of extensions to cover both extensions providing ray tracing capabilities.
+const char* const ray_tracing_EXTs[] = { E_GL_EXT_ray_query, E_GL_EXT_ray_tracing };
+const int Num_ray_tracing_EXTs = sizeof(ray_tracing_EXTs) / sizeof(ray_tracing_EXTs[0]);
+
+// OVR extensions
+const char* const E_GL_OVR_multiview = "GL_OVR_multiview";
+const char* const E_GL_OVR_multiview2 = "GL_OVR_multiview2";
+
+const char* const OVR_multiview_EXTs[] = { E_GL_OVR_multiview, E_GL_OVR_multiview2 };
+const int Num_OVR_multiview_EXTs = sizeof(OVR_multiview_EXTs) / sizeof(OVR_multiview_EXTs[0]);
+
+// #line and #include
+const char* const E_GL_GOOGLE_cpp_style_line_directive = "GL_GOOGLE_cpp_style_line_directive";
+const char* const E_GL_GOOGLE_include_directive = "GL_GOOGLE_include_directive";
+
+const char* const E_GL_AMD_shader_ballot = "GL_AMD_shader_ballot";
+const char* const E_GL_AMD_shader_trinary_minmax = "GL_AMD_shader_trinary_minmax";
+const char* const E_GL_AMD_shader_explicit_vertex_parameter = "GL_AMD_shader_explicit_vertex_parameter";
+const char* const E_GL_AMD_gcn_shader = "GL_AMD_gcn_shader";
+const char* const E_GL_AMD_gpu_shader_half_float = "GL_AMD_gpu_shader_half_float";
+const char* const E_GL_AMD_texture_gather_bias_lod = "GL_AMD_texture_gather_bias_lod";
+const char* const E_GL_AMD_gpu_shader_int16 = "GL_AMD_gpu_shader_int16";
+const char* const E_GL_AMD_shader_image_load_store_lod = "GL_AMD_shader_image_load_store_lod";
+const char* const E_GL_AMD_shader_fragment_mask = "GL_AMD_shader_fragment_mask";
+const char* const E_GL_AMD_gpu_shader_half_float_fetch = "GL_AMD_gpu_shader_half_float_fetch";
+const char* const E_GL_AMD_shader_early_and_late_fragment_tests = "GL_AMD_shader_early_and_late_fragment_tests";
+
+const char* const E_GL_INTEL_shader_integer_functions2 = "GL_INTEL_shader_integer_functions2";
+
+const char* const E_GL_NV_sample_mask_override_coverage = "GL_NV_sample_mask_override_coverage";
+const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometry_shader_passthrough";
+const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2";
+const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering";
+const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes";
+const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64";
+const char* const E_GL_NV_conservative_raster_underestimation = "GL_NV_conservative_raster_underestimation";
+const char* const E_GL_NV_shader_noperspective_interpolation = "GL_NV_shader_noperspective_interpolation";
+const char* const
E_GL_NV_shader_subgroup_partitioned = "GL_NV_shader_subgroup_partitioned"; +const char* const E_GL_NV_shading_rate_image = "GL_NV_shading_rate_image"; +const char* const E_GL_NV_ray_tracing = "GL_NV_ray_tracing"; +const char* const E_GL_NV_ray_tracing_motion_blur = "GL_NV_ray_tracing_motion_blur"; +const char* const E_GL_NV_fragment_shader_barycentric = "GL_NV_fragment_shader_barycentric"; +const char* const E_GL_NV_compute_shader_derivatives = "GL_NV_compute_shader_derivatives"; +const char* const E_GL_NV_shader_texture_footprint = "GL_NV_shader_texture_footprint"; +const char* const E_GL_NV_mesh_shader = "GL_NV_mesh_shader"; + +// Arrays of extensions for the above viewportEXTs duplications + +const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_NV_viewport_array2 }; +const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]); + +const char* const E_GL_NV_cooperative_matrix = "GL_NV_cooperative_matrix"; +const char* const E_GL_NV_shader_sm_builtins = "GL_NV_shader_sm_builtins"; +const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer_cooperative_matrix"; + +// AEP +const char* const E_GL_ANDROID_extension_pack_es31a = "GL_ANDROID_extension_pack_es31a"; +const char* const E_GL_KHR_blend_equation_advanced = "GL_KHR_blend_equation_advanced"; +const char* const E_GL_OES_sample_variables = "GL_OES_sample_variables"; +const char* const E_GL_OES_shader_image_atomic = "GL_OES_shader_image_atomic"; +const char* const E_GL_OES_shader_multisample_interpolation = "GL_OES_shader_multisample_interpolation"; +const char* const E_GL_OES_texture_storage_multisample_2d_array = "GL_OES_texture_storage_multisample_2d_array"; +const char* const E_GL_EXT_geometry_shader = "GL_EXT_geometry_shader"; +const char* const E_GL_EXT_geometry_point_size = "GL_EXT_geometry_point_size"; +const char* const E_GL_EXT_gpu_shader5 = "GL_EXT_gpu_shader5"; +const char* const E_GL_EXT_primitive_bounding_box = "GL_EXT_primitive_bounding_box"; +const char* const E_GL_EXT_shader_io_blocks = "GL_EXT_shader_io_blocks"; +const char* const E_GL_EXT_tessellation_shader = "GL_EXT_tessellation_shader"; +const char* const E_GL_EXT_tessellation_point_size = "GL_EXT_tessellation_point_size"; +const char* const E_GL_EXT_texture_buffer = "GL_EXT_texture_buffer"; +const char* const E_GL_EXT_texture_cube_map_array = "GL_EXT_texture_cube_map_array"; +const char* const E_GL_EXT_shader_integer_mix = "GL_EXT_shader_integer_mix"; + +// OES matching AEP +const char* const E_GL_OES_geometry_shader = "GL_OES_geometry_shader"; +const char* const E_GL_OES_geometry_point_size = "GL_OES_geometry_point_size"; +const char* const E_GL_OES_gpu_shader5 = "GL_OES_gpu_shader5"; +const char* const E_GL_OES_primitive_bounding_box = "GL_OES_primitive_bounding_box"; +const char* const E_GL_OES_shader_io_blocks = "GL_OES_shader_io_blocks"; +const char* const E_GL_OES_tessellation_shader = "GL_OES_tessellation_shader"; +const char* const E_GL_OES_tessellation_point_size = "GL_OES_tessellation_point_size"; +const char* const E_GL_OES_texture_buffer = "GL_OES_texture_buffer"; +const char* const E_GL_OES_texture_cube_map_array = "GL_OES_texture_cube_map_array"; + +// EXT +const char* const E_GL_EXT_shader_explicit_arithmetic_types = "GL_EXT_shader_explicit_arithmetic_types"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int8 = "GL_EXT_shader_explicit_arithmetic_types_int8"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int16 = "GL_EXT_shader_explicit_arithmetic_types_int16"; +const 
char* const E_GL_EXT_shader_explicit_arithmetic_types_int32 = "GL_EXT_shader_explicit_arithmetic_types_int32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int64 = "GL_EXT_shader_explicit_arithmetic_types_int64"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float16 = "GL_EXT_shader_explicit_arithmetic_types_float16"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float32 = "GL_EXT_shader_explicit_arithmetic_types_float32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float64 = "GL_EXT_shader_explicit_arithmetic_types_float64"; + +const char* const E_GL_EXT_shader_subgroup_extended_types_int8 = "GL_EXT_shader_subgroup_extended_types_int8"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int16 = "GL_EXT_shader_subgroup_extended_types_int16"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int64 = "GL_EXT_shader_subgroup_extended_types_int64"; +const char* const E_GL_EXT_shader_subgroup_extended_types_float16 = "GL_EXT_shader_subgroup_extended_types_float16"; +const char* const E_GL_EXT_terminate_invocation = "GL_EXT_terminate_invocation"; + +const char* const E_GL_EXT_shader_atomic_float = "GL_EXT_shader_atomic_float"; +const char* const E_GL_EXT_shader_atomic_float2 = "GL_EXT_shader_atomic_float2"; + +// Arrays of extensions for the above AEP duplications + +const char* const AEP_geometry_shader[] = { E_GL_EXT_geometry_shader, E_GL_OES_geometry_shader }; +const int Num_AEP_geometry_shader = sizeof(AEP_geometry_shader)/sizeof(AEP_geometry_shader[0]); + +const char* const AEP_geometry_point_size[] = { E_GL_EXT_geometry_point_size, E_GL_OES_geometry_point_size }; +const int Num_AEP_geometry_point_size = sizeof(AEP_geometry_point_size)/sizeof(AEP_geometry_point_size[0]); + +const char* const AEP_gpu_shader5[] = { E_GL_EXT_gpu_shader5, E_GL_OES_gpu_shader5 }; +const int Num_AEP_gpu_shader5 = sizeof(AEP_gpu_shader5)/sizeof(AEP_gpu_shader5[0]); + +const char* const AEP_primitive_bounding_box[] = { E_GL_EXT_primitive_bounding_box, E_GL_OES_primitive_bounding_box }; +const int Num_AEP_primitive_bounding_box = sizeof(AEP_primitive_bounding_box)/sizeof(AEP_primitive_bounding_box[0]); + +const char* const AEP_shader_io_blocks[] = { E_GL_EXT_shader_io_blocks, E_GL_OES_shader_io_blocks }; +const int Num_AEP_shader_io_blocks = sizeof(AEP_shader_io_blocks)/sizeof(AEP_shader_io_blocks[0]); + +const char* const AEP_tessellation_shader[] = { E_GL_EXT_tessellation_shader, E_GL_OES_tessellation_shader }; +const int Num_AEP_tessellation_shader = sizeof(AEP_tessellation_shader)/sizeof(AEP_tessellation_shader[0]); + +const char* const AEP_tessellation_point_size[] = { E_GL_EXT_tessellation_point_size, E_GL_OES_tessellation_point_size }; +const int Num_AEP_tessellation_point_size = sizeof(AEP_tessellation_point_size)/sizeof(AEP_tessellation_point_size[0]); + +const char* const AEP_texture_buffer[] = { E_GL_EXT_texture_buffer, E_GL_OES_texture_buffer }; +const int Num_AEP_texture_buffer = sizeof(AEP_texture_buffer)/sizeof(AEP_texture_buffer[0]); + +const char* const AEP_texture_cube_map_array[] = { E_GL_EXT_texture_cube_map_array, E_GL_OES_texture_cube_map_array }; +const int Num_AEP_texture_cube_map_array = sizeof(AEP_texture_cube_map_array)/sizeof(AEP_texture_cube_map_array[0]); + +const char* const AEP_mesh_shader[] = { E_GL_NV_mesh_shader, E_GL_EXT_mesh_shader }; +const int Num_AEP_mesh_shader = sizeof(AEP_mesh_shader)/sizeof(AEP_mesh_shader[0]); + +} // end namespace glslang + +#endif // _VERSIONS_INCLUDED_ diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/attribute.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/attribute.h new file mode 100644 index 0000000..c5b2917 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/attribute.h @@ -0,0 +1,150 @@ +// +// Copyright (C) 2017 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+//
+
+#ifndef _ATTRIBUTE_INCLUDED_
+#define _ATTRIBUTE_INCLUDED_
+
+#include "../Include/Common.h"
+#include "../Include/ConstantUnion.h"
+
+namespace glslang {
+
+    enum TAttributeType {
+        EatNone,
+        EatAllow_uav_condition,
+        EatBranch,
+        EatCall,
+        EatDomain,
+        EatEarlyDepthStencil,
+        EatFastOpt,
+        EatFlatten,
+        EatForceCase,
+        EatInstance,
+        EatMaxTessFactor,
+        EatNumThreads,
+        EatMaxVertexCount,
+        EatOutputControlPoints,
+        EatOutputTopology,
+        EatPartitioning,
+        EatPatchConstantFunc,
+        EatPatchSize,
+        EatUnroll,
+        EatLoop,
+        EatBinding,
+        EatGlobalBinding,
+        EatLocation,
+        EatInputAttachment,
+        EatBuiltIn,
+        EatPushConstant,
+        EatConstantId,
+        EatDependencyInfinite,
+        EatDependencyLength,
+        EatMinIterations,
+        EatMaxIterations,
+        EatIterationMultiple,
+        EatPeelCount,
+        EatPartialCount,
+        EatFormatRgba32f,
+        EatFormatRgba16f,
+        EatFormatR32f,
+        EatFormatRgba8,
+        EatFormatRgba8Snorm,
+        EatFormatRg32f,
+        EatFormatRg16f,
+        EatFormatR11fG11fB10f,
+        EatFormatR16f,
+        EatFormatRgba16,
+        EatFormatRgb10A2,
+        EatFormatRg16,
+        EatFormatRg8,
+        EatFormatR16,
+        EatFormatR8,
+        EatFormatRgba16Snorm,
+        EatFormatRg16Snorm,
+        EatFormatRg8Snorm,
+        EatFormatR16Snorm,
+        EatFormatR8Snorm,
+        EatFormatRgba32i,
+        EatFormatRgba16i,
+        EatFormatRgba8i,
+        EatFormatR32i,
+        EatFormatRg32i,
+        EatFormatRg16i,
+        EatFormatRg8i,
+        EatFormatR16i,
+        EatFormatR8i,
+        EatFormatRgba32ui,
+        EatFormatRgba16ui,
+        EatFormatRgba8ui,
+        EatFormatR32ui,
+        EatFormatRgb10a2ui,
+        EatFormatRg32ui,
+        EatFormatRg16ui,
+        EatFormatRg8ui,
+        EatFormatR16ui,
+        EatFormatR8ui,
+        EatFormatUnknown,
+        EatNonWritable,
+        EatNonReadable,
+        EatSubgroupUniformControlFlow,
+    };
+
+    class TIntermAggregate;
+
+    struct TAttributeArgs {
+        TAttributeType name;
+        const TIntermAggregate* args;
+
+        // Obtain attribute as integer
+        // Return false if it cannot be obtained
+        bool getInt(int& value, int argNum = 0) const;
+
+        // Obtain attribute as string, with optional to-lower transform
+        // Return false if it cannot be obtained
+        bool getString(TString& value, int argNum = 0, bool convertToLower = true) const;
+
+        // How many arguments were provided to the attribute?
+        int size() const;
+
+    protected:
+        const TConstUnion* getConstUnion(TBasicType basicType, int argNum) const;
+    };
+
+    typedef TList<TAttributeArgs> TAttributes;
+
+} // end namespace glslang
+
+#endif // _ATTRIBUTE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/gl_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/gl_types.h
new file mode 100644
index 0000000..d6c9393
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/gl_types.h
@@ -0,0 +1,218 @@
+/*
+** Copyright (c) 2013 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#pragma once + +#define GL_FLOAT 0x1406 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 + +#define GL_DOUBLE 0x140A +#define GL_DOUBLE_VEC2 0x8FFC +#define GL_DOUBLE_VEC3 0x8FFD +#define GL_DOUBLE_VEC4 0x8FFE + +#define GL_INT 0x1404 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 + +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 + +#define GL_INT64_ARB 0x140E +#define GL_INT64_VEC2_ARB 0x8FE9 +#define GL_INT64_VEC3_ARB 0x8FEA +#define GL_INT64_VEC4_ARB 0x8FEB + +#define GL_UNSIGNED_INT64_ARB 0x140F +#define GL_UNSIGNED_INT64_VEC2_ARB 0x8FF5 +#define GL_UNSIGNED_INT64_VEC3_ARB 0x8FF6 +#define GL_UNSIGNED_INT64_VEC4_ARB 0x8FF7 +#define GL_UNSIGNED_INT16_VEC2_NV 0x8FF1 +#define GL_UNSIGNED_INT16_VEC3_NV 0x8FF2 +#define GL_UNSIGNED_INT16_VEC4_NV 0x8FF3 + +#define GL_INT16_NV 0x8FE4 +#define GL_INT16_VEC2_NV 0x8FE5 +#define GL_INT16_VEC3_NV 0x8FE6 +#define GL_INT16_VEC4_NV 0x8FE7 + +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 + +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A + +#define GL_DOUBLE_MAT2 0x8F46 +#define GL_DOUBLE_MAT3 0x8F47 +#define GL_DOUBLE_MAT4 0x8F48 +#define GL_DOUBLE_MAT2x3 0x8F49 +#define GL_DOUBLE_MAT2x4 0x8F4A +#define GL_DOUBLE_MAT3x2 0x8F4B +#define GL_DOUBLE_MAT3x4 0x8F4C +#define GL_DOUBLE_MAT4x2 0x8F4D +#define GL_DOUBLE_MAT4x3 0x8F4E + +// Those constants are borrowed from extension NV_gpu_shader5 +#define GL_FLOAT16_NV 0x8FF8 +#define GL_FLOAT16_VEC2_NV 0x8FF9 +#define GL_FLOAT16_VEC3_NV 0x8FFA +#define GL_FLOAT16_VEC4_NV 0x8FFB + +#define GL_FLOAT16_MAT2_AMD 0x91C5 +#define GL_FLOAT16_MAT3_AMD 0x91C6 +#define GL_FLOAT16_MAT4_AMD 0x91C7 +#define GL_FLOAT16_MAT2x3_AMD 0x91C8 +#define GL_FLOAT16_MAT2x4_AMD 0x91C9 +#define GL_FLOAT16_MAT3x2_AMD 0x91CA +#define GL_FLOAT16_MAT3x4_AMD 0x91CB +#define GL_FLOAT16_MAT4x2_AMD 0x91CC +#define GL_FLOAT16_MAT4x3_AMD 0x91CD + +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C +#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D +#define GL_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900C +#define 
GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW_ARB 0x900D + +#define GL_FLOAT16_SAMPLER_1D_AMD 0x91CE +#define GL_FLOAT16_SAMPLER_2D_AMD 0x91CF +#define GL_FLOAT16_SAMPLER_3D_AMD 0x91D0 +#define GL_FLOAT16_SAMPLER_CUBE_AMD 0x91D1 +#define GL_FLOAT16_SAMPLER_2D_RECT_AMD 0x91D2 +#define GL_FLOAT16_SAMPLER_1D_ARRAY_AMD 0x91D3 +#define GL_FLOAT16_SAMPLER_2D_ARRAY_AMD 0x91D4 +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_AMD 0x91D5 +#define GL_FLOAT16_SAMPLER_BUFFER_AMD 0x91D6 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_AMD 0x91D7 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_ARRAY_AMD 0x91D8 + +#define GL_FLOAT16_SAMPLER_1D_SHADOW_AMD 0x91D9 +#define GL_FLOAT16_SAMPLER_2D_SHADOW_AMD 0x91DA +#define GL_FLOAT16_SAMPLER_2D_RECT_SHADOW_AMD 0x91DB +#define GL_FLOAT16_SAMPLER_1D_ARRAY_SHADOW_AMD 0x91DC +#define GL_FLOAT16_SAMPLER_2D_ARRAY_SHADOW_AMD 0x91DD +#define GL_FLOAT16_SAMPLER_CUBE_SHADOW_AMD 0x91DE +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_SHADOW_AMD 0x91DF + +#define GL_FLOAT16_IMAGE_1D_AMD 0x91E0 +#define GL_FLOAT16_IMAGE_2D_AMD 0x91E1 +#define GL_FLOAT16_IMAGE_3D_AMD 0x91E2 +#define GL_FLOAT16_IMAGE_2D_RECT_AMD 0x91E3 +#define GL_FLOAT16_IMAGE_CUBE_AMD 0x91E4 +#define GL_FLOAT16_IMAGE_1D_ARRAY_AMD 0x91E5 +#define GL_FLOAT16_IMAGE_2D_ARRAY_AMD 0x91E6 +#define GL_FLOAT16_IMAGE_CUBE_MAP_ARRAY_AMD 0x91E7 +#define GL_FLOAT16_IMAGE_BUFFER_AMD 0x91E8 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_AMD 0x91E9 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_ARRAY_AMD 0x91EA + +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900E + +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A + +#define GL_IMAGE_1D 0x904C +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_2D_RECT 0x904F +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_BUFFER 0x9051 +#define GL_IMAGE_1D_ARRAY 0x9052 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 +#define GL_IMAGE_2D_MULTISAMPLE 0x9055 +#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056 +#define GL_INT_IMAGE_1D 0x9057 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_2D_RECT 0x905A +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_BUFFER 0x905C +#define GL_INT_IMAGE_1D_ARRAY 0x905D +#define GL_INT_IMAGE_2D_ARRAY 0x905E +#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F +#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060 +#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061 +#define GL_UNSIGNED_INT_IMAGE_1D 0x9062 +#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 
+#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067 +#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068 +#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069 +#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C + +#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/glslang_tab.cpp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/glslang_tab.cpp.h new file mode 100644 index 0000000..18cef46 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/glslang_tab.cpp.h @@ -0,0 +1,571 @@ +/* A Bison parser, made by GNU Bison 3.7.4. */ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +#ifndef YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED +# define YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 1 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token kinds. 
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + CONST = 258, /* CONST */ + BOOL = 259, /* BOOL */ + INT = 260, /* INT */ + UINT = 261, /* UINT */ + FLOAT = 262, /* FLOAT */ + BVEC2 = 263, /* BVEC2 */ + BVEC3 = 264, /* BVEC3 */ + BVEC4 = 265, /* BVEC4 */ + IVEC2 = 266, /* IVEC2 */ + IVEC3 = 267, /* IVEC3 */ + IVEC4 = 268, /* IVEC4 */ + UVEC2 = 269, /* UVEC2 */ + UVEC3 = 270, /* UVEC3 */ + UVEC4 = 271, /* UVEC4 */ + VEC2 = 272, /* VEC2 */ + VEC3 = 273, /* VEC3 */ + VEC4 = 274, /* VEC4 */ + MAT2 = 275, /* MAT2 */ + MAT3 = 276, /* MAT3 */ + MAT4 = 277, /* MAT4 */ + MAT2X2 = 278, /* MAT2X2 */ + MAT2X3 = 279, /* MAT2X3 */ + MAT2X4 = 280, /* MAT2X4 */ + MAT3X2 = 281, /* MAT3X2 */ + MAT3X3 = 282, /* MAT3X3 */ + MAT3X4 = 283, /* MAT3X4 */ + MAT4X2 = 284, /* MAT4X2 */ + MAT4X3 = 285, /* MAT4X3 */ + MAT4X4 = 286, /* MAT4X4 */ + SAMPLER2D = 287, /* SAMPLER2D */ + SAMPLER3D = 288, /* SAMPLER3D */ + SAMPLERCUBE = 289, /* SAMPLERCUBE */ + SAMPLER2DSHADOW = 290, /* SAMPLER2DSHADOW */ + SAMPLERCUBESHADOW = 291, /* SAMPLERCUBESHADOW */ + SAMPLER2DARRAY = 292, /* SAMPLER2DARRAY */ + SAMPLER2DARRAYSHADOW = 293, /* SAMPLER2DARRAYSHADOW */ + ISAMPLER2D = 294, /* ISAMPLER2D */ + ISAMPLER3D = 295, /* ISAMPLER3D */ + ISAMPLERCUBE = 296, /* ISAMPLERCUBE */ + ISAMPLER2DARRAY = 297, /* ISAMPLER2DARRAY */ + USAMPLER2D = 298, /* USAMPLER2D */ + USAMPLER3D = 299, /* USAMPLER3D */ + USAMPLERCUBE = 300, /* USAMPLERCUBE */ + USAMPLER2DARRAY = 301, /* USAMPLER2DARRAY */ + SAMPLER = 302, /* SAMPLER */ + SAMPLERSHADOW = 303, /* SAMPLERSHADOW */ + TEXTURE2D = 304, /* TEXTURE2D */ + TEXTURE3D = 305, /* TEXTURE3D */ + TEXTURECUBE = 306, /* TEXTURECUBE */ + TEXTURE2DARRAY = 307, /* TEXTURE2DARRAY */ + ITEXTURE2D = 308, /* ITEXTURE2D */ + ITEXTURE3D = 309, /* ITEXTURE3D */ + ITEXTURECUBE = 310, /* ITEXTURECUBE */ + ITEXTURE2DARRAY = 311, /* ITEXTURE2DARRAY */ + UTEXTURE2D = 312, /* UTEXTURE2D */ + UTEXTURE3D = 313, /* UTEXTURE3D */ + UTEXTURECUBE = 314, /* UTEXTURECUBE */ + UTEXTURE2DARRAY = 315, /* UTEXTURE2DARRAY */ + ATTRIBUTE = 316, /* ATTRIBUTE */ + VARYING = 317, /* VARYING */ + FLOAT16_T = 318, /* FLOAT16_T */ + FLOAT32_T = 319, /* FLOAT32_T */ + DOUBLE = 320, /* DOUBLE */ + FLOAT64_T = 321, /* FLOAT64_T */ + INT64_T = 322, /* INT64_T */ + UINT64_T = 323, /* UINT64_T */ + INT32_T = 324, /* INT32_T */ + UINT32_T = 325, /* UINT32_T */ + INT16_T = 326, /* INT16_T */ + UINT16_T = 327, /* UINT16_T */ + INT8_T = 328, /* INT8_T */ + UINT8_T = 329, /* UINT8_T */ + I64VEC2 = 330, /* I64VEC2 */ + I64VEC3 = 331, /* I64VEC3 */ + I64VEC4 = 332, /* I64VEC4 */ + U64VEC2 = 333, /* U64VEC2 */ + U64VEC3 = 334, /* U64VEC3 */ + U64VEC4 = 335, /* U64VEC4 */ + I32VEC2 = 336, /* I32VEC2 */ + I32VEC3 = 337, /* I32VEC3 */ + I32VEC4 = 338, /* I32VEC4 */ + U32VEC2 = 339, /* U32VEC2 */ + U32VEC3 = 340, /* U32VEC3 */ + U32VEC4 = 341, /* U32VEC4 */ + I16VEC2 = 342, /* I16VEC2 */ + I16VEC3 = 343, /* I16VEC3 */ + I16VEC4 = 344, /* I16VEC4 */ + U16VEC2 = 345, /* U16VEC2 */ + U16VEC3 = 346, /* U16VEC3 */ + U16VEC4 = 347, /* U16VEC4 */ + I8VEC2 = 348, /* I8VEC2 */ + I8VEC3 = 349, /* I8VEC3 */ + I8VEC4 = 350, /* I8VEC4 */ + U8VEC2 = 351, /* U8VEC2 */ + U8VEC3 = 352, /* U8VEC3 */ + U8VEC4 = 353, /* U8VEC4 */ + DVEC2 = 354, /* DVEC2 */ + DVEC3 = 355, /* DVEC3 */ + DVEC4 = 356, /* DVEC4 */ + DMAT2 = 357, /* DMAT2 */ + DMAT3 = 358, /* DMAT3 */ + DMAT4 = 359, /* DMAT4 */ + F16VEC2 = 360, /* F16VEC2 */ + F16VEC3 = 361, /* 
F16VEC3 */ + F16VEC4 = 362, /* F16VEC4 */ + F16MAT2 = 363, /* F16MAT2 */ + F16MAT3 = 364, /* F16MAT3 */ + F16MAT4 = 365, /* F16MAT4 */ + F32VEC2 = 366, /* F32VEC2 */ + F32VEC3 = 367, /* F32VEC3 */ + F32VEC4 = 368, /* F32VEC4 */ + F32MAT2 = 369, /* F32MAT2 */ + F32MAT3 = 370, /* F32MAT3 */ + F32MAT4 = 371, /* F32MAT4 */ + F64VEC2 = 372, /* F64VEC2 */ + F64VEC3 = 373, /* F64VEC3 */ + F64VEC4 = 374, /* F64VEC4 */ + F64MAT2 = 375, /* F64MAT2 */ + F64MAT3 = 376, /* F64MAT3 */ + F64MAT4 = 377, /* F64MAT4 */ + DMAT2X2 = 378, /* DMAT2X2 */ + DMAT2X3 = 379, /* DMAT2X3 */ + DMAT2X4 = 380, /* DMAT2X4 */ + DMAT3X2 = 381, /* DMAT3X2 */ + DMAT3X3 = 382, /* DMAT3X3 */ + DMAT3X4 = 383, /* DMAT3X4 */ + DMAT4X2 = 384, /* DMAT4X2 */ + DMAT4X3 = 385, /* DMAT4X3 */ + DMAT4X4 = 386, /* DMAT4X4 */ + F16MAT2X2 = 387, /* F16MAT2X2 */ + F16MAT2X3 = 388, /* F16MAT2X3 */ + F16MAT2X4 = 389, /* F16MAT2X4 */ + F16MAT3X2 = 390, /* F16MAT3X2 */ + F16MAT3X3 = 391, /* F16MAT3X3 */ + F16MAT3X4 = 392, /* F16MAT3X4 */ + F16MAT4X2 = 393, /* F16MAT4X2 */ + F16MAT4X3 = 394, /* F16MAT4X3 */ + F16MAT4X4 = 395, /* F16MAT4X4 */ + F32MAT2X2 = 396, /* F32MAT2X2 */ + F32MAT2X3 = 397, /* F32MAT2X3 */ + F32MAT2X4 = 398, /* F32MAT2X4 */ + F32MAT3X2 = 399, /* F32MAT3X2 */ + F32MAT3X3 = 400, /* F32MAT3X3 */ + F32MAT3X4 = 401, /* F32MAT3X4 */ + F32MAT4X2 = 402, /* F32MAT4X2 */ + F32MAT4X3 = 403, /* F32MAT4X3 */ + F32MAT4X4 = 404, /* F32MAT4X4 */ + F64MAT2X2 = 405, /* F64MAT2X2 */ + F64MAT2X3 = 406, /* F64MAT2X3 */ + F64MAT2X4 = 407, /* F64MAT2X4 */ + F64MAT3X2 = 408, /* F64MAT3X2 */ + F64MAT3X3 = 409, /* F64MAT3X3 */ + F64MAT3X4 = 410, /* F64MAT3X4 */ + F64MAT4X2 = 411, /* F64MAT4X2 */ + F64MAT4X3 = 412, /* F64MAT4X3 */ + F64MAT4X4 = 413, /* F64MAT4X4 */ + ATOMIC_UINT = 414, /* ATOMIC_UINT */ + ACCSTRUCTNV = 415, /* ACCSTRUCTNV */ + ACCSTRUCTEXT = 416, /* ACCSTRUCTEXT */ + RAYQUERYEXT = 417, /* RAYQUERYEXT */ + FCOOPMATNV = 418, /* FCOOPMATNV */ + ICOOPMATNV = 419, /* ICOOPMATNV */ + UCOOPMATNV = 420, /* UCOOPMATNV */ + SAMPLERCUBEARRAY = 421, /* SAMPLERCUBEARRAY */ + SAMPLERCUBEARRAYSHADOW = 422, /* SAMPLERCUBEARRAYSHADOW */ + ISAMPLERCUBEARRAY = 423, /* ISAMPLERCUBEARRAY */ + USAMPLERCUBEARRAY = 424, /* USAMPLERCUBEARRAY */ + SAMPLER1D = 425, /* SAMPLER1D */ + SAMPLER1DARRAY = 426, /* SAMPLER1DARRAY */ + SAMPLER1DARRAYSHADOW = 427, /* SAMPLER1DARRAYSHADOW */ + ISAMPLER1D = 428, /* ISAMPLER1D */ + SAMPLER1DSHADOW = 429, /* SAMPLER1DSHADOW */ + SAMPLER2DRECT = 430, /* SAMPLER2DRECT */ + SAMPLER2DRECTSHADOW = 431, /* SAMPLER2DRECTSHADOW */ + ISAMPLER2DRECT = 432, /* ISAMPLER2DRECT */ + USAMPLER2DRECT = 433, /* USAMPLER2DRECT */ + SAMPLERBUFFER = 434, /* SAMPLERBUFFER */ + ISAMPLERBUFFER = 435, /* ISAMPLERBUFFER */ + USAMPLERBUFFER = 436, /* USAMPLERBUFFER */ + SAMPLER2DMS = 437, /* SAMPLER2DMS */ + ISAMPLER2DMS = 438, /* ISAMPLER2DMS */ + USAMPLER2DMS = 439, /* USAMPLER2DMS */ + SAMPLER2DMSARRAY = 440, /* SAMPLER2DMSARRAY */ + ISAMPLER2DMSARRAY = 441, /* ISAMPLER2DMSARRAY */ + USAMPLER2DMSARRAY = 442, /* USAMPLER2DMSARRAY */ + SAMPLEREXTERNALOES = 443, /* SAMPLEREXTERNALOES */ + SAMPLEREXTERNAL2DY2YEXT = 444, /* SAMPLEREXTERNAL2DY2YEXT */ + ISAMPLER1DARRAY = 445, /* ISAMPLER1DARRAY */ + USAMPLER1D = 446, /* USAMPLER1D */ + USAMPLER1DARRAY = 447, /* USAMPLER1DARRAY */ + F16SAMPLER1D = 448, /* F16SAMPLER1D */ + F16SAMPLER2D = 449, /* F16SAMPLER2D */ + F16SAMPLER3D = 450, /* F16SAMPLER3D */ + F16SAMPLER2DRECT = 451, /* F16SAMPLER2DRECT */ + F16SAMPLERCUBE = 452, /* F16SAMPLERCUBE */ + F16SAMPLER1DARRAY = 453, /* F16SAMPLER1DARRAY */ + 
F16SAMPLER2DARRAY = 454, /* F16SAMPLER2DARRAY */ + F16SAMPLERCUBEARRAY = 455, /* F16SAMPLERCUBEARRAY */ + F16SAMPLERBUFFER = 456, /* F16SAMPLERBUFFER */ + F16SAMPLER2DMS = 457, /* F16SAMPLER2DMS */ + F16SAMPLER2DMSARRAY = 458, /* F16SAMPLER2DMSARRAY */ + F16SAMPLER1DSHADOW = 459, /* F16SAMPLER1DSHADOW */ + F16SAMPLER2DSHADOW = 460, /* F16SAMPLER2DSHADOW */ + F16SAMPLER1DARRAYSHADOW = 461, /* F16SAMPLER1DARRAYSHADOW */ + F16SAMPLER2DARRAYSHADOW = 462, /* F16SAMPLER2DARRAYSHADOW */ + F16SAMPLER2DRECTSHADOW = 463, /* F16SAMPLER2DRECTSHADOW */ + F16SAMPLERCUBESHADOW = 464, /* F16SAMPLERCUBESHADOW */ + F16SAMPLERCUBEARRAYSHADOW = 465, /* F16SAMPLERCUBEARRAYSHADOW */ + IMAGE1D = 466, /* IMAGE1D */ + IIMAGE1D = 467, /* IIMAGE1D */ + UIMAGE1D = 468, /* UIMAGE1D */ + IMAGE2D = 469, /* IMAGE2D */ + IIMAGE2D = 470, /* IIMAGE2D */ + UIMAGE2D = 471, /* UIMAGE2D */ + IMAGE3D = 472, /* IMAGE3D */ + IIMAGE3D = 473, /* IIMAGE3D */ + UIMAGE3D = 474, /* UIMAGE3D */ + IMAGE2DRECT = 475, /* IMAGE2DRECT */ + IIMAGE2DRECT = 476, /* IIMAGE2DRECT */ + UIMAGE2DRECT = 477, /* UIMAGE2DRECT */ + IMAGECUBE = 478, /* IMAGECUBE */ + IIMAGECUBE = 479, /* IIMAGECUBE */ + UIMAGECUBE = 480, /* UIMAGECUBE */ + IMAGEBUFFER = 481, /* IMAGEBUFFER */ + IIMAGEBUFFER = 482, /* IIMAGEBUFFER */ + UIMAGEBUFFER = 483, /* UIMAGEBUFFER */ + IMAGE1DARRAY = 484, /* IMAGE1DARRAY */ + IIMAGE1DARRAY = 485, /* IIMAGE1DARRAY */ + UIMAGE1DARRAY = 486, /* UIMAGE1DARRAY */ + IMAGE2DARRAY = 487, /* IMAGE2DARRAY */ + IIMAGE2DARRAY = 488, /* IIMAGE2DARRAY */ + UIMAGE2DARRAY = 489, /* UIMAGE2DARRAY */ + IMAGECUBEARRAY = 490, /* IMAGECUBEARRAY */ + IIMAGECUBEARRAY = 491, /* IIMAGECUBEARRAY */ + UIMAGECUBEARRAY = 492, /* UIMAGECUBEARRAY */ + IMAGE2DMS = 493, /* IMAGE2DMS */ + IIMAGE2DMS = 494, /* IIMAGE2DMS */ + UIMAGE2DMS = 495, /* UIMAGE2DMS */ + IMAGE2DMSARRAY = 496, /* IMAGE2DMSARRAY */ + IIMAGE2DMSARRAY = 497, /* IIMAGE2DMSARRAY */ + UIMAGE2DMSARRAY = 498, /* UIMAGE2DMSARRAY */ + F16IMAGE1D = 499, /* F16IMAGE1D */ + F16IMAGE2D = 500, /* F16IMAGE2D */ + F16IMAGE3D = 501, /* F16IMAGE3D */ + F16IMAGE2DRECT = 502, /* F16IMAGE2DRECT */ + F16IMAGECUBE = 503, /* F16IMAGECUBE */ + F16IMAGE1DARRAY = 504, /* F16IMAGE1DARRAY */ + F16IMAGE2DARRAY = 505, /* F16IMAGE2DARRAY */ + F16IMAGECUBEARRAY = 506, /* F16IMAGECUBEARRAY */ + F16IMAGEBUFFER = 507, /* F16IMAGEBUFFER */ + F16IMAGE2DMS = 508, /* F16IMAGE2DMS */ + F16IMAGE2DMSARRAY = 509, /* F16IMAGE2DMSARRAY */ + I64IMAGE1D = 510, /* I64IMAGE1D */ + U64IMAGE1D = 511, /* U64IMAGE1D */ + I64IMAGE2D = 512, /* I64IMAGE2D */ + U64IMAGE2D = 513, /* U64IMAGE2D */ + I64IMAGE3D = 514, /* I64IMAGE3D */ + U64IMAGE3D = 515, /* U64IMAGE3D */ + I64IMAGE2DRECT = 516, /* I64IMAGE2DRECT */ + U64IMAGE2DRECT = 517, /* U64IMAGE2DRECT */ + I64IMAGECUBE = 518, /* I64IMAGECUBE */ + U64IMAGECUBE = 519, /* U64IMAGECUBE */ + I64IMAGEBUFFER = 520, /* I64IMAGEBUFFER */ + U64IMAGEBUFFER = 521, /* U64IMAGEBUFFER */ + I64IMAGE1DARRAY = 522, /* I64IMAGE1DARRAY */ + U64IMAGE1DARRAY = 523, /* U64IMAGE1DARRAY */ + I64IMAGE2DARRAY = 524, /* I64IMAGE2DARRAY */ + U64IMAGE2DARRAY = 525, /* U64IMAGE2DARRAY */ + I64IMAGECUBEARRAY = 526, /* I64IMAGECUBEARRAY */ + U64IMAGECUBEARRAY = 527, /* U64IMAGECUBEARRAY */ + I64IMAGE2DMS = 528, /* I64IMAGE2DMS */ + U64IMAGE2DMS = 529, /* U64IMAGE2DMS */ + I64IMAGE2DMSARRAY = 530, /* I64IMAGE2DMSARRAY */ + U64IMAGE2DMSARRAY = 531, /* U64IMAGE2DMSARRAY */ + TEXTURECUBEARRAY = 532, /* TEXTURECUBEARRAY */ + ITEXTURECUBEARRAY = 533, /* ITEXTURECUBEARRAY */ + UTEXTURECUBEARRAY = 534, /* UTEXTURECUBEARRAY */ + TEXTURE1D = 
535, /* TEXTURE1D */ + ITEXTURE1D = 536, /* ITEXTURE1D */ + UTEXTURE1D = 537, /* UTEXTURE1D */ + TEXTURE1DARRAY = 538, /* TEXTURE1DARRAY */ + ITEXTURE1DARRAY = 539, /* ITEXTURE1DARRAY */ + UTEXTURE1DARRAY = 540, /* UTEXTURE1DARRAY */ + TEXTURE2DRECT = 541, /* TEXTURE2DRECT */ + ITEXTURE2DRECT = 542, /* ITEXTURE2DRECT */ + UTEXTURE2DRECT = 543, /* UTEXTURE2DRECT */ + TEXTUREBUFFER = 544, /* TEXTUREBUFFER */ + ITEXTUREBUFFER = 545, /* ITEXTUREBUFFER */ + UTEXTUREBUFFER = 546, /* UTEXTUREBUFFER */ + TEXTURE2DMS = 547, /* TEXTURE2DMS */ + ITEXTURE2DMS = 548, /* ITEXTURE2DMS */ + UTEXTURE2DMS = 549, /* UTEXTURE2DMS */ + TEXTURE2DMSARRAY = 550, /* TEXTURE2DMSARRAY */ + ITEXTURE2DMSARRAY = 551, /* ITEXTURE2DMSARRAY */ + UTEXTURE2DMSARRAY = 552, /* UTEXTURE2DMSARRAY */ + F16TEXTURE1D = 553, /* F16TEXTURE1D */ + F16TEXTURE2D = 554, /* F16TEXTURE2D */ + F16TEXTURE3D = 555, /* F16TEXTURE3D */ + F16TEXTURE2DRECT = 556, /* F16TEXTURE2DRECT */ + F16TEXTURECUBE = 557, /* F16TEXTURECUBE */ + F16TEXTURE1DARRAY = 558, /* F16TEXTURE1DARRAY */ + F16TEXTURE2DARRAY = 559, /* F16TEXTURE2DARRAY */ + F16TEXTURECUBEARRAY = 560, /* F16TEXTURECUBEARRAY */ + F16TEXTUREBUFFER = 561, /* F16TEXTUREBUFFER */ + F16TEXTURE2DMS = 562, /* F16TEXTURE2DMS */ + F16TEXTURE2DMSARRAY = 563, /* F16TEXTURE2DMSARRAY */ + SUBPASSINPUT = 564, /* SUBPASSINPUT */ + SUBPASSINPUTMS = 565, /* SUBPASSINPUTMS */ + ISUBPASSINPUT = 566, /* ISUBPASSINPUT */ + ISUBPASSINPUTMS = 567, /* ISUBPASSINPUTMS */ + USUBPASSINPUT = 568, /* USUBPASSINPUT */ + USUBPASSINPUTMS = 569, /* USUBPASSINPUTMS */ + F16SUBPASSINPUT = 570, /* F16SUBPASSINPUT */ + F16SUBPASSINPUTMS = 571, /* F16SUBPASSINPUTMS */ + SPIRV_INSTRUCTION = 572, /* SPIRV_INSTRUCTION */ + SPIRV_EXECUTION_MODE = 573, /* SPIRV_EXECUTION_MODE */ + SPIRV_EXECUTION_MODE_ID = 574, /* SPIRV_EXECUTION_MODE_ID */ + SPIRV_DECORATE = 575, /* SPIRV_DECORATE */ + SPIRV_DECORATE_ID = 576, /* SPIRV_DECORATE_ID */ + SPIRV_DECORATE_STRING = 577, /* SPIRV_DECORATE_STRING */ + SPIRV_TYPE = 578, /* SPIRV_TYPE */ + SPIRV_STORAGE_CLASS = 579, /* SPIRV_STORAGE_CLASS */ + SPIRV_BY_REFERENCE = 580, /* SPIRV_BY_REFERENCE */ + SPIRV_LITERAL = 581, /* SPIRV_LITERAL */ + LEFT_OP = 582, /* LEFT_OP */ + RIGHT_OP = 583, /* RIGHT_OP */ + INC_OP = 584, /* INC_OP */ + DEC_OP = 585, /* DEC_OP */ + LE_OP = 586, /* LE_OP */ + GE_OP = 587, /* GE_OP */ + EQ_OP = 588, /* EQ_OP */ + NE_OP = 589, /* NE_OP */ + AND_OP = 590, /* AND_OP */ + OR_OP = 591, /* OR_OP */ + XOR_OP = 592, /* XOR_OP */ + MUL_ASSIGN = 593, /* MUL_ASSIGN */ + DIV_ASSIGN = 594, /* DIV_ASSIGN */ + ADD_ASSIGN = 595, /* ADD_ASSIGN */ + MOD_ASSIGN = 596, /* MOD_ASSIGN */ + LEFT_ASSIGN = 597, /* LEFT_ASSIGN */ + RIGHT_ASSIGN = 598, /* RIGHT_ASSIGN */ + AND_ASSIGN = 599, /* AND_ASSIGN */ + XOR_ASSIGN = 600, /* XOR_ASSIGN */ + OR_ASSIGN = 601, /* OR_ASSIGN */ + SUB_ASSIGN = 602, /* SUB_ASSIGN */ + STRING_LITERAL = 603, /* STRING_LITERAL */ + LEFT_PAREN = 604, /* LEFT_PAREN */ + RIGHT_PAREN = 605, /* RIGHT_PAREN */ + LEFT_BRACKET = 606, /* LEFT_BRACKET */ + RIGHT_BRACKET = 607, /* RIGHT_BRACKET */ + LEFT_BRACE = 608, /* LEFT_BRACE */ + RIGHT_BRACE = 609, /* RIGHT_BRACE */ + DOT = 610, /* DOT */ + COMMA = 611, /* COMMA */ + COLON = 612, /* COLON */ + EQUAL = 613, /* EQUAL */ + SEMICOLON = 614, /* SEMICOLON */ + BANG = 615, /* BANG */ + DASH = 616, /* DASH */ + TILDE = 617, /* TILDE */ + PLUS = 618, /* PLUS */ + STAR = 619, /* STAR */ + SLASH = 620, /* SLASH */ + PERCENT = 621, /* PERCENT */ + LEFT_ANGLE = 622, /* LEFT_ANGLE */ + RIGHT_ANGLE = 623, /* RIGHT_ANGLE */ + 
VERTICAL_BAR = 624, /* VERTICAL_BAR */ + CARET = 625, /* CARET */ + AMPERSAND = 626, /* AMPERSAND */ + QUESTION = 627, /* QUESTION */ + INVARIANT = 628, /* INVARIANT */ + HIGH_PRECISION = 629, /* HIGH_PRECISION */ + MEDIUM_PRECISION = 630, /* MEDIUM_PRECISION */ + LOW_PRECISION = 631, /* LOW_PRECISION */ + PRECISION = 632, /* PRECISION */ + PACKED = 633, /* PACKED */ + RESOURCE = 634, /* RESOURCE */ + SUPERP = 635, /* SUPERP */ + FLOATCONSTANT = 636, /* FLOATCONSTANT */ + INTCONSTANT = 637, /* INTCONSTANT */ + UINTCONSTANT = 638, /* UINTCONSTANT */ + BOOLCONSTANT = 639, /* BOOLCONSTANT */ + IDENTIFIER = 640, /* IDENTIFIER */ + TYPE_NAME = 641, /* TYPE_NAME */ + CENTROID = 642, /* CENTROID */ + IN = 643, /* IN */ + OUT = 644, /* OUT */ + INOUT = 645, /* INOUT */ + STRUCT = 646, /* STRUCT */ + VOID = 647, /* VOID */ + WHILE = 648, /* WHILE */ + BREAK = 649, /* BREAK */ + CONTINUE = 650, /* CONTINUE */ + DO = 651, /* DO */ + ELSE = 652, /* ELSE */ + FOR = 653, /* FOR */ + IF = 654, /* IF */ + DISCARD = 655, /* DISCARD */ + RETURN = 656, /* RETURN */ + SWITCH = 657, /* SWITCH */ + CASE = 658, /* CASE */ + DEFAULT = 659, /* DEFAULT */ + TERMINATE_INVOCATION = 660, /* TERMINATE_INVOCATION */ + TERMINATE_RAY = 661, /* TERMINATE_RAY */ + IGNORE_INTERSECTION = 662, /* IGNORE_INTERSECTION */ + UNIFORM = 663, /* UNIFORM */ + SHARED = 664, /* SHARED */ + BUFFER = 665, /* BUFFER */ + FLAT = 666, /* FLAT */ + SMOOTH = 667, /* SMOOTH */ + LAYOUT = 668, /* LAYOUT */ + DOUBLECONSTANT = 669, /* DOUBLECONSTANT */ + INT16CONSTANT = 670, /* INT16CONSTANT */ + UINT16CONSTANT = 671, /* UINT16CONSTANT */ + FLOAT16CONSTANT = 672, /* FLOAT16CONSTANT */ + INT32CONSTANT = 673, /* INT32CONSTANT */ + UINT32CONSTANT = 674, /* UINT32CONSTANT */ + INT64CONSTANT = 675, /* INT64CONSTANT */ + UINT64CONSTANT = 676, /* UINT64CONSTANT */ + SUBROUTINE = 677, /* SUBROUTINE */ + DEMOTE = 678, /* DEMOTE */ + PAYLOADNV = 679, /* PAYLOADNV */ + PAYLOADINNV = 680, /* PAYLOADINNV */ + HITATTRNV = 681, /* HITATTRNV */ + CALLDATANV = 682, /* CALLDATANV */ + CALLDATAINNV = 683, /* CALLDATAINNV */ + PAYLOADEXT = 684, /* PAYLOADEXT */ + PAYLOADINEXT = 685, /* PAYLOADINEXT */ + HITATTREXT = 686, /* HITATTREXT */ + CALLDATAEXT = 687, /* CALLDATAEXT */ + CALLDATAINEXT = 688, /* CALLDATAINEXT */ + PATCH = 689, /* PATCH */ + SAMPLE = 690, /* SAMPLE */ + NONUNIFORM = 691, /* NONUNIFORM */ + COHERENT = 692, /* COHERENT */ + VOLATILE = 693, /* VOLATILE */ + RESTRICT = 694, /* RESTRICT */ + READONLY = 695, /* READONLY */ + WRITEONLY = 696, /* WRITEONLY */ + DEVICECOHERENT = 697, /* DEVICECOHERENT */ + QUEUEFAMILYCOHERENT = 698, /* QUEUEFAMILYCOHERENT */ + WORKGROUPCOHERENT = 699, /* WORKGROUPCOHERENT */ + SUBGROUPCOHERENT = 700, /* SUBGROUPCOHERENT */ + NONPRIVATE = 701, /* NONPRIVATE */ + SHADERCALLCOHERENT = 702, /* SHADERCALLCOHERENT */ + NOPERSPECTIVE = 703, /* NOPERSPECTIVE */ + EXPLICITINTERPAMD = 704, /* EXPLICITINTERPAMD */ + PERVERTEXEXT = 705, /* PERVERTEXEXT */ + PERVERTEXNV = 706, /* PERVERTEXNV */ + PERPRIMITIVENV = 707, /* PERPRIMITIVENV */ + PERVIEWNV = 708, /* PERVIEWNV */ + PERTASKNV = 709, /* PERTASKNV */ + PERPRIMITIVEEXT = 710, /* PERPRIMITIVEEXT */ + TASKPAYLOADWORKGROUPEXT = 711, /* TASKPAYLOADWORKGROUPEXT */ + PRECISE = 712 /* PRECISE */ + }; + typedef enum yytokentype yytoken_kind_t; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +union YYSTYPE +{ +#line 97 "MachineIndependent/glslang.y" + + struct { + glslang::TSourceLoc loc; + union { + glslang::TString *string; + int i; + unsigned int u; + long long i64; + unsigned long long u64; + bool b; + double d; + }; + glslang::TSymbol* symbol; + } lex; + struct { + glslang::TSourceLoc loc; + glslang::TOperator op; + union { + TIntermNode* intermNode; + glslang::TIntermNodePair nodePair; + glslang::TIntermTyped* intermTypedNode; + glslang::TAttributes* attributes; + glslang::TSpirvRequirement* spirvReq; + glslang::TSpirvInstruction* spirvInst; + glslang::TSpirvTypeParameters* spirvTypeParams; + }; + union { + glslang::TPublicType type; + glslang::TFunction* function; + glslang::TParameter param; + glslang::TTypeLoc typeLine; + glslang::TTypeList* typeList; + glslang::TArraySizes* arraySizes; + glslang::TIdentifierList* identifierList; + }; + glslang::TArraySizes* typeParameters; + } interm; + +#line 560 "MachineIndependent/glslang_tab.cpp.h" + +}; +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + + +int yyparse (glslang::TParseContext* pParseContext); + +#endif /* !YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/iomapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/iomapper.h new file mode 100644 index 0000000..0003a74 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/iomapper.h @@ -0,0 +1,361 @@ +// +// Copyright (C) 2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#if !defined(GLSLANG_WEB) + +#ifndef _IOMAPPER_INCLUDED +#define _IOMAPPER_INCLUDED + +#include <cstdint> +#include "LiveTraverser.h" +#include <unordered_map> +#include <unordered_set> +// +// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+// + +class TInfoSink; + +namespace glslang { + +class TIntermediate; +struct TVarEntryInfo { + long long id; + TIntermSymbol* symbol; + bool live; + int newBinding; + int newSet; + int newLocation; + int newComponent; + int newIndex; + EShLanguage stage; + + void clearNewAssignments() { + newBinding = -1; + newSet = -1; + newLocation = -1; + newComponent = -1; + newIndex = -1; + } + + struct TOrderById { + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { return l.id < r.id; } + }; + + struct TOrderByPriority { + // ordering: + // 1) has both binding and set + // 2) has binding but no set + // 3) has no binding but set + // 4) has no binding and no set + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { + const TQualifier& lq = l.symbol->getQualifier(); + const TQualifier& rq = r.symbol->getQualifier(); + + // simple rules: + // a binding is worth 2 points, + // a set is worth 1 point; + // the entry with the most points sorts first. + int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0); + int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0); + + if (lPoints == rPoints) + return l.id < r.id; + return lPoints > rPoints; + } + }; + + struct TOrderByPriorityAndLive { + // ordering: + // 1) do live variables first + // 2) has both binding and set + // 3) has binding but no set + // 4) has no binding but set + // 5) has no binding and no set + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { + + const TQualifier& lq = l.symbol->getQualifier(); + const TQualifier& rq = r.symbol->getQualifier(); + + // simple rules: + // a binding is worth 2 points, + // a set is worth 1 point; + // the entry with the most points sorts first. + int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0); + int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0); + + if (l.live != r.live) + return l.live > r.live; + + if (lPoints != rPoints) + return lPoints > rPoints; + + return l.id < r.id; + } + }; +};
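// Editor's note: a minimal sketch (not part of the upstream header) of how the
// TOrderByPriority comparator above behaves. Assume three hypothetical entries:
// one with both binding and set (2 + 1 = 3 points), one with only a binding
// (2 points), and one with neither (0 points). Sorting puts the highest score
// first, falling back to id order on ties:
//
//   std::vector<glslang::TVarEntryInfo> vars = { onlyBinding, bindingAndSet, neither };
//   std::sort(vars.begin(), vars.end(), glslang::TVarEntryInfo::TOrderByPriority());
//   // resulting order: bindingAndSet (3), onlyBinding (2), neither (0)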
+ +// Base class for shared TIoMapResolver services, used by several derivations. +struct TDefaultIoResolverBase : public glslang::TIoMapResolver { +public: + TDefaultIoResolverBase(const TIntermediate& intermediate); + typedef std::vector<int> TSlotSet; + typedef std::unordered_map<int, TSlotSet> TSlotSetMap; + + // grow the reflection stage by stage + void notifyBinding(EShLanguage, TVarEntryInfo& /*ent*/) override {} + void notifyInOut(EShLanguage, TVarEntryInfo& /*ent*/) override {} + void beginNotifications(EShLanguage) override {} + void endNotifications(EShLanguage) override {} + void beginResolve(EShLanguage) override {} + void endResolve(EShLanguage) override {} + void beginCollect(EShLanguage) override {} + void endCollect(EShLanguage) override {} + void reserverResourceSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {} + void reserverStorageSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {} + int getBaseBinding(EShLanguage stage, TResourceType res, unsigned int set) const; + const std::vector<std::string>& getResourceSetBinding(EShLanguage stage) const; + virtual TResourceType getResourceType(const glslang::TType& type) = 0; + bool doAutoBindingMapping() const; + bool doAutoLocationMapping() const; + TSlotSet::iterator findSlot(int set, int slot); + bool checkEmpty(int set, int slot); + bool validateInOut(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; } + int reserveSlot(int set, int slot, int size = 1); + int getFreeSlot(int set, int base, int size = 1); + int resolveSet(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override; + int resolveInOutComponent(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveInOutIndex(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + void addStage(EShLanguage stage, TIntermediate& stageIntermediate) override { + if (stage < EShLangCount) { + stageMask[stage] = true; + stageIntermediates[stage] = &stageIntermediate; + } + } + uint32_t computeTypeLocationSize(const TType& type, EShLanguage stage); + + TSlotSetMap slots; + bool hasError = false; + +protected: + TDefaultIoResolverBase(TDefaultIoResolverBase&); + TDefaultIoResolverBase& operator=(TDefaultIoResolverBase&); + const TIntermediate& referenceIntermediate; + int nextUniformLocation; + int nextInputLocation; + int nextOutputLocation; + bool stageMask[EShLangCount + 1]; + const TIntermediate* stageIntermediates[EShLangCount]; + + // Return descriptor set specific base if there is one, and the generic base otherwise. + int selectBaseBinding(int base, int descriptorSetBase) const { + return descriptorSetBase != -1 ?
descriptorSetBase : base; + } + + static int getLayoutSet(const glslang::TType& type) { + if (type.getQualifier().hasSet()) + return type.getQualifier().layoutSet; + else + return 0; + } + + static bool isSamplerType(const glslang::TType& type) { + return type.getBasicType() == glslang::EbtSampler && type.getSampler().isPureSampler(); + } + + static bool isTextureType(const glslang::TType& type) { + return (type.getBasicType() == glslang::EbtSampler && + (type.getSampler().isTexture() || type.getSampler().isSubpass())); + } + + static bool isUboType(const glslang::TType& type) { + return type.getQualifier().storage == EvqUniform; + } + + static bool isImageType(const glslang::TType& type) { + return type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage(); + } + + static bool isSsboType(const glslang::TType& type) { + return type.getQualifier().storage == EvqBuffer; + } + + // Return true if this is an SRV (shader resource view) type: + static bool isSrvType(const glslang::TType& type) { + return isTextureType(type) || type.getQualifier().storage == EvqBuffer; + } + + // Return true if this is a UAV (unordered access view) type: + static bool isUavType(const glslang::TType& type) { + if (type.getQualifier().isReadOnly()) + return false; + return (type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage()) || + (type.getQualifier().storage == EvqBuffer); + } +}; + +// Default I/O resolver for OpenGL +struct TDefaultGlslIoResolver : public TDefaultIoResolverBase { +public: + typedef std::map<TString, int> TVarSlotMap; // <resource name, location/binding> + typedef std::map<int, TVarSlotMap> TSlotMap; // <resource storage type, TVarSlotMap> + TDefaultGlslIoResolver(const TIntermediate& intermediate); + bool validateBinding(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; } + TResourceType getResourceType(const glslang::TType& type) override; + int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override; + int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveBinding(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + void beginResolve(EShLanguage /*stage*/) override; + void endResolve(EShLanguage stage) override; + void beginCollect(EShLanguage) override; + void endCollect(EShLanguage) override; + void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override; + void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override; + // In/out symbols and uniform symbols are stored in the same resourceSlotMap; a storage key identifies each kind of symbol. + // The key is built from the stage and the storage qualifier, which lets us recognize the same storage resource used in different stages. + // If a resource is a program resource and we don't need to know its usage stage, the same stage value can be used to build the key. + // Note: both stage and type must be less than 0xffff. + int buildStorageKey(EShLanguage stage, TStorageQualifier type) { + assert(static_cast<uint32_t>(stage) <= 0x0000ffff && static_cast<uint32_t>(type) <= 0x0000ffff); + return (stage << 16) | type; + } + +protected: + // Marks the previous stage, used to fetch more interface symbol information. + EShLanguage preStage; + // Marks the current shader stage for the resolver. + EShLanguage currentStage; + // Slot map for storage resources (locations of uniform and interface symbols); slots are shared program-wide. + TSlotMap resourceSlotMap; + // Slot map for other resources (images, UBOs, SSBOs); slots are shared program-wide. + TSlotMap storageSlotMap; +};
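// Editor's note: a worked example (illustrative only; enum values taken from
// upstream glslang, where EShLangVertex == 0 and EShLangFragment == 4) of the
// key packing in buildStorageKey above. The stage lands in the high 16 bits and
// the storage qualifier in the low 16 bits, so the same uniform seen from two
// stages yields two distinct keys:
//
//   int vsKey = buildStorageKey(EShLangVertex, EvqUniform);   // (0 << 16) | EvqUniform
//   int fsKey = buildStorageKey(EShLangFragment, EvqUniform); // (4 << 16) | EvqUniform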
+ +typedef std::map<const TString, TVarEntryInfo> TVarLiveMap; + +// Override operator= because, when a vector of these pairs is sorted under +// VC++, the sort assigns through: +// pair& operator=(const pair<_Other1, _Other2>& _Right) +// { +// first = _Right.first; +// second = _Right.second; +// return (*this); +// } +// which tries to assign to the const `first` member and fails to compile. +// Overriding the assignment here (casting away the const) avoids that compiler +// error. If the VC++ compiler learns to handle this situation, this part of +// the code can be removed. +struct TVarLivePair : std::pair<const TString, TVarEntryInfo> { + TVarLivePair(const std::pair<const TString, TVarEntryInfo>& _Right) : pair(_Right.first, _Right.second) {} + TVarLivePair& operator=(const TVarLivePair& _Right) { + const_cast<TString&>(first) = _Right.first; + second = _Right.second; + return (*this); + } + TVarLivePair(const TVarLivePair& src) : pair(src) { } +}; +typedef std::vector<TVarLivePair> TVarLiveVector; + +// I/O mapper +class TIoMapper { +public: + TIoMapper() {} + virtual ~TIoMapper() {} + // grow the reflection stage by stage + bool virtual addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*); + bool virtual doMap(TIoMapResolver*, TInfoSink&) { return true; } +}; + +// I/O mapper for GLSL +class TGlslIoMapper : public TIoMapper { +public: + TGlslIoMapper() { + memset(inVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1)); + memset(outVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1)); + memset(uniformVarMap, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1)); + memset(intermediates, 0, sizeof(TIntermediate*) * (EShLangCount + 1)); + profile = ENoProfile; + version = 0; + autoPushConstantMaxSize = 128; + autoPushConstantBlockPacking = ElpStd430; + } + virtual ~TGlslIoMapper() { + for (size_t stage = 0; stage < EShLangCount; stage++) { + if (inVarMaps[stage] != nullptr) { + delete inVarMaps[stage]; + inVarMaps[stage] = nullptr; + } + if (outVarMaps[stage] != nullptr) { + delete outVarMaps[stage]; + outVarMaps[stage] = nullptr; + } + if (uniformVarMap[stage] != nullptr) { + delete uniformVarMap[stage]; + uniformVarMap[stage] = nullptr; + } + if (intermediates[stage] != nullptr) + intermediates[stage] = nullptr; + } + } + // If set, the uniform block with the given name will be changed to be backed by + // push_constant if its size is <= maxSize + void setAutoPushConstantBlock(const char* name, unsigned int maxSize, TLayoutPacking packing) { + autoPushConstantBlockName = name; + autoPushConstantMaxSize = maxSize; + autoPushConstantBlockPacking = packing; + } + // grow the reflection stage by stage + bool addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*) override; + bool doMap(TIoMapResolver*, TInfoSink&) override; + TVarLiveMap *inVarMaps[EShLangCount], *outVarMaps[EShLangCount], + *uniformVarMap[EShLangCount]; + TIntermediate* intermediates[EShLangCount]; + bool hadError = false; + EProfile profile; + int version; + +private: + TString autoPushConstantBlockName; + unsigned int autoPushConstantMaxSize; + TLayoutPacking autoPushConstantBlockPacking; +}; + +} // end namespace glslang + +#endif // _IOMAPPER_INCLUDED + +#endif // !GLSLANG_WEB
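// Editor's note: a minimal, hedged sketch of how the I/O mapper above is
// typically driven through glslang's public TProgram API (exact setup and
// message flags depend on the client; "shader" is assumed to be an already
// compiled glslang::TShader):
//
//   glslang::TProgram program;
//   program.addShader(&shader);
//   if (program.link(EShMsgDefault)) {
//       glslang::TGlslIoMapper ioMapper;
//       program.mapIO(nullptr, &ioMapper); // null resolver -> a default resolver is chosen
//   }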
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/localintermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/localintermediate.h new file mode 100644 index 0000000..1bb4e97 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/localintermediate.h @@ -0,0 +1,1222 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2016 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _LOCAL_INTERMEDIATE_INCLUDED_ +#define _LOCAL_INTERMEDIATE_INCLUDED_ + +#include "../Include/intermediate.h" +#include "../Public/ShaderLang.h" +#include "Versions.h" + +#include <string> +#include <vector> +#include <algorithm> +#include <set> +#include <array> + +class TInfoSink; + +namespace glslang { + +struct TMatrixSelector { + int coord1; // stay agnostic about column/row; this is parse order + int coord2; +}; + +typedef int TVectorSelector; + +const int MaxSwizzleSelectors = 4; + +template<class selectorType> +class TSwizzleSelectors { +public: + TSwizzleSelectors() : size_(0) { } + + void push_back(selectorType comp) + { + if (size_ < MaxSwizzleSelectors) + components[size_++] = comp; + } + void resize(int s) + { + assert(s <= size_); + size_ = s; + } + int size() const { return size_; } + selectorType operator[](int i) const + { + assert(i < MaxSwizzleSelectors); + return components[i]; + } + +private: + int size_; + selectorType components[MaxSwizzleSelectors]; +}; + +// +// Some helper structures for TIntermediate. Their contents are encapsulated +// by TIntermediate. +// + +// Used for call-graph algorithms for detecting recursion, missing bodies, and dead bodies. +// A "call" is a pair: <caller, callee>. +// There can be duplicates. General assumption is the list is small. +struct TCall { + TCall(const TString& pCaller, const TString& pCallee) : caller(pCaller), callee(pCallee) { } + TString caller; + TString callee; + bool visited; + bool currentPath; + bool errorGiven; + int calleeBodyPosition; +}; + +// A generic 1-D range.
+struct TRange { + TRange(int start, int last) : start(start), last(last) { } + bool overlap(const TRange& rhs) const + { + return last >= rhs.start && start <= rhs.last; + } + int start; + int last; +}; + +// An IO range is a 3-D rectangle; the set of (location, component, index) triples all lying +// within the same location range, component range, and index value. Locations don't alias unless +// all other dimensions of their range overlap. +struct TIoRange { + TIoRange(TRange location, TRange component, TBasicType basicType, int index) + : location(location), component(component), basicType(basicType), index(index) { } + bool overlap(const TIoRange& rhs) const + { + return location.overlap(rhs.location) && component.overlap(rhs.component) && index == rhs.index; + } + TRange location; + TRange component; + TBasicType basicType; + int index; +}; + +// An offset range is a 2-D rectangle; the set of (binding, offset) pairs all lying +// within the same binding and offset range. +struct TOffsetRange { + TOffsetRange(TRange binding, TRange offset) + : binding(binding), offset(offset) { } + bool overlap(const TOffsetRange& rhs) const + { + return binding.overlap(rhs.binding) && offset.overlap(rhs.offset); + } + TRange binding; + TRange offset; +}; + +#ifndef GLSLANG_WEB +// Things that need to be tracked per xfb buffer. +struct TXfbBuffer { + TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), contains64BitType(false), + contains32BitType(false), contains16BitType(false) { } + std::vector<TRange> ranges; // byte offsets that have already been assigned + unsigned int stride; + unsigned int implicitStride; + bool contains64BitType; + bool contains32BitType; + bool contains16BitType; +}; +#endif + +// Track a set of strings describing how the module was processed. +// This includes command line options, transforms, etc., ideally inclusive enough +// to reproduce the steps used to transform the input source to the output. +// E.g., see SPIR-V OpModuleProcessed. +// Each "process" or "transform" used is expressed in the form: +// process arg0 arg1 arg2 ... +// process arg0 arg1 arg2 ... +// where everything is textual, and there can be zero or more arguments +class TProcesses { +public: + TProcesses() {} + ~TProcesses() {} + + void addProcess(const char* process) + { + processes.push_back(process); + } + void addProcess(const std::string& process) + { + processes.push_back(process); + } + void addArgument(int arg) + { + processes.back().append(" "); + std::string argString = std::to_string(arg); + processes.back().append(argString); + } + void addArgument(const char* arg) + { + processes.back().append(" "); + processes.back().append(arg); + } + void addArgument(const std::string& arg) + { + processes.back().append(" "); + processes.back().append(arg); + } + void addIfNonZero(const char* process, int value) + { + if (value != 0) { + addProcess(process); + addArgument(value); + } + } + + const std::vector<std::string>& getProcesses() const { return processes; } + +private: + std::vector<std::string> processes; +}; + +class TSymbolTable; +class TSymbol; +class TVariable;
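// Editor's note: a small illustration (not upstream code) of the textual
// process-string format described above. The hypothetical calls below record
// two entries, "target-env vulkan1.1" and "resource-set-binding 0":
//
//   glslang::TProcesses procs;
//   procs.addProcess("target-env");
//   procs.addArgument("vulkan1.1");
//   procs.addProcess("resource-set-binding");
//   procs.addArgument(0);
//   // procs.getProcesses() == { "target-env vulkan1.1", "resource-set-binding 0" }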
+ +// +// Texture and Sampler transformation mode. +// +enum ComputeDerivativeMode { + LayoutDerivativeNone, // default layout as SPV_NV_compute_shader_derivatives not enabled + LayoutDerivativeGroupQuads, // derivative_group_quadsNV + LayoutDerivativeGroupLinear, // derivative_group_linearNV +}; + +class TIdMaps { +public: + TMap<TString, long long>& operator[](long long i) { return maps[i]; } + const TMap<TString, long long>& operator[](long long i) const { return maps[i]; } +private: + TMap<TString, long long> maps[EsiCount]; +}; + +class TNumericFeatures { +public: + TNumericFeatures() : features(0) { } + TNumericFeatures(const TNumericFeatures&) = delete; + TNumericFeatures& operator=(const TNumericFeatures&) = delete; + typedef enum : unsigned int { + shader_explicit_arithmetic_types = 1 << 0, + shader_explicit_arithmetic_types_int8 = 1 << 1, + shader_explicit_arithmetic_types_int16 = 1 << 2, + shader_explicit_arithmetic_types_int32 = 1 << 3, + shader_explicit_arithmetic_types_int64 = 1 << 4, + shader_explicit_arithmetic_types_float16 = 1 << 5, + shader_explicit_arithmetic_types_float32 = 1 << 6, + shader_explicit_arithmetic_types_float64 = 1 << 7, + shader_implicit_conversions = 1 << 8, + gpu_shader_fp64 = 1 << 9, + gpu_shader_int16 = 1 << 10, + gpu_shader_half_float = 1 << 11, + } feature; + void insert(feature f) { features |= f; } + void erase(feature f) { features &= ~f; } + bool contains(feature f) const { return (features & f) != 0; } +private: + unsigned int features; +}; + +// MustBeAssigned wraps a T, asserting that it has been assigned with +// operator =() before attempting to read with operator T() or operator ->(). +// Used to catch cases where fields are read before they have been assigned. +template<typename T> +class MustBeAssigned +{ +public: + MustBeAssigned() = default; + MustBeAssigned(const T& v) : value(v) {} + operator const T&() const { assert(isSet); return value; } + const T* operator ->() const { assert(isSet); return &value; } + MustBeAssigned& operator = (const T& v) { value = v; isSet = true; return *this; } +private: + T value; + bool isSet = false; +}; + +// +// Set of helper functions to help parse and build the tree.
+// +class TIntermediate { +public: + explicit TIntermediate(EShLanguage l, int v = 0, EProfile p = ENoProfile) : + language(l), + profile(p), version(v), + treeRoot(0), + resources(TBuiltInResource{}), + numEntryPoints(0), numErrors(0), numPushConstants(0), recursive(false), + invertY(false), + dxPositionW(false), + enhancedMsgs(false), + debugInfo(false), + useStorageBuffer(false), + invariantAll(false), + nanMinMaxClamp(false), + depthReplacing(false), + stencilReplacing(false), + uniqueId(0), + globalUniformBlockName(""), + atomicCounterBlockName(""), + globalUniformBlockSet(TQualifier::layoutSetEnd), + globalUniformBlockBinding(TQualifier::layoutBindingEnd), + atomicCounterBlockSet(TQualifier::layoutSetEnd) +#ifndef GLSLANG_WEB + , + implicitThisName("@this"), implicitCounterName("@count"), + source(EShSourceNone), + useVulkanMemoryModel(false), + invocations(TQualifier::layoutNotSet), vertices(TQualifier::layoutNotSet), + inputPrimitive(ElgNone), outputPrimitive(ElgNone), + pixelCenterInteger(false), originUpperLeft(false),texCoordBuiltinRedeclared(false), + vertexSpacing(EvsNone), vertexOrder(EvoNone), interlockOrdering(EioNone), pointMode(false), earlyFragmentTests(false), + postDepthCoverage(false), earlyAndLateFragmentTestsAMD(false), depthLayout(EldNone), stencilLayout(ElsNone), + hlslFunctionality1(false), + blendEquations(0), xfbMode(false), multiStream(false), + layoutOverrideCoverage(false), + geoPassthroughEXT(false), + numShaderRecordBlocks(0), + computeDerivativeMode(LayoutDerivativeNone), + primitives(TQualifier::layoutNotSet), + numTaskNVBlocks(0), + layoutPrimitiveCulling(false), + numTaskEXTPayloads(0), + autoMapBindings(false), + autoMapLocations(false), + flattenUniformArrays(false), + useUnknownFormat(false), + hlslOffsets(false), + hlslIoMapping(false), + useVariablePointers(false), + textureSamplerTransformMode(EShTexSampTransKeep), + needToLegalize(false), + binaryDoubleOutput(false), + subgroupUniformControlFlow(false), + usePhysicalStorageBuffer(false), + spirvRequirement(nullptr), + spirvExecutionMode(nullptr), + uniformLocationBase(0) +#endif + { + localSize[0] = 1; + localSize[1] = 1; + localSize[2] = 1; + localSizeNotDefault[0] = false; + localSizeNotDefault[1] = false; + localSizeNotDefault[2] = false; + localSizeSpecId[0] = TQualifier::layoutNotSet; + localSizeSpecId[1] = TQualifier::layoutNotSet; + localSizeSpecId[2] = TQualifier::layoutNotSet; +#ifndef GLSLANG_WEB + xfbBuffers.resize(TQualifier::layoutXfbBufferEnd); + shiftBinding.fill(0); +#endif + } + + void setVersion(int v) + { + version = v; + } + void setProfile(EProfile p) + { + profile = p; + } + + int getVersion() const { return version; } + EProfile getProfile() const { return profile; } + void setSpv(const SpvVersion& s) + { + spvVersion = s; + + // client processes + if (spvVersion.vulkan > 0) + processes.addProcess("client vulkan100"); + if (spvVersion.openGl > 0) + processes.addProcess("client opengl100"); + + // target SPV + switch (spvVersion.spv) { + case 0: + break; + case EShTargetSpv_1_0: + break; + case EShTargetSpv_1_1: + processes.addProcess("target-env spirv1.1"); + break; + case EShTargetSpv_1_2: + processes.addProcess("target-env spirv1.2"); + break; + case EShTargetSpv_1_3: + processes.addProcess("target-env spirv1.3"); + break; + case EShTargetSpv_1_4: + processes.addProcess("target-env spirv1.4"); + break; + case EShTargetSpv_1_5: + processes.addProcess("target-env spirv1.5"); + break; + case EShTargetSpv_1_6: + processes.addProcess("target-env spirv1.6"); + break; + 
default: + processes.addProcess("target-env spirvUnknown"); + break; + } + + // target-environment processes + switch (spvVersion.vulkan) { + case 0: + break; + case EShTargetVulkan_1_0: + processes.addProcess("target-env vulkan1.0"); + break; + case EShTargetVulkan_1_1: + processes.addProcess("target-env vulkan1.1"); + break; + case EShTargetVulkan_1_2: + processes.addProcess("target-env vulkan1.2"); + break; + case EShTargetVulkan_1_3: + processes.addProcess("target-env vulkan1.3"); + break; + default: + processes.addProcess("target-env vulkanUnknown"); + break; + } + if (spvVersion.openGl > 0) + processes.addProcess("target-env opengl"); + } + const SpvVersion& getSpv() const { return spvVersion; } + EShLanguage getStage() const { return language; } + void addRequestedExtension(const char* extension) { requestedExtensions.insert(extension); } + const std::set<std::string>& getRequestedExtensions() const { return requestedExtensions; } + bool isRayTracingStage() const { + return language >= EShLangRayGen && language <= EShLangCallableNV; + } + + void setTreeRoot(TIntermNode* r) { treeRoot = r; } + TIntermNode* getTreeRoot() const { return treeRoot; } + void incrementEntryPointCount() { ++numEntryPoints; } + int getNumEntryPoints() const { return numEntryPoints; } + int getNumErrors() const { return numErrors; } + void addPushConstantCount() { ++numPushConstants; } + void setLimits(const TBuiltInResource& r) { resources = r; } + const TBuiltInResource& getLimits() const { return resources; } + + bool postProcess(TIntermNode*, EShLanguage); + void removeTree(); + + void setEntryPointName(const char* ep) + { + entryPointName = ep; + processes.addProcess("entry-point"); + processes.addArgument(entryPointName); + } + void setEntryPointMangledName(const char* ep) { entryPointMangledName = ep; } + const std::string& getEntryPointName() const { return entryPointName; } + const std::string& getEntryPointMangledName() const { return entryPointMangledName; } + + void setDebugInfo(bool debuginfo) + { + debugInfo = debuginfo; + } + bool getDebugInfo() const { return debugInfo; } + + void setInvertY(bool invert) + { + invertY = invert; + if (invertY) + processes.addProcess("invert-y"); + } + bool getInvertY() const { return invertY; } + + void setDxPositionW(bool dxPosW) + { + dxPositionW = dxPosW; + if (dxPositionW) + processes.addProcess("dx-position-w"); + } + bool getDxPositionW() const { return dxPositionW; } + + void setEnhancedMsgs() + { + enhancedMsgs = true; + } + bool getEnhancedMsgs() const { return enhancedMsgs && getSource() == EShSourceGlsl; } + +#ifdef ENABLE_HLSL + void setSource(EShSource s) { source = s; } + EShSource getSource() const { return source; } +#else + void setSource(EShSource s) { assert(s == EShSourceGlsl); (void)s; } + EShSource getSource() const { return EShSourceGlsl; } +#endif + + bool isRecursive() const { return recursive; } + + TIntermSymbol* addSymbol(const TVariable&); + TIntermSymbol* addSymbol(const TVariable&, const TSourceLoc&); + TIntermSymbol* addSymbol(const TType&, const TSourceLoc&); + TIntermSymbol* addSymbol(const TIntermSymbol&); + TIntermTyped* addConversion(TOperator, const TType&, TIntermTyped*); + std::tuple<TIntermTyped*, TIntermTyped*> addPairConversion(TOperator op, TIntermTyped* node0, TIntermTyped* node1); + TIntermTyped* addUniShapeConversion(TOperator, const TType&, TIntermTyped*); + TIntermTyped* addConversion(TBasicType convertTo, TIntermTyped* node) const; + void addBiShapeConversion(TOperator, TIntermTyped*& lhsNode, TIntermTyped*& rhsNode); + TIntermTyped*
addShapeConversion(const TType&, TIntermTyped*); + TIntermTyped* addBinaryMath(TOperator, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addAssign(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addIndex(TOperator op, TIntermTyped* base, TIntermTyped* index, const TSourceLoc&); + TIntermTyped* addUnaryMath(TOperator, TIntermTyped* child, const TSourceLoc&); + TIntermTyped* addBuiltInFunctionCall(const TSourceLoc& line, TOperator, bool unary, TIntermNode*, const TType& returnType); + bool canImplicitlyPromote(TBasicType from, TBasicType to, TOperator op = EOpNull) const; + bool isIntegralPromotion(TBasicType from, TBasicType to) const; + bool isFPPromotion(TBasicType from, TBasicType to) const; + bool isIntegralConversion(TBasicType from, TBasicType to) const; + bool isFPConversion(TBasicType from, TBasicType to) const; + bool isFPIntegralConversion(TBasicType from, TBasicType to) const; + TOperator mapTypeToConstructorOp(const TType&) const; + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right); + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right, const TSourceLoc&); + TIntermAggregate* makeAggregate(TIntermNode* node); + TIntermAggregate* makeAggregate(TIntermNode* node, const TSourceLoc&); + TIntermAggregate* makeAggregate(const TSourceLoc&); + TIntermTyped* setAggregateOperator(TIntermNode*, TOperator, const TType& type, const TSourceLoc&); + bool areAllChildConst(TIntermAggregate* aggrNode); + TIntermSelection* addSelection(TIntermTyped* cond, TIntermNodePair code, const TSourceLoc&); + TIntermTyped* addSelection(TIntermTyped* cond, TIntermTyped* trueBlock, TIntermTyped* falseBlock, const TSourceLoc&); + TIntermTyped* addComma(TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addMethod(TIntermTyped*, const TType&, const TString*, const TSourceLoc&); + TIntermConstantUnion* addConstantUnion(const TConstUnionArray&, const TType&, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(bool, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(double, TBasicType, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(const TString*, const TSourceLoc&, bool literal = false) const; + TIntermTyped* promoteConstantUnion(TBasicType, TIntermConstantUnion*) const; + bool parseConstTree(TIntermNode*, TConstUnionArray, TOperator, const TType&, bool singleConstantParam = false); + TIntermLoop* addLoop(TIntermNode*, TIntermTyped*, TIntermTyped*, bool testFirst, const TSourceLoc&); + TIntermAggregate* addForLoop(TIntermNode*, TIntermNode*, TIntermTyped*, 
TIntermTyped*, bool testFirst, + const TSourceLoc&, TIntermLoop*&); + TIntermBranch* addBranch(TOperator, const TSourceLoc&); + TIntermBranch* addBranch(TOperator, TIntermTyped*, const TSourceLoc&); + template<typename selectorType> TIntermTyped* addSwizzle(TSwizzleSelectors<selectorType>&, const TSourceLoc&); + + // Low level functions to add nodes (no conversions or other higher level transformations) + // If a type is provided, the node's type will be set to it. + TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&) const; + TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&, + const TType&) const; + TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&) const; + TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&, const TType&) const; + + // Constant folding (in Constant.cpp) + TIntermTyped* fold(TIntermAggregate* aggrNode); + TIntermTyped* foldConstructor(TIntermAggregate* aggrNode); + TIntermTyped* foldDereference(TIntermTyped* node, int index, const TSourceLoc&); + TIntermTyped* foldSwizzle(TIntermTyped* node, TSwizzleSelectors<TVectorSelector>& fields, const TSourceLoc&); + + // Tree ops + static const TIntermTyped* findLValueBase(const TIntermTyped*, bool swizzleOkay, bool BufferReferenceOk = false); + + // Linkage related + void addSymbolLinkageNodes(TIntermAggregate*& linkage, EShLanguage, TSymbolTable&); + void addSymbolLinkageNode(TIntermAggregate*& linkage, const TSymbol&); + TIntermAggregate* findLinkerObjects() const; + + void setGlobalUniformBlockName(const char* name) { globalUniformBlockName = std::string(name); } + const char* getGlobalUniformBlockName() const { return globalUniformBlockName.c_str(); } + void setGlobalUniformSet(unsigned int set) { globalUniformBlockSet = set; } + unsigned int getGlobalUniformSet() const { return globalUniformBlockSet; } + void setGlobalUniformBinding(unsigned int binding) { globalUniformBlockBinding = binding; } + unsigned int getGlobalUniformBinding() const { return globalUniformBlockBinding; } + + void setAtomicCounterBlockName(const char* name) { atomicCounterBlockName = std::string(name); } + const char* getAtomicCounterBlockName() const { return atomicCounterBlockName.c_str(); } + void setAtomicCounterBlockSet(unsigned int set) { atomicCounterBlockSet = set; } + unsigned int getAtomicCounterBlockSet() const { return atomicCounterBlockSet; } + + + void setUseStorageBuffer() { useStorageBuffer = true; } + bool usingStorageBuffer() const { return useStorageBuffer; } + void setInvariantAll() { invariantAll = true; } + bool isInvariantAll() const { return invariantAll; } + void setDepthReplacing() { depthReplacing = true; } + bool isDepthReplacing() const { return depthReplacing; } + void setStencilReplacing() { stencilReplacing = true; } + bool isStencilReplacing() const { return stencilReplacing; } + bool setLocalSize(int dim, int size) + { + if (localSizeNotDefault[dim]) + return size == localSize[dim]; + localSizeNotDefault[dim] = true; + localSize[dim] = size; + return true; + } + unsigned int getLocalSize(int dim) const { return localSize[dim]; } + bool isLocalSizeSet() const + { + // Return true if any component has been set (i.e. any component is not default).
+ return localSizeNotDefault[0] || localSizeNotDefault[1] || localSizeNotDefault[2]; + } + bool setLocalSizeSpecId(int dim, int id) + { + if (localSizeSpecId[dim] != TQualifier::layoutNotSet) + return id == localSizeSpecId[dim]; + localSizeSpecId[dim] = id; + return true; + } + int getLocalSizeSpecId(int dim) const { return localSizeSpecId[dim]; } + bool isLocalSizeSpecialized() const + { + // Return true if any component has been specialized. + return localSizeSpecId[0] != TQualifier::layoutNotSet || + localSizeSpecId[1] != TQualifier::layoutNotSet || + localSizeSpecId[2] != TQualifier::layoutNotSet; + } +#ifdef GLSLANG_WEB + void output(TInfoSink&, bool tree) { } + + bool isEsProfile() const { return false; } + bool getXfbMode() const { return false; } + bool isMultiStream() const { return false; } + TLayoutGeometry getOutputPrimitive() const { return ElgNone; } + bool getPostDepthCoverage() const { return false; } + bool getEarlyFragmentTests() const { return false; } + TLayoutDepth getDepth() const { return EldNone; } + bool getPixelCenterInteger() const { return false; } + void setOriginUpperLeft() { } + bool getOriginUpperLeft() const { return true; } + TInterlockOrdering getInterlockOrdering() const { return EioNone; } + + bool getAutoMapBindings() const { return false; } + bool getAutoMapLocations() const { return false; } + int getNumPushConstants() const { return 0; } + void addShaderRecordCount() { } + void addTaskNVCount() { } + void addTaskPayloadEXTCount() { } + void setUseVulkanMemoryModel() { } + bool usingVulkanMemoryModel() const { return false; } + bool usingPhysicalStorageBuffer() const { return false; } + bool usingVariablePointers() const { return false; } + unsigned getXfbStride(int buffer) const { return 0; } + bool hasLayoutDerivativeModeNone() const { return false; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return LayoutDerivativeNone; } +#else + void output(TInfoSink&, bool tree); + + bool isEsProfile() const { return profile == EEsProfile; } + + void setShiftBinding(TResourceType res, unsigned int shift) + { + shiftBinding[res] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) + processes.addIfNonZero(name, shift); + } + + unsigned int getShiftBinding(TResourceType res) const { return shiftBinding[res]; } + + void setShiftBindingForSet(TResourceType res, unsigned int shift, unsigned int set) + { + if (shift == 0) // ignore if there's no shift: it's a no-op. + return; + + shiftBindingForSet[res][set] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) { + processes.addProcess(name); + processes.addArgument(shift); + processes.addArgument(set); + } + } + + int getShiftBindingForSet(TResourceType res, unsigned int set) const + { + const auto shift = shiftBindingForSet[res].find(set); + return shift == shiftBindingForSet[res].end() ? 
-1 : shift->second; + } + bool hasShiftBindingForSet(TResourceType res) const { return !shiftBindingForSet[res].empty(); } + + void setResourceSetBinding(const std::vector<std::string>& shift) + { + resourceSetBinding = shift; + if (shift.size() > 0) { + processes.addProcess("resource-set-binding"); + for (int s = 0; s < (int)shift.size(); ++s) + processes.addArgument(shift[s]); + } + } + const std::vector<std::string>& getResourceSetBinding() const { return resourceSetBinding; } + void setAutoMapBindings(bool map) + { + autoMapBindings = map; + if (autoMapBindings) + processes.addProcess("auto-map-bindings"); + } + bool getAutoMapBindings() const { return autoMapBindings; } + void setAutoMapLocations(bool map) + { + autoMapLocations = map; + if (autoMapLocations) + processes.addProcess("auto-map-locations"); + } + bool getAutoMapLocations() const { return autoMapLocations; } + +#ifdef ENABLE_HLSL + void setFlattenUniformArrays(bool flatten) + { + flattenUniformArrays = flatten; + if (flattenUniformArrays) + processes.addProcess("flatten-uniform-arrays"); + } + bool getFlattenUniformArrays() const { return flattenUniformArrays; } +#endif + void setNoStorageFormat(bool b) + { + useUnknownFormat = b; + if (useUnknownFormat) + processes.addProcess("no-storage-format"); + } + bool getNoStorageFormat() const { return useUnknownFormat; } + void setUseVulkanMemoryModel() + { + useVulkanMemoryModel = true; + processes.addProcess("use-vulkan-memory-model"); + } + bool usingVulkanMemoryModel() const { return useVulkanMemoryModel; } + void setUsePhysicalStorageBuffer() + { + usePhysicalStorageBuffer = true; + } + bool usingPhysicalStorageBuffer() const { return usePhysicalStorageBuffer; } + void setUseVariablePointers() + { + useVariablePointers = true; + processes.addProcess("use-variable-pointers"); + } + bool usingVariablePointers() const { return useVariablePointers; } + +#ifdef ENABLE_HLSL + template <class T> T addCounterBufferName(const T& name) const { return name + implicitCounterName; } + bool hasCounterBufferName(const TString& name) const { + size_t len = strlen(implicitCounterName); + return name.size() > len && + name.compare(name.size() - len, len, implicitCounterName) == 0; + } +#endif + + void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode) { textureSamplerTransformMode = mode; } + int getNumPushConstants() const { return numPushConstants; } + void addShaderRecordCount() { ++numShaderRecordBlocks; } + void addTaskNVCount() { ++numTaskNVBlocks; } + void addTaskPayloadEXTCount() { ++numTaskEXTPayloads; } + + bool setInvocations(int i) + { + if (invocations != TQualifier::layoutNotSet) + return invocations == i; + invocations = i; + return true; + } + int getInvocations() const { return invocations; } + bool setVertices(int m) + { + if (vertices != TQualifier::layoutNotSet) + return vertices == m; + vertices = m; + return true; + } + int getVertices() const { return vertices; } + bool setInputPrimitive(TLayoutGeometry p) + { + if (inputPrimitive != ElgNone) + return inputPrimitive == p; + inputPrimitive = p; + return true; + } + TLayoutGeometry getInputPrimitive() const { return inputPrimitive; } + bool setVertexSpacing(TVertexSpacing s) + { + if (vertexSpacing != EvsNone) + return vertexSpacing == s; + vertexSpacing = s; + return true; + } + TVertexSpacing getVertexSpacing() const { return vertexSpacing; } + bool setVertexOrder(TVertexOrder o) + { + if (vertexOrder != EvoNone) + return vertexOrder == o; + vertexOrder = o; + return true; + } + TVertexOrder getVertexOrder() const { return vertexOrder; }
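// Editor's note: setInvocations, setVertices, setInputPrimitive, setVertexSpacing
// and setVertexOrder above all share a "set once" contract: the first call stores
// the value and returns true; a later call returns true only when it agrees with
// the stored value. A hypothetical caller can therefore detect conflicting layout
// redeclarations directly from the return value, e.g.:
//
//   if (!intermediate.setVertices(3))
//       infoSink.info.message(EPrefixError, "conflicting layout: vertices already set");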
+ void setPointMode() { pointMode = true; } + bool getPointMode() const { return pointMode; } + + bool setInterlockOrdering(TInterlockOrdering o) + { + if (interlockOrdering != EioNone) + return interlockOrdering == o; + interlockOrdering = o; + return true; + } + TInterlockOrdering getInterlockOrdering() const { return interlockOrdering; } + + void setXfbMode() { xfbMode = true; } + bool getXfbMode() const { return xfbMode; } + void setMultiStream() { multiStream = true; } + bool isMultiStream() const { return multiStream; } + bool setOutputPrimitive(TLayoutGeometry p) + { + if (outputPrimitive != ElgNone) + return outputPrimitive == p; + outputPrimitive = p; + return true; + } + TLayoutGeometry getOutputPrimitive() const { return outputPrimitive; } + void setPostDepthCoverage() { postDepthCoverage = true; } + bool getPostDepthCoverage() const { return postDepthCoverage; } + void setEarlyFragmentTests() { earlyFragmentTests = true; } + void setEarlyAndLateFragmentTestsAMD() { earlyAndLateFragmentTestsAMD = true; } + bool getEarlyFragmentTests() const { return earlyFragmentTests; } + bool getEarlyAndLateFragmentTestsAMD() const { return earlyAndLateFragmentTestsAMD; } + bool setDepth(TLayoutDepth d) + { + if (depthLayout != EldNone) + return depthLayout == d; + depthLayout = d; + return true; + } + bool setStencil(TLayoutStencil s) + { + if (stencilLayout != ElsNone) + return stencilLayout == s; + stencilLayout = s; + return true; + } + TLayoutDepth getDepth() const { return depthLayout; } + TLayoutStencil getStencil() const { return stencilLayout; } + void setOriginUpperLeft() { originUpperLeft = true; } + bool getOriginUpperLeft() const { return originUpperLeft; } + void setPixelCenterInteger() { pixelCenterInteger = true; } + bool getPixelCenterInteger() const { return pixelCenterInteger; } + void setTexCoordRedeclared() { texCoordBuiltinRedeclared = true; } + bool getTexCoordRedeclared() const { return texCoordBuiltinRedeclared; } + void addBlendEquation(TBlendEquationShift b) { blendEquations |= (1 << b); } + unsigned int getBlendEquations() const { return blendEquations; } + bool setXfbBufferStride(int buffer, unsigned stride) + { + if (xfbBuffers[buffer].stride != TQualifier::layoutXfbStrideEnd) + return xfbBuffers[buffer].stride == stride; + xfbBuffers[buffer].stride = stride; + return true; + } + unsigned getXfbStride(int buffer) const { return xfbBuffers[buffer].stride; } + int addXfbBufferOffset(const TType&); + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const; + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType) const; + void setLayoutOverrideCoverage() { layoutOverrideCoverage = true; } + bool getLayoutOverrideCoverage() const { return layoutOverrideCoverage; } + void setGeoPassthroughEXT() { geoPassthroughEXT = true; } + bool getGeoPassthroughEXT() const { return geoPassthroughEXT; } + void setLayoutDerivativeMode(ComputeDerivativeMode mode) { computeDerivativeMode = mode; } + bool hasLayoutDerivativeModeNone() const { return computeDerivativeMode != LayoutDerivativeNone; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return computeDerivativeMode; } + void setLayoutPrimitiveCulling() { layoutPrimitiveCulling = true; } + bool getLayoutPrimitiveCulling() const { return layoutPrimitiveCulling; } + bool setPrimitives(int m) + { + if (primitives != TQualifier::layoutNotSet) + return primitives == m; + primitives = m; + return true; + } + int getPrimitives() const { 
return primitives; } + const char* addSemanticName(const TString& name) + { + return semanticNameSet.insert(name).first->c_str(); + } + void addUniformLocationOverride(const char* nameStr, int location) + { + std::string name = nameStr; + uniformLocationOverrides[name] = location; + } + + int getUniformLocationOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = uniformLocationOverrides.find(name); + if (pos == uniformLocationOverrides.end()) + return -1; + else + return pos->second; + } + + void setUniformLocationBase(int base) { uniformLocationBase = base; } + int getUniformLocationBase() const { return uniformLocationBase; } + + void setNeedsLegalization() { needToLegalize = true; } + bool needsLegalization() const { return needToLegalize; } + + void setBinaryDoubleOutput() { binaryDoubleOutput = true; } + bool getBinaryDoubleOutput() { return binaryDoubleOutput; } + + void setSubgroupUniformControlFlow() { subgroupUniformControlFlow = true; } + bool getSubgroupUniformControlFlow() const { return subgroupUniformControlFlow; } + + // GL_EXT_spirv_intrinsics + void insertSpirvRequirement(const TSpirvRequirement* spirvReq); + bool hasSpirvRequirement() const { return spirvRequirement != nullptr; } + const TSpirvRequirement& getSpirvRequirement() const { return *spirvRequirement; } + void insertSpirvExecutionMode(int executionMode, const TIntermAggregate* args = nullptr); + void insertSpirvExecutionModeId(int executionMode, const TIntermAggregate* args); + bool hasSpirvExecutionMode() const { return spirvExecutionMode != nullptr; } + const TSpirvExecutionMode& getSpirvExecutionMode() const { return *spirvExecutionMode; } +#endif // GLSLANG_WEB + + void addBlockStorageOverride(const char* nameStr, TBlockStorageClass backing) + { + std::string name(nameStr); + blockBackingOverrides[name] = backing; + } + TBlockStorageClass getBlockStorageOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = blockBackingOverrides.find(name); + if (pos == blockBackingOverrides.end()) + return EbsNone; + else + return pos->second; + } +#ifdef ENABLE_HLSL + void setHlslFunctionality1() { hlslFunctionality1 = true; } + bool getHlslFunctionality1() const { return hlslFunctionality1; } + void setHlslOffsets() + { + hlslOffsets = true; + if (hlslOffsets) + processes.addProcess("hlsl-offsets"); + } + bool usingHlslOffsets() const { return hlslOffsets; } + void setHlslIoMapping(bool b) + { + hlslIoMapping = b; + if (hlslIoMapping) + processes.addProcess("hlsl-iomap"); + } + bool usingHlslIoMapping() { return hlslIoMapping; } +#else + bool getHlslFunctionality1() const { return false; } + bool usingHlslOffsets() const { return false; } + bool usingHlslIoMapping() { return false; } +#endif + + bool usingScalarBlockLayout() const { + for (auto extIt = requestedExtensions.begin(); extIt != requestedExtensions.end(); ++extIt) { + if (*extIt == E_GL_EXT_scalar_block_layout) + return true; + } + return false; + } + + bool IsRequestedExtension(const char* extension) const + { + return (requestedExtensions.find(extension) != requestedExtensions.end()); + } + + void addToCallGraph(TInfoSink&, const TString& caller, const TString& callee); + void merge(TInfoSink&, TIntermediate&); + void finalCheck(TInfoSink&, bool keepUncalled); + + void mergeGlobalUniformBlocks(TInfoSink& infoSink, TIntermediate& unit, bool mergeExistingOnly); + void mergeUniformObjects(TInfoSink& infoSink, TIntermediate& unit); + void checkStageIO(TInfoSink&, TIntermediate&); + + bool 
buildConvertOp(TBasicType dst, TBasicType src, TOperator& convertOp) const; + TIntermTyped* createConversion(TBasicType convertTo, TIntermTyped* node) const; + + void addIoAccessed(const TString& name) { ioAccessed.insert(name); } + bool inIoAccessed(const TString& name) const { return ioAccessed.find(name) != ioAccessed.end(); } + + int addUsedLocation(const TQualifier&, const TType&, bool& typeCollision); + int checkLocationRange(int set, const TIoRange& range, const TType&, bool& typeCollision); + int checkLocationRT(int set, int location); + int addUsedOffsets(int binding, int offset, int numOffsets); + bool addUsedConstantId(int id); + static int computeTypeLocationSize(const TType&, EShLanguage); + static int computeTypeUniformLocationSize(const TType&); + + static int getBaseAlignmentScalar(const TType&, int& size); + static int getBaseAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor); + static int getScalarAlignment(const TType&, int& size, int& stride, bool rowMajor); + static int getMemberAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor); + static bool improperStraddle(const TType& type, int size, int offset); + static void updateOffset(const TType& parentType, const TType& memberType, int& offset, int& memberSize); + static int getOffset(const TType& type, int index); + static int getBlockSize(const TType& blockType); + static int computeBufferReferenceTypeSize(const TType&); + static bool isIoResizeArray(const TType& type, EShLanguage language); + + bool promote(TIntermOperator*); + void setNanMinMaxClamp(bool setting) { nanMinMaxClamp = setting; } + bool getNanMinMaxClamp() const { return nanMinMaxClamp; } + + void setSourceFile(const char* file) { if (file != nullptr) sourceFile = file; } + const std::string& getSourceFile() const { return sourceFile; } + void addSourceText(const char* text, size_t len) { sourceText.append(text, len); } + const std::string& getSourceText() const { return sourceText; } + const std::map<std::string, std::string>& getIncludeText() const { return includeText; } + void addIncludeText(const char* name, const char* text, size_t len) { includeText[name].assign(text,len); } + void addProcesses(const std::vector<std::string>& p) + { + for (int i = 0; i < (int)p.size(); ++i) + processes.addProcess(p[i]); + } + void addProcess(const std::string& process) { processes.addProcess(process); } + void addProcessArgument(const std::string& arg) { processes.addArgument(arg); } + const std::vector<std::string>& getProcesses() const { return processes.getProcesses(); } + unsigned long long getUniqueId() const { return uniqueId; } + void setUniqueId(unsigned long long id) { uniqueId = id; } + + // Certain explicit conversions are allowed conditionally +#ifdef GLSLANG_WEB + bool getArithemeticInt8Enabled() const { return false; } + bool getArithemeticInt16Enabled() const { return false; } + bool getArithemeticFloat16Enabled() const { return false; } + void updateNumericFeature(TNumericFeatures::feature f, bool on) { } +#else + bool getArithemeticInt8Enabled() const { + return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int8); + } + bool getArithemeticInt16Enabled() const { + return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int16); + } + + 
bool getArithemeticFloat16Enabled() const { + return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::gpu_shader_half_float) || + numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_float16); + } + void updateNumericFeature(TNumericFeatures::feature f, bool on) + { on ? numericFeatures.insert(f) : numericFeatures.erase(f); } +#endif + +protected: + TIntermSymbol* addSymbol(long long Id, const TString&, const TType&, const TConstUnionArray&, TIntermTyped* subtree, const TSourceLoc&); + void error(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount); + void warn(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount); + void mergeCallGraphs(TInfoSink&, TIntermediate&); + void mergeModes(TInfoSink&, TIntermediate&); + void mergeTrees(TInfoSink&, TIntermediate&); + void seedIdMap(TIdMaps& idMaps, long long& IdShift); + void remapIds(const TIdMaps& idMaps, long long idShift, TIntermediate&); + void mergeBodies(TInfoSink&, TIntermSequence& globals, const TIntermSequence& unitGlobals); + void mergeLinkerObjects(TInfoSink&, TIntermSequence& linkerObjects, const TIntermSequence& unitLinkerObjects, EShLanguage); + void mergeBlockDefinitions(TInfoSink&, TIntermSymbol* block, TIntermSymbol* unitBlock, TIntermediate* unitRoot); + void mergeImplicitArraySizes(TType&, const TType&); + void mergeErrorCheck(TInfoSink&, const TIntermSymbol&, const TIntermSymbol&, EShLanguage); + void checkCallGraphCycles(TInfoSink&); + void checkCallGraphBodies(TInfoSink&, bool keepUncalled); + void inOutLocationCheck(TInfoSink&); + void sharedBlockCheck(TInfoSink&); + bool userOutputUsed() const; + bool isSpecializationOperation(const TIntermOperator&) const; + bool isNonuniformPropagating(TOperator) const; + bool promoteUnary(TIntermUnary&); + bool promoteBinary(TIntermBinary&); + void addSymbolLinkageNode(TIntermAggregate*& linkage, TSymbolTable&, const TString&); + bool promoteAggregate(TIntermAggregate&); + void pushSelector(TIntermSequence&, const TVectorSelector&, const TSourceLoc&); + void pushSelector(TIntermSequence&, const TMatrixSelector&, const TSourceLoc&); + bool specConstantPropagates(const TIntermTyped&, const TIntermTyped&); + void performTextureUpgradeAndSamplerRemovalTransformation(TIntermNode* root); + bool isConversionAllowed(TOperator op, TIntermTyped* node) const; + std::tuple<TBasicType, TBasicType> getConversionDestinationType(TBasicType type0, TBasicType type1, TOperator op) const; + + static const char* getResourceName(TResourceType); + + const EShLanguage language; // stage, known at construction time + std::string entryPointName; + std::string entryPointMangledName; + typedef std::list<TCall> TGraph; + TGraph callGraph; + + EProfile profile; // source profile + int version; // source version + SpvVersion spvVersion; + TIntermNode* treeRoot; + std::set<std::string> requestedExtensions; // cumulation of all enabled or required extensions; not connected to what subset of the shader used them + MustBeAssigned<TBuiltInResource> resources; + int numEntryPoints; + int numErrors; + int numPushConstants; + bool recursive; + bool invertY; + bool dxPositionW; + bool enhancedMsgs; + bool debugInfo; + bool useStorageBuffer; + bool invariantAll; + bool nanMinMaxClamp; // true if desiring min/max/clamp to favor non-NaN over NaN + bool depthReplacing; + bool stencilReplacing; + int localSize[3]; + bool localSizeNotDefault[3]; + int localSizeSpecId[3]; + unsigned long long uniqueId; + + std::string globalUniformBlockName; + std::string 
atomicCounterBlockName; + unsigned int globalUniformBlockSet; + unsigned int globalUniformBlockBinding; + unsigned int atomicCounterBlockSet; + +#ifndef GLSLANG_WEB +public: + const char* const implicitThisName; + const char* const implicitCounterName; +protected: + EShSource source; // source language, known a bit later + bool useVulkanMemoryModel; + int invocations; + int vertices; + TLayoutGeometry inputPrimitive; + TLayoutGeometry outputPrimitive; + bool pixelCenterInteger; + bool originUpperLeft; + bool texCoordBuiltinRedeclared; + TVertexSpacing vertexSpacing; + TVertexOrder vertexOrder; + TInterlockOrdering interlockOrdering; + bool pointMode; + bool earlyFragmentTests; + bool postDepthCoverage; + bool earlyAndLateFragmentTestsAMD; + TLayoutDepth depthLayout; + TLayoutStencil stencilLayout; + bool hlslFunctionality1; + int blendEquations; // an 'or'ing of masks of shifts of TBlendEquationShift + bool xfbMode; + std::vector<TXfbBuffer> xfbBuffers; // all the data we need to track per xfb buffer + bool multiStream; + bool layoutOverrideCoverage; + bool geoPassthroughEXT; + int numShaderRecordBlocks; + ComputeDerivativeMode computeDerivativeMode; + int primitives; + int numTaskNVBlocks; + bool layoutPrimitiveCulling; + int numTaskEXTPayloads; + + // Base shift values + std::array<int, EResCount> shiftBinding; + + // Per-descriptor-set shift values + std::array<std::map<int, int>, EResCount> shiftBindingForSet; + + std::vector<std::string> resourceSetBinding; + bool autoMapBindings; + bool autoMapLocations; + bool flattenUniformArrays; + bool useUnknownFormat; + bool hlslOffsets; + bool hlslIoMapping; + bool useVariablePointers; + + std::set<TString> semanticNameSet; + + EShTextureSamplerTransformMode textureSamplerTransformMode; + + bool needToLegalize; + bool binaryDoubleOutput; + bool subgroupUniformControlFlow; + bool usePhysicalStorageBuffer; + + TSpirvRequirement* spirvRequirement; + TSpirvExecutionMode* spirvExecutionMode; + + std::unordered_map<std::string, int> uniformLocationOverrides; + int uniformLocationBase; + TNumericFeatures numericFeatures; +#endif + std::unordered_map<std::string, TBlockStorageClass> blockBackingOverrides; + + std::unordered_set<int> usedConstantId; // specialization constant ids used + std::vector<TOffsetRange> usedAtomics; // sets of bindings used by atomic counters + std::vector<TIoRange> usedIo[4]; // sets of used locations, one for each of in, out, uniform, and buffers + std::vector<TRange> usedIoRT[2]; // sets of used location, one for rayPayload/rayPayloadIN and other + // for callableData/callableDataIn + // set of names of statically read/written I/O that might need extra checking + std::set<TString> ioAccessed; + + // source code of shader, useful as part of debug information + std::string sourceFile; + std::string sourceText; + + // Included text. First string is a name, second is the included text + std::map<std::string, std::string> includeText; + + // for OpModuleProcessed, or equivalent + TProcesses processes; + +private: + void operator=(TIntermediate&); // prevent assignments +}; + +} // end namespace glslang + +#endif // _LOCAL_INTERMEDIATE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/parseVersions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/parseVersions.h new file mode 100644 index 0000000..3c52ff1 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/parseVersions.h @@ -0,0 +1,240 @@ +// +// Copyright (C) 2015-2018 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// This is implemented in Versions.cpp + +#ifndef _PARSE_VERSIONS_INCLUDED_ +#define _PARSE_VERSIONS_INCLUDED_ + +#include "../Public/ShaderLang.h" +#include "../Include/InfoSink.h" +#include "Scan.h" + +#include <map> + +namespace glslang { + +// +// Base class for parse helpers. +// This just has version-related information and checking. +// This class should be sufficient for preprocessing. +// +class TParseVersions { +public: + TParseVersions(TIntermediate& interm, int version, EProfile profile, + const SpvVersion& spvVersion, EShLanguage language, TInfoSink& infoSink, + bool forwardCompatible, EShMessages messages) + : +#if !defined(GLSLANG_WEB) + forwardCompatible(forwardCompatible), + profile(profile), +#endif + infoSink(infoSink), version(version), + language(language), + spvVersion(spvVersion), + intermediate(interm), messages(messages), numErrors(0), currentScanner(0) { } + virtual ~TParseVersions() { } + void requireStage(const TSourceLoc&, EShLanguageMask, const char* featureDesc); + void requireStage(const TSourceLoc&, EShLanguage, const char* featureDesc); +#ifdef GLSLANG_WEB + const EProfile profile = EEsProfile; + bool isEsProfile() const { return true; } + void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc) + { + if (! (EEsProfile & profileMask)) + error(loc, "not supported with this profile:", featureDesc, ProfileName(profile)); + } + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions, + const char* const extensions[], const char* featureDesc) + { + if ((EEsProfile & profileMask) && (minVersion == 0 || version < minVersion)) + error(loc, "not supported for this version or the enabled extensions", featureDesc, ""); + } + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension, + const char* featureDesc) + { + profileRequires(loc, profileMask, minVersion, extension ? 
1 : 0, &extension, featureDesc); + } + void initializeExtensionBehavior() { } + void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc) { } + void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc) { } + void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + TExtensionBehavior getExtensionBehavior(const char*) { return EBhMissing; } + bool extensionTurnedOn(const char* const extension) { return false; } + bool extensionsTurnedOn(int numExtensions, const char* const extensions[]) { return false; } + void updateExtensionBehavior(int line, const char* const extension, const char* behavior) { } + void updateExtensionBehavior(const char* const extension, TExtensionBehavior) { } + void checkExtensionStage(const TSourceLoc&, const char* const extension) { } + void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior) { } + void fullIntegerCheck(const TSourceLoc&, const char* op) { } + void doubleCheck(const TSourceLoc&, const char* op) { } + bool float16Arithmetic() { return false; } + void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int16Arithmetic() { return false; } + void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int8Arithmetic() { return false; } + void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + void int64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + bool relaxedErrors() const { return false; } + bool suppressWarnings() const { return true; } + bool isForwardCompatible() const { return false; } +#else + bool forwardCompatible; // true if errors are to be given for use of deprecated features + EProfile profile; // the declared profile in the shader (core by default) + bool isEsProfile() const { return profile == EEsProfile; } + void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions, + const char* const extensions[], const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension, + const char* featureDesc); + virtual void initializeExtensionBehavior(); + virtual void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc); + virtual void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc); + virtual void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual TExtensionBehavior getExtensionBehavior(const char*); + virtual bool extensionTurnedOn(const char* const extension); + virtual bool extensionsTurnedOn(int numExtensions, const char* const extensions[]); + virtual void updateExtensionBehavior(int line, const char* const extension, const 
char* behavior); + virtual void updateExtensionBehavior(const char* const extension, TExtensionBehavior); + virtual bool checkExtensionsRequested(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void checkExtensionStage(const TSourceLoc&, const char* const extension); + virtual void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior); + virtual void fullIntegerCheck(const TSourceLoc&, const char* op); + + virtual void unimplemented(const TSourceLoc&, const char* featureDesc); + virtual void doubleCheck(const TSourceLoc&, const char* op); + virtual void float16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void float16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool float16Arithmetic(); + virtual void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int16Arithmetic(); + virtual void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int8ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int8Arithmetic(); + virtual void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void float16OpaqueCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void int64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt8Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void fcoopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void intcoopmatCheck(const TSourceLoc&, const char *op, bool builtIn = false); + bool relaxedErrors() const { return (messages & EShMsgRelaxedErrors) != 0; } + bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; } + bool isForwardCompatible() const { return forwardCompatible; } +#endif // GLSLANG_WEB + virtual void spvRemoved(const TSourceLoc&, const char* op); + virtual void vulkanRemoved(const TSourceLoc&, const char* op); + virtual void requireVulkan(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char *op, unsigned int version); + + +#if defined(GLSLANG_WEB) && !defined(GLSLANG_WEB_DEVEL) + void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } + void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } +#else + virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) 
= 0; + virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; + virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; + virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; +#endif + + void addError() { ++numErrors; } + int getNumErrors() const { return numErrors; } + + void setScanner(TInputScanner* scanner) { currentScanner = scanner; } + TInputScanner* getScanner() const { return currentScanner; } + const TSourceLoc& getCurrentLoc() const { return currentScanner->getSourceLoc(); } + void setCurrentLine(int line) { currentScanner->setLine(line); } + void setCurrentColumn(int col) { currentScanner->setColumn(col); } + void setCurrentSourceName(const char* name) { currentScanner->setFile(name); } + void setCurrentString(int string) { currentScanner->setString(string); } + + void getPreamble(std::string&); +#ifdef ENABLE_HLSL + bool isReadingHLSL() const { return (messages & EShMsgReadHlsl) == EShMsgReadHlsl; } + bool hlslEnable16BitTypes() const { return (messages & EShMsgHlslEnable16BitTypes) != 0; } + bool hlslDX9Compatible() const { return (messages & EShMsgHlslDX9Compatible) != 0; } +#else + bool isReadingHLSL() const { return false; } +#endif + + TInfoSink& infoSink; + + // compilation mode + int version; // version, updated by #version in the shader + EShLanguage language; // really the stage + SpvVersion spvVersion; + TIntermediate& intermediate; // helper for making and hooking up pieces of the parse tree + +protected: + TMap<TString, TExtensionBehavior> extensionBehavior; // for each extension string, what its current behavior is + TMap<TString, unsigned int> extensionMinSpv; // for each extension string, store minimum spirv required + EShMessages messages; // errors/warnings/rule-sets + int numErrors; // number of compile-time errors encountered + TInputScanner* currentScanner; + +private: + explicit TParseVersions(const TParseVersions&); + TParseVersions& operator=(const TParseVersions&); +}; + +} // end namespace glslang + +#endif // _PARSE_VERSIONS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/preprocessor/PpContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/preprocessor/PpContext.h new file mode 100644 index 0000000..714b5ea --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/preprocessor/PpContext.h @@ -0,0 +1,703 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. + +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+\****************************************************************************/ + +#ifndef PPCONTEXT_H +#define PPCONTEXT_H + +#include <stack> +#include <unordered_map> +#include <sstream> + +#include "../ParseHelper.h" +#include "PpTokens.h" + +/* windows only pragma */ +#ifdef _MSC_VER + #pragma warning(disable : 4127) +#endif + +namespace glslang { + +class TPpToken { +public: + TPpToken() { clear(); } + void clear() + { + space = false; + i64val = 0; + loc.init(); + name[0] = 0; + } + + // Used for comparing macro definitions, so checks what is relevant for that. + bool operator==(const TPpToken& right) const + { + return space == right.space && + ival == right.ival && dval == right.dval && i64val == right.i64val && + strncmp(name, right.name, MaxTokenLength) == 0; + } + bool operator!=(const TPpToken& right) const { return ! operator==(right); } + + TSourceLoc loc; + // True if a space (for white space or a removed comment) should also be + // recognized, in front of the token returned: + bool space; + // Numeric value of the token: + union { + int ival; + double dval; + long long i64val; + }; + // Text string of the token: + char name[MaxTokenLength + 1]; +}; + +class TStringAtomMap { +// +// Implementation is in PpAtom.cpp +// +// Maintain a bi-directional mapping between relevant preprocessor strings and +// "atoms" which a unique integers (small, contiguous, not hash-like) per string. +// +public: + TStringAtomMap(); + + // Map string -> atom. + // Return 0 if no existing string. + int getAtom(const char* s) const + { + auto it = atomMap.find(s); + return it == atomMap.end() ? 0 : it->second; + } + + // Map a new or existing string -> atom, inventing a new atom if necessary. + int getAddAtom(const char* s) + { + int atom = getAtom(s); + if (atom == 0) { + atom = nextAtom++; + addAtomFixed(s, atom); + } + return atom; + } + + // Map atom -> string. + const char* getString(int atom) const { return stringMap[atom]->c_str(); } + +protected: + TStringAtomMap(TStringAtomMap&); + TStringAtomMap& operator=(TStringAtomMap&); + + TUnorderedMap<TString, int> atomMap; + TVector<const TString*> stringMap; // these point into the TString in atomMap + int nextAtom; + + // Bad source characters can lead to bad atoms, so gracefully handle those by + // pre-filling the table with them (to avoid if tests later). + TString badToken; + + // Add bi-directional mappings: + // - string -> atom + // - atom -> string + void addAtomFixed(const char* s, int atom) + { + auto it = atomMap.insert(std::pair<const TString, int>(s, atom)).first; + if (stringMap.size() < (size_t)atom + 1) + stringMap.resize(atom + 100, &badToken); + stringMap[atom] = &it->first; + } +}; + +class TInputScanner; + +enum MacroExpandResult { + MacroExpandNotStarted, // macro not expanded, which might not be an error + MacroExpandError, // a clear error occurred while expanding, no expansion + MacroExpandStarted, // macro expansion process has started + MacroExpandUndef // macro is undefined and will be expanded +}; + +// This class is the result of turning a huge pile of C code communicating through globals +// into a class. This was done to allowing instancing to attain thread safety. +// Don't expect too much in terms of OO design. 
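+// Illustrative sketch (added annotation, not part of the upstream glslang header): +// TPpContext scans from a *stack* of token sources: the base shader string plus +// one tInput pushed per active macro expansion, #include, or ungotten token. +// Assuming the members declared below, the core loop is roughly: +// +//     TPpToken tok; +//     int atom = scanToken(&tok);         // pops exhausted inputs until a token appears +//     if (atom == PpAtomIdentifier) +//         MacroExpand(&tok, false, true); // may push a tMacroInput; returns one of the +//                                         // MacroExpandResult values enumerated above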
+class TPpContext { +public: + TPpContext(TParseContextBase&, const std::string& rootFileName, TShader::Includer&); + virtual ~TPpContext(); + + void setPreamble(const char* preamble, size_t length); + + int tokenize(TPpToken& ppToken); + int tokenPaste(int token, TPpToken&); + + class tInput { + public: + tInput(TPpContext* p) : done(false), pp(p) { } + virtual ~tInput() { } + + virtual int scan(TPpToken*) = 0; + virtual int getch() = 0; + virtual void ungetch() = 0; + virtual bool peekPasting() { return false; } // true when about to see ## + virtual bool peekContinuedPasting(int) { return false; } // true when non-spaced tokens can paste + virtual bool endOfReplacementList() { return false; } // true when at the end of a macro replacement list (RHS of #define) + virtual bool isMacroInput() { return false; } + + // Will be called when we start reading tokens from this instance + virtual void notifyActivated() {} + // Will be called when we do not read tokens from this instance anymore + virtual void notifyDeleted() {} + protected: + bool done; + TPpContext* pp; + }; + + void setInput(TInputScanner& input, bool versionWillBeError); + + void pushInput(tInput* in) + { + inputStack.push_back(in); + in->notifyActivated(); + } + void popInput() + { + inputStack.back()->notifyDeleted(); + delete inputStack.back(); + inputStack.pop_back(); + } + + // + // From PpTokens.cpp + // + + // Capture the needed parts of a token stream for macro recording/playback. + class TokenStream { + public: + // Manage a stream of these 'Token', which capture the relevant parts + // of a TPpToken, plus its atom. + class Token { + public: + Token(int atom, const TPpToken& ppToken) : + atom(atom), + space(ppToken.space), + i64val(ppToken.i64val), + name(ppToken.name) { } + int get(TPpToken& ppToken) + { + ppToken.clear(); + ppToken.space = space; + ppToken.i64val = i64val; + snprintf(ppToken.name, sizeof(ppToken.name), "%s", name.c_str()); + return atom; + } + bool isAtom(int a) const { return atom == a; } + int getAtom() const { return atom; } + bool nonSpaced() const { return !space; } + protected: + Token() {} + int atom; + bool space; // did a space precede the token? + long long i64val; + TString name; + }; + + TokenStream() : currentPos(0) { } + + void putToken(int token, TPpToken* ppToken); + bool peekToken(int atom) { return !atEnd() && stream[currentPos].isAtom(atom); } + bool peekContinuedPasting(int atom) + { + // This is basically necessary because, for example, the PP + // tokenizer only accepts valid numeric-literals plus suffixes, so + // separates numeric-literals plus bad suffix into two tokens, which + // should get both pasted together as one token when token pasting. + // + // The following code is a bit more generalized than the above example. 
+ if (!atEnd() && atom == PpAtomIdentifier && stream[currentPos].nonSpaced()) { + switch(stream[currentPos].getAtom()) { + case PpAtomConstInt: + case PpAtomConstUint: + case PpAtomConstInt64: + case PpAtomConstUint64: + case PpAtomConstInt16: + case PpAtomConstUint16: + case PpAtomConstFloat: + case PpAtomConstDouble: + case PpAtomConstFloat16: + case PpAtomConstString: + case PpAtomIdentifier: + return true; + default: + break; + } + } + + return false; + } + int getToken(TParseContextBase&, TPpToken*); + bool atEnd() { return currentPos >= stream.size(); } + bool peekTokenizedPasting(bool lastTokenPastes); + bool peekUntokenizedPasting(); + void reset() { currentPos = 0; } + + protected: + TVector<Token> stream; + size_t currentPos; + }; + + // + // From Pp.cpp + // + + struct MacroSymbol { + MacroSymbol() : functionLike(0), busy(0), undef(0) { } + TVector<int> args; + TokenStream body; + unsigned functionLike : 1; // 0 means object-like, 1 means function-like + unsigned busy : 1; + unsigned undef : 1; + }; + + typedef TMap<int, MacroSymbol> TSymbolMap; + TSymbolMap macroDefs; // map atoms to macro definitions + MacroSymbol* lookupMacroDef(int atom) + { + auto existingMacroIt = macroDefs.find(atom); + return (existingMacroIt == macroDefs.end()) ? nullptr : &(existingMacroIt->second); + } + void addMacroDef(int atom, MacroSymbol& macroDef) { macroDefs[atom] = macroDef; } + +protected: + TPpContext(TPpContext&); + TPpContext& operator=(TPpContext&); + + TStringAtomMap atomStrings; + char* preamble; // string to parse, all before line 1 of string 0, it is 0 if no preamble + int preambleLength; + char** strings; // official strings of shader, starting a string 0 line 1 + size_t* lengths; + int numStrings; // how many official strings there are + int currentString; // which string we're currently parsing (-1 for preamble) + + // Scanner data: + int previous_token; + TParseContextBase& parseContext; + + // Get the next token from *stack* of input sources, popping input sources + // that are out of tokens, down until an input source is found that has a token. + // Return EndOfInput when there are no more tokens to be found by doing this. + int scanToken(TPpToken* ppToken) + { + int token = EndOfInput; + + while (! inputStack.empty()) { + token = inputStack.back()->scan(ppToken); + if (token != EndOfInput || inputStack.empty()) + break; + popInput(); + } + + return token; + } + int getChar() { return inputStack.back()->getch(); } + void ungetChar() { inputStack.back()->ungetch(); } + bool peekPasting() { return !inputStack.empty() && inputStack.back()->peekPasting(); } + bool peekContinuedPasting(int a) + { + return !inputStack.empty() && inputStack.back()->peekContinuedPasting(a); + } + bool endOfReplacementList() { return inputStack.empty() || inputStack.back()->endOfReplacementList(); } + bool isMacroInput() { return inputStack.size() > 0 && inputStack.back()->isMacroInput(); } + + static const int maxIfNesting = 65; + + int ifdepth; // current #if-#else-#endif nesting in the cpp.c file (pre-processor) + bool elseSeen[maxIfNesting]; // Keep a track of whether an else has been seen at a particular depth + int elsetracker; // #if-#else and #endif constructs...Counter. 
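+ // Illustrative sketch (added annotation, not part of the upstream glslang header): + // the three members above implement the #if/#else/#endif bookkeeping. For input like + // + //     #if FOO + //     #else + //     #else      // second #else at this depth is diagnosed via elseSeen[ifdepth] + //     #endif + // + // ifdepth rises on #if and falls on #endif (bounded by maxIfNesting = 65), and + // elseSeen[] records whether a #else was already consumed at each depth.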
+ + class tMacroInput : public tInput { + public: + tMacroInput(TPpContext* pp) : tInput(pp), prepaste(false), postpaste(false) { } + virtual ~tMacroInput() + { + for (size_t i = 0; i < args.size(); ++i) + delete args[i]; + for (size_t i = 0; i < expandedArgs.size(); ++i) + delete expandedArgs[i]; + } + + virtual int scan(TPpToken*) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + bool peekPasting() override { return prepaste; } + bool peekContinuedPasting(int a) override { return mac->body.peekContinuedPasting(a); } + bool endOfReplacementList() override { return mac->body.atEnd(); } + bool isMacroInput() override { return true; } + + MacroSymbol *mac; + TVector<TokenStream*> args; + TVector<TokenStream*> expandedArgs; + + protected: + bool prepaste; // true if we are just before ## + bool postpaste; // true if we are right after ## + }; + + class tMarkerInput : public tInput { + public: + tMarkerInput(TPpContext* pp) : tInput(pp) { } + virtual int scan(TPpToken*) override + { + if (done) + return EndOfInput; + done = true; + + return marker; + } + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + static const int marker = -3; + }; + + class tZeroInput : public tInput { + public: + tZeroInput(TPpContext* pp) : tInput(pp) { } + virtual int scan(TPpToken*) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + }; + + std::vector<tInput*> inputStack; + bool errorOnVersion; + bool versionSeen; + + // + // from Pp.cpp + // + + // Used to obtain #include content. + TShader::Includer& includer; + + int CPPdefine(TPpToken * ppToken); + int CPPundef(TPpToken * ppToken); + int CPPelse(int matchelse, TPpToken * ppToken); + int extraTokenCheck(int atom, TPpToken* ppToken, int token); + int eval(int token, int precedence, bool shortCircuit, int& res, bool& err, TPpToken * ppToken); + int evalToToken(int token, bool shortCircuit, int& res, bool& err, TPpToken * ppToken); + int CPPif (TPpToken * ppToken); + int CPPifdef(int defined, TPpToken * ppToken); + int CPPinclude(TPpToken * ppToken); + int CPPline(TPpToken * ppToken); + int CPPerror(TPpToken * ppToken); + int CPPpragma(TPpToken * ppToken); + int CPPversion(TPpToken * ppToken); + int CPPextension(TPpToken * ppToken); + int readCPPline(TPpToken * ppToken); + int scanHeaderName(TPpToken* ppToken, char delimit); + TokenStream* PrescanMacroArg(TokenStream&, TPpToken*, bool newLineOkay); + MacroExpandResult MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOkay); + + // + // From PpTokens.cpp + // + void pushTokenStreamInput(TokenStream&, bool pasting = false); + void UngetToken(int token, TPpToken*); + + class tTokenInput : public tInput { + public: + tTokenInput(TPpContext* pp, TokenStream* t, bool prepasting) : + tInput(pp), + tokens(t), + lastTokenPastes(prepasting) { } + virtual int scan(TPpToken *ppToken) override { return tokens->getToken(pp->parseContext, ppToken); } + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + virtual bool peekPasting() override { return tokens->peekTokenizedPasting(lastTokenPastes); } + bool peekContinuedPasting(int a) override { return tokens->peekContinuedPasting(a); } + protected: + TokenStream* tokens; + bool lastTokenPastes; // true if the last token in the input is to be pasted, rather than consumed as a token + }; + + class tUngotTokenInput : public tInput { 
public: + tUngotTokenInput(TPpContext* pp, int t, TPpToken* p) : tInput(pp), token(t), lval(*p) { } + virtual int scan(TPpToken *) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + protected: + int token; + TPpToken lval; + }; + + // + // From PpScanner.cpp + // + class tStringInput : public tInput { + public: + tStringInput(TPpContext* pp, TInputScanner& i) : tInput(pp), input(&i) { } + virtual int scan(TPpToken*) override; + + // Scanner used to get source stream characters. + // - Escaped newlines are handled here, invisibly to the caller. + // - All forms of newline are handled, and turned into just a '\n'. + int getch() override + { + int ch = input->get(); + + if (ch == '\\') { + // Move past escaped newlines, as many as sequentially exist + do { + if (input->peek() == '\r' || input->peek() == '\n') { + bool allowed = pp->parseContext.lineContinuationCheck(input->getSourceLoc(), pp->inComment); + if (! allowed && pp->inComment) + return '\\'; + + // escape one newline now + ch = input->get(); + int nextch = input->get(); + if (ch == '\r' && nextch == '\n') + ch = input->get(); + else + ch = nextch; + } else + return '\\'; + } while (ch == '\\'); + } + + // handle any non-escaped newline + if (ch == '\r' || ch == '\n') { + if (ch == '\r' && input->peek() == '\n') + input->get(); + return '\n'; + } + + return ch; + } + + // Scanner used to backup the source stream characters. Newlines are + // handled here, invisibly to the caller, meaning have to undo exactly + // what getch() above does (e.g., don't leave things in the middle of a + // sequence of escaped newlines). + void ungetch() override + { + input->unget(); + + do { + int ch = input->peek(); + if (ch == '\r' || ch == '\n') { + if (ch == '\n') { + // correct for two-character newline + input->unget(); + if (input->peek() != '\r') + input->get(); + } + // now in front of a complete newline, move past an escape character + input->unget(); + if (input->peek() == '\\') + input->unget(); + else { + input->get(); + break; + } + } else + break; + } while (true); + } + + protected: + TInputScanner* input; + }; + + // Holds a reference to included file data, as well as a + // prologue and an epilogue string. This can be scanned using the tInput + // interface and acts as a single source string. + class TokenizableIncludeFile : public tInput { + public: + // Copies prologue and epilogue. The includedFile must remain valid + // until this TokenizableIncludeFile is no longer used. 
+ TokenizableIncludeFile(const TSourceLoc& startLoc, + const std::string& prologue, + TShader::Includer::IncludeResult* includedFile, + const std::string& epilogue, + TPpContext* pp) + : tInput(pp), + prologue_(prologue), + epilogue_(epilogue), + includedFile_(includedFile), + scanner(3, strings, lengths, nullptr, 0, 0, true), + prevScanner(nullptr), + stringInput(pp, scanner) + { + strings[0] = prologue_.data(); + strings[1] = includedFile_->headerData; + strings[2] = epilogue_.data(); + + lengths[0] = prologue_.size(); + lengths[1] = includedFile_->headerLength; + lengths[2] = epilogue_.size(); + + scanner.setLine(startLoc.line); + scanner.setString(startLoc.string); + + scanner.setFile(startLoc.getFilenameStr(), 0); + scanner.setFile(startLoc.getFilenameStr(), 1); + scanner.setFile(startLoc.getFilenameStr(), 2); + } + + // tInput methods: + int scan(TPpToken* t) override { return stringInput.scan(t); } + int getch() override { return stringInput.getch(); } + void ungetch() override { stringInput.ungetch(); } + + void notifyActivated() override + { + prevScanner = pp->parseContext.getScanner(); + pp->parseContext.setScanner(&scanner); + pp->push_include(includedFile_); + } + + void notifyDeleted() override + { + pp->parseContext.setScanner(prevScanner); + pp->pop_include(); + } + + private: + TokenizableIncludeFile& operator=(const TokenizableIncludeFile&); + + // Stores the prologue for this string. + const std::string prologue_; + + // Stores the epilogue for this string. + const std::string epilogue_; + + // Points to the IncludeResult that this TokenizableIncludeFile represents. + TShader::Includer::IncludeResult* includedFile_; + + // Will point to prologue_, includedFile_->headerData and epilogue_ + // This is passed to scanner constructor. + // These do not own the storage and it must remain valid until this + // object has been destroyed. + const char* strings[3]; + // Length of str_, passed to scanner constructor. + size_t lengths[3]; + // Scans over str_. + TInputScanner scanner; + // The previous effective scanner before the scanner in this instance + // has been activated. + TInputScanner* prevScanner; + // Delegate object implementing the tInput interface. 
+ tStringInput stringInput; + }; + + int ScanFromString(char* s); + void missingEndifCheck(); + int lFloatConst(int len, int ch, TPpToken* ppToken); + int characterLiteral(TPpToken* ppToken); + + void push_include(TShader::Includer::IncludeResult* result) + { + currentSourceFile = result->headerName; + includeStack.push(result); + } + + void pop_include() + { + TShader::Includer::IncludeResult* include = includeStack.top(); + includeStack.pop(); + includer.releaseInclude(include); + if (includeStack.empty()) { + currentSourceFile = rootFileName; + } else { + currentSourceFile = includeStack.top()->headerName; + } + } + + bool inComment; + std::string rootFileName; + std::stack<TShader::Includer::IncludeResult*> includeStack; + std::string currentSourceFile; + + std::istringstream strtodStream; + bool disableEscapeSequences; +}; + +} // end namespace glslang + +#endif // PPCONTEXT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/preprocessor/PpTokens.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/preprocessor/PpTokens.h new file mode 100644 index 0000000..7b0f815 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/preprocessor/PpTokens.h @@ -0,0 +1,179 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. 
+ +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\****************************************************************************/ + +#ifndef PARSER_H +#define PARSER_H + +namespace glslang { + +// Multi-character tokens +enum EFixedAtoms { + // single character tokens get their own char value as their token; start here for multi-character tokens + PpAtomMaxSingle = 127, + + // replace bad character tokens with this, to avoid accidental aliasing with the below + PpAtomBadToken, + + // Operators + + PPAtomAddAssign, + PPAtomSubAssign, + PPAtomMulAssign, + PPAtomDivAssign, + PPAtomModAssign, + + PpAtomRight, + PpAtomLeft, + + PpAtomRightAssign, + PpAtomLeftAssign, + PpAtomAndAssign, + PpAtomOrAssign, + PpAtomXorAssign, + + PpAtomAnd, + PpAtomOr, + PpAtomXor, + + PpAtomEQ, + PpAtomNE, + PpAtomGE, + PpAtomLE, + + PpAtomDecrement, + PpAtomIncrement, + + PpAtomColonColon, + + PpAtomPaste, + + // Constants + + PpAtomConstInt, + PpAtomConstUint, + PpAtomConstInt64, + PpAtomConstUint64, + PpAtomConstInt16, + PpAtomConstUint16, + PpAtomConstFloat, + PpAtomConstDouble, + PpAtomConstFloat16, + PpAtomConstString, + + // Identifiers + PpAtomIdentifier, + + // preprocessor "keywords" + + PpAtomDefine, + PpAtomUndef, + + PpAtomIf, + PpAtomIfdef, + PpAtomIfndef, + PpAtomElse, + PpAtomElif, + PpAtomEndif, + + PpAtomLine, + PpAtomPragma, + PpAtomError, + + // #version ... 
+ PpAtomVersion, + PpAtomCore, + PpAtomCompatibility, + PpAtomEs, + + // #extension + PpAtomExtension, + + // __LINE__, __FILE__, __VERSION__ + + PpAtomLineMacro, + PpAtomFileMacro, + PpAtomVersionMacro, + + // #include + PpAtomInclude, + + PpAtomLast, +}; + +} // end namespace glslang + +#endif /* not PARSER_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/propagateNoContraction.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/propagateNoContraction.h new file mode 100644 index 0000000..8521ad7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/propagateNoContraction.h @@ -0,0 +1,55 @@ +// +// Copyright (C) 2015-2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Visit the nodes in the glslang intermediate tree representation to +// propagate 'noContraction' qualifier. +// + +#pragma once + +#include "../Include/intermediate.h" + +namespace glslang { + +// Propagates the 'precise' qualifier for objects (objects marked with +// 'noContraction' qualifier) from the shader source specified 'precise' +// variables to all the involved objects, and add 'noContraction' qualifier for +// the involved arithmetic operations. +// Note that the same qualifier: 'noContraction' is used in both object nodes +// and arithmetic operation nodes, but has different meaning. For object nodes, +// 'noContraction' means the object is 'precise'; and for arithmetic operation +// nodes, it means the operation should not be contracted. 
+void PropagateNoContraction(const glslang::TIntermediate& intermediate);
+};
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/reflection.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/reflection.h
new file mode 100644
index 0000000..bfd5452
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/MachineIndependent/reflection.h
@@ -0,0 +1,223 @@
+//
+// Copyright (C) 2013-2016 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#if !defined(GLSLANG_WEB)
+
+#ifndef _REFLECTION_INCLUDED
+#define _REFLECTION_INCLUDED
+
+#include "../Public/ShaderLang.h"
+#include "../Include/Types.h"
+
+#include <list>
+#include <set>
+
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+//
+
+namespace glslang {
+
+class TIntermediate;
+class TIntermAggregate;
+class TReflectionTraverser;
+
+// The full reflection database
+class TReflection {
+public:
+    TReflection(EShReflectionOptions opts, EShLanguage first, EShLanguage last)
+        : options(opts), firstStage(first), lastStage(last), badReflection(TObjectReflection::badReflection())
+    {
+        for (int dim=0; dim<3; ++dim)
+            localSize[dim] = 0;
+    }
+
+    virtual ~TReflection() {}
+
+    // grow the reflection stage by stage
+    bool addStage(EShLanguage, const TIntermediate&);
+
+    // for mapping a uniform index to a uniform object's description
+    int getNumUniforms() { return (int)indexToUniform.size(); }
+    const TObjectReflection& getUniform(int i) const
+    {
+        if (i >= 0 && i < (int)indexToUniform.size())
+            return indexToUniform[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a block index to the block's description
+    int getNumUniformBlocks() const { return (int)indexToUniformBlock.size(); }
+    const TObjectReflection& getUniformBlock(int i) const
+    {
+        if (i >= 0 && i < (int)indexToUniformBlock.size())
+            return indexToUniformBlock[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a pipeline input index to the input's description
+    int getNumPipeInputs() { return (int)indexToPipeInput.size(); }
+    const TObjectReflection& getPipeInput(int i) const
+    {
+        if (i >= 0 && i < (int)indexToPipeInput.size())
+            return indexToPipeInput[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a pipeline output index to the output's description
+    int getNumPipeOutputs() { return (int)indexToPipeOutput.size(); }
+    const TObjectReflection& getPipeOutput(int i) const
+    {
+        if (i >= 0 && i < (int)indexToPipeOutput.size())
+            return indexToPipeOutput[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping from an atomic counter to the uniform index
+    int getNumAtomicCounters() const { return (int)atomicCounterUniformIndices.size(); }
+    const TObjectReflection& getAtomicCounter(int i) const
+    {
+        if (i >= 0 && i < (int)atomicCounterUniformIndices.size())
+            return getUniform(atomicCounterUniformIndices[i]);
+        else
+            return badReflection;
+    }
+
+    // for mapping a buffer variable index to a buffer variable object's description
+    int getNumBufferVariables() { return (int)indexToBufferVariable.size(); }
+    const TObjectReflection& getBufferVariable(int i) const
+    {
+        if (i >= 0 && i < (int)indexToBufferVariable.size())
+            return indexToBufferVariable[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a storage block index to the storage block's description
+    int getNumStorageBuffers() const { return (int)indexToBufferBlock.size(); }
+    const TObjectReflection& getStorageBufferBlock(int i) const
+    {
+        if (i >= 0 && i < (int)indexToBufferBlock.size())
+            return indexToBufferBlock[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping any name to its index (block names, uniform names and input/output names)
+    int getIndex(const char* name) const
+    {
+        TNameToIndex::const_iterator it = nameToIndex.find(name);
+        if (it == nameToIndex.end())
+            return -1;
+        else
+            return it->second;
+    }
+
+    // see getIndex(const char*)
+    int getIndex(const TString& name) const { return getIndex(name.c_str()); }
+
+    // for mapping any name to its index (only pipe input/output names)
+    int getPipeIOIndex(const char* name, const bool inOrOut) const
+    {
+        TNameToIndex::const_iterator it = inOrOut ? pipeInNameToIndex.find(name) : pipeOutNameToIndex.find(name);
+        if (it == (inOrOut ? pipeInNameToIndex.end() : pipeOutNameToIndex.end()))
+            return -1;
+        else
+            return it->second;
+    }
+
+    // see getPipeIOIndex(const char*, const bool)
+    int getPipeIOIndex(const TString& name, const bool inOrOut) const { return getPipeIOIndex(name.c_str(), inOrOut); }
+
+    // Thread local size
+    unsigned getLocalSize(int dim) const { return dim <= 2 ? localSize[dim] : 0; }
+
+    void dump();
+
+protected:
+    friend class glslang::TReflectionTraverser;
+
+    void buildCounterIndices(const TIntermediate&);
+    void buildUniformStageMask(const TIntermediate& intermediate);
+    void buildAttributeReflection(EShLanguage, const TIntermediate&);
+
+    // Need a TString hash: typedef std::unordered_map<TString, int> TNameToIndex;
+    typedef std::map<TString, int> TNameToIndex;
+    typedef std::vector<TObjectReflection> TMapIndexToReflection;
+    typedef std::vector<int> TIndices;
+
+    TMapIndexToReflection& GetBlockMapForStorage(TStorageQualifier storage)
+    {
+        if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer)
+            return indexToBufferBlock;
+        return indexToUniformBlock;
+    }
+    TMapIndexToReflection& GetVariableMapForStorage(TStorageQualifier storage)
+    {
+        if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer)
+            return indexToBufferVariable;
+        return indexToUniform;
+    }
+
+    EShReflectionOptions options;
+
+    EShLanguage firstStage;
+    EShLanguage lastStage;
+
+    TObjectReflection badReflection; // returned for queries of -1 or otherwise out of range; has the expected descriptions in it for this
+    TNameToIndex nameToIndex;        // maps names to indexes; can hold all types of data: uniform/buffer and which function names have been processed
+    TNameToIndex pipeInNameToIndex;  // maps pipe in names to indexes; this is a fix to separate pipe I/O from uniforms and buffers.
+    TNameToIndex pipeOutNameToIndex; // maps pipe out names to indexes; this is a fix to separate pipe I/O from uniforms and buffers.
+    TMapIndexToReflection indexToUniform;
+    TMapIndexToReflection indexToUniformBlock;
+    TMapIndexToReflection indexToBufferVariable;
+    TMapIndexToReflection indexToBufferBlock;
+    TMapIndexToReflection indexToPipeInput;
+    TMapIndexToReflection indexToPipeOutput;
+    TIndices atomicCounterUniformIndices;
+
+    unsigned int localSize[3];
+};
+
+} // end namespace glslang
+
+#endif // _REFLECTION_INCLUDED
+
+#endif // !GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/ResourceLimits.h
new file mode 100644
index 0000000..f70be81
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/ResourceLimits.h
@@ -0,0 +1,57 @@
+//
+// Copyright (C) 2016 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
+#define _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
+
+#include <string>
+
+#include "../Include/ResourceLimits.h"
+
+// Return a pointer to a user-writable Resource to pass through the API in a
+// future-proof way.
+extern TBuiltInResource* GetResources();
+
+// These are the default resources for TBuiltInResources, used for both
+//  - parsing this string for the case where the user didn't supply one,
+//  - dumping out a template for user construction of a config file.
+extern const TBuiltInResource* GetDefaultResources();
+
+// Returns the DefaultTBuiltInResource as a human-readable string.
+std::string GetDefaultTBuiltInResourceString();
+
+// Decodes the resource limits from |config| to |resources|.
+void DecodeResourceLimits(TBuiltInResource* resources, char* config);
+
+#endif // _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/ShaderLang.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/ShaderLang.h
new file mode 100644
index 0000000..90a5302
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/ShaderLang.h
@@ -0,0 +1,987 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2013-2016 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+#ifndef _COMPILER_INTERFACE_INCLUDED_
+#define _COMPILER_INTERFACE_INCLUDED_
+
+#include "../Include/ResourceLimits.h"
+#include "../MachineIndependent/Versions.h"
+
+#include <cstring>
+#include <vector>
+
+#ifdef _WIN32
+    #define C_DECL __cdecl
+#else
+    #define C_DECL
+#endif
+
+#ifdef GLSLANG_IS_SHARED_LIBRARY
+    #ifdef _WIN32
+        #ifdef GLSLANG_EXPORTING
+            #define GLSLANG_EXPORT __declspec(dllexport)
+        #else
+            #define GLSLANG_EXPORT __declspec(dllimport)
+        #endif
+    #elif __GNUC__ >= 4
+        #define GLSLANG_EXPORT __attribute__((visibility("default")))
+    #endif
+#endif // GLSLANG_IS_SHARED_LIBRARY
+
+#ifndef GLSLANG_EXPORT
+#define GLSLANG_EXPORT
+#endif
+
+//
+// This is the platform independent interface between an OGL driver
+// and the shading language compiler/linker.
+//
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+//
+// Call before doing any other compiler/linker operations.
+//
+// (Call once per process, not once per thread.)
+//
+GLSLANG_EXPORT int ShInitialize();
+
+//
+// Call this at process shutdown to clean up memory.
+//
+GLSLANG_EXPORT int ShFinalize();
+
+//
+// Types of languages the compiler can consume.
+//
+typedef enum {
+    EShLangVertex,
+    EShLangTessControl,
+    EShLangTessEvaluation,
+    EShLangGeometry,
+    EShLangFragment,
+    EShLangCompute,
+    EShLangRayGen,
+    EShLangRayGenNV = EShLangRayGen,
+    EShLangIntersect,
+    EShLangIntersectNV = EShLangIntersect,
+    EShLangAnyHit,
+    EShLangAnyHitNV = EShLangAnyHit,
+    EShLangClosestHit,
+    EShLangClosestHitNV = EShLangClosestHit,
+    EShLangMiss,
+    EShLangMissNV = EShLangMiss,
+    EShLangCallable,
+    EShLangCallableNV = EShLangCallable,
+    EShLangTask,
+    EShLangTaskNV = EShLangTask,
+    EShLangMesh,
+    EShLangMeshNV = EShLangMesh,
+    LAST_ELEMENT_MARKER(EShLangCount),
+} EShLanguage; // would be better as stage, but this is ancient now
+
+typedef enum : unsigned {
+    EShLangVertexMask         = (1 << EShLangVertex),
+    EShLangTessControlMask    = (1 << EShLangTessControl),
+    EShLangTessEvaluationMask = (1 << EShLangTessEvaluation),
+    EShLangGeometryMask       = (1 << EShLangGeometry),
+    EShLangFragmentMask       = (1 << EShLangFragment),
+    EShLangComputeMask        = (1 << EShLangCompute),
+    EShLangRayGenMask         = (1 << EShLangRayGen),
+    EShLangRayGenNVMask       = EShLangRayGenMask,
+    EShLangIntersectMask      = (1 << EShLangIntersect),
+    EShLangIntersectNVMask    = EShLangIntersectMask,
+    EShLangAnyHitMask         = (1 << EShLangAnyHit),
+    EShLangAnyHitNVMask       = EShLangAnyHitMask,
+    EShLangClosestHitMask     = (1 << EShLangClosestHit),
+    EShLangClosestHitNVMask   = EShLangClosestHitMask,
+    EShLangMissMask           = (1 << EShLangMiss),
+    EShLangMissNVMask         = EShLangMissMask,
+    EShLangCallableMask       = (1 << EShLangCallable),
+    EShLangCallableNVMask     = EShLangCallableMask,
+    EShLangTaskMask           = (1 << EShLangTask),
+    EShLangTaskNVMask         = EShLangTaskMask,
+    EShLangMeshMask           = (1 << EShLangMesh),
+    EShLangMeshNVMask         = EShLangMeshMask,
+    LAST_ELEMENT_MARKER(EShLanguageMaskCount),
+} EShLanguageMask;
+
+namespace glslang {
+
+class TType;
+
+typedef enum {
+    EShSourceNone,
+    EShSourceGlsl, // GLSL,
includes ESSL (OpenGL ES GLSL) + EShSourceHlsl, // HLSL + LAST_ELEMENT_MARKER(EShSourceCount), +} EShSource; // if EShLanguage were EShStage, this could be EShLanguage instead + +typedef enum { + EShClientNone, // use when there is no client, e.g. for validation + EShClientVulkan, // as GLSL dialect, specifies KHR_vulkan_glsl extension + EShClientOpenGL, // as GLSL dialect, specifies ARB_gl_spirv extension + LAST_ELEMENT_MARKER(EShClientCount), +} EShClient; + +typedef enum { + EShTargetNone, + EShTargetSpv, // SPIR-V (preferred spelling) + EshTargetSpv = EShTargetSpv, // legacy spelling + LAST_ELEMENT_MARKER(EShTargetCount), +} EShTargetLanguage; + +typedef enum { + EShTargetVulkan_1_0 = (1 << 22), // Vulkan 1.0 + EShTargetVulkan_1_1 = (1 << 22) | (1 << 12), // Vulkan 1.1 + EShTargetVulkan_1_2 = (1 << 22) | (2 << 12), // Vulkan 1.2 + EShTargetVulkan_1_3 = (1 << 22) | (3 << 12), // Vulkan 1.3 + EShTargetOpenGL_450 = 450, // OpenGL + LAST_ELEMENT_MARKER(EShTargetClientVersionCount = 5), +} EShTargetClientVersion; + +typedef EShTargetClientVersion EshTargetClientVersion; + +typedef enum { + EShTargetSpv_1_0 = (1 << 16), // SPIR-V 1.0 + EShTargetSpv_1_1 = (1 << 16) | (1 << 8), // SPIR-V 1.1 + EShTargetSpv_1_2 = (1 << 16) | (2 << 8), // SPIR-V 1.2 + EShTargetSpv_1_3 = (1 << 16) | (3 << 8), // SPIR-V 1.3 + EShTargetSpv_1_4 = (1 << 16) | (4 << 8), // SPIR-V 1.4 + EShTargetSpv_1_5 = (1 << 16) | (5 << 8), // SPIR-V 1.5 + EShTargetSpv_1_6 = (1 << 16) | (6 << 8), // SPIR-V 1.6 + LAST_ELEMENT_MARKER(EShTargetLanguageVersionCount = 7), +} EShTargetLanguageVersion; + +struct TInputLanguage { + EShSource languageFamily; // redundant information with other input, this one overrides when not EShSourceNone + EShLanguage stage; // redundant information with other input, this one overrides when not EShSourceNone + EShClient dialect; + int dialectVersion; // version of client's language definition, not the client (when not EShClientNone) + bool vulkanRulesRelaxed; +}; + +struct TClient { + EShClient client; + EShTargetClientVersion version; // version of client itself (not the client's input dialect) +}; + +struct TTarget { + EShTargetLanguage language; + EShTargetLanguageVersion version; // version to target, if SPIR-V, defined by "word 1" of the SPIR-V header + bool hlslFunctionality1; // can target hlsl_functionality1 extension(s) +}; + +// All source/client/target versions and settings. +// Can override previous methods of setting, when items are set here. +// Expected to grow, as more are added, rather than growing parameter lists. +struct TEnvironment { + TInputLanguage input; // definition of the input language + TClient client; // what client is the overall compilation being done for? + TTarget target; // what to generate +}; + +GLSLANG_EXPORT const char* StageName(EShLanguage); + +} // end namespace glslang + +// +// Types of output the linker will create. +// +typedef enum { + EShExVertexFragment, + EShExFragment +} EShExecutable; + +// +// Optimization level for the compiler. +// +typedef enum { + EShOptNoGeneration, + EShOptNone, + EShOptSimple, // Optimizations that can be done quickly + EShOptFull, // Optimizations that will take more time + LAST_ELEMENT_MARKER(EshOptLevelCount), +} EShOptimizationLevel; + +// +// Texture and Sampler transformation mode. 
+//
+typedef enum {
+    EShTexSampTransKeep,   // keep textures and samplers as is (default)
+    EShTexSampTransUpgradeTextureRemoveSampler,  // change texture w/o embedded sampler into sampled texture and throw away all samplers
+    LAST_ELEMENT_MARKER(EShTexSampTransCount),
+} EShTextureSamplerTransformMode;
+
+//
+// Message choices for what errors and warnings are given.
+//
+enum EShMessages : unsigned {
+    EShMsgDefault          = 0,         // default is to give all required errors and extra warnings
+    EShMsgRelaxedErrors    = (1 << 0),  // be liberal in accepting input
+    EShMsgSuppressWarnings = (1 << 1),  // suppress all warnings, except those required by the specification
+    EShMsgAST              = (1 << 2),  // print the AST intermediate representation
+    EShMsgSpvRules         = (1 << 3),  // issue messages for SPIR-V generation
+    EShMsgVulkanRules      = (1 << 4),  // issue messages for Vulkan-requirements of GLSL for SPIR-V
+    EShMsgOnlyPreprocessor = (1 << 5),  // only print out errors produced by the preprocessor
+    EShMsgReadHlsl         = (1 << 6),  // use HLSL parsing rules and semantics
+    EShMsgCascadingErrors  = (1 << 7),  // get cascading errors; risks error-recovery issues, instead of an early exit
+    EShMsgKeepUncalled     = (1 << 8),  // for testing, don't eliminate uncalled functions
+    EShMsgHlslOffsets      = (1 << 9),  // allow block offsets to follow HLSL rules instead of GLSL rules
+    EShMsgDebugInfo        = (1 << 10), // save debug information
+    EShMsgHlslEnable16BitTypes = (1 << 11), // enable use of 16-bit types in SPIR-V for HLSL
+    EShMsgHlslLegalization = (1 << 12), // enable HLSL Legalization messages
+    EShMsgHlslDX9Compatible = (1 << 13), // enable HLSL DX9 compatible mode (for samplers and semantics)
+    EShMsgBuiltinSymbolTable = (1 << 14), // print the builtin symbol table
+    EShMsgEnhanced         = (1 << 15), // enhanced message readability
+    LAST_ELEMENT_MARKER(EShMsgCount),
+};
+
+//
+// Options for building reflection
+//
+typedef enum {
+    EShReflectionDefault            = 0,        // default is original behaviour before options were added
+    EShReflectionStrictArraySuffix  = (1 << 0), // reflection will follow stricter rules for array-of-structs suffixes
+    EShReflectionBasicArraySuffix   = (1 << 1), // arrays of basic types will be appended with [0] as in GL reflection
+    EShReflectionIntermediateIO     = (1 << 2), // reflect inputs and outputs to program, even with no vertex shader
+    EShReflectionSeparateBuffers    = (1 << 3), // buffer variables and buffer blocks are reflected separately
+    EShReflectionAllBlockVariables  = (1 << 4), // reflect all variables in blocks, even if they are inactive
+    EShReflectionUnwrapIOBlocks     = (1 << 5), // unwrap input/output blocks the same as with uniform blocks
+    EShReflectionAllIOVariables     = (1 << 6), // reflect all input/output variables, even if they are inactive
+    EShReflectionSharedStd140SSBO   = (1 << 7), // Apply std140/shared rules for ubo to ssbo
+    EShReflectionSharedStd140UBO    = (1 << 8), // Apply std140/shared rules for ubo
+    LAST_ELEMENT_MARKER(EShReflectionCount),
+} EShReflectionOptions;
+
+//
+// Build a table for bindings. This can be used for locating
+// attributes, uniforms, globals, etc., as needed.
+//
+typedef struct {
+    const char* name;
+    int binding;
+} ShBinding;
+
+typedef struct {
+    int numBindings;
+    ShBinding* bindings; // array of bindings
+} ShBindingTable;
+
+//
+// ShHandle held by but opaque to the driver. It is allocated,
+// managed, and de-allocated by the compiler/linker. Its contents
+// are defined by and used by the compiler and linker. For example,
+// symbol table information and object code passed from the compiler
+// to the linker can be stored where ShHandle points.
+//
+// If handle creation fails, 0 will be returned.
+//
+typedef void* ShHandle;
+
+//
+// Driver calls these to create and destroy compiler/linker
+// objects.
+//
+GLSLANG_EXPORT ShHandle ShConstructCompiler(const EShLanguage, int debugOptions); // one per shader
+GLSLANG_EXPORT ShHandle ShConstructLinker(const EShExecutable, int debugOptions); // one per shader pair
+GLSLANG_EXPORT ShHandle ShConstructUniformMap();                                  // one per uniform namespace (currently entire program object)
+GLSLANG_EXPORT void ShDestruct(ShHandle);
+
+//
+// The return value of ShCompile is boolean, non-zero indicating
+// success.
+//
+// The info-log should be written by ShCompile into
+// ShHandle, so it can answer future queries.
+//
+GLSLANG_EXPORT int ShCompile(
+    const ShHandle,
+    const char* const shaderStrings[],
+    const int numStrings,
+    const int* lengths,
+    const EShOptimizationLevel,
+    const TBuiltInResource *resources,
+    int debugOptions,
+    int defaultVersion = 110,            // use 100 for ES environment, overridden by #version in shader
+    bool forwardCompatible = false,      // give errors for use of deprecated features
+    EShMessages messages = EShMsgDefault // warnings and errors
+    );
+
+GLSLANG_EXPORT int ShLinkExt(
+    const ShHandle,          // linker object
+    const ShHandle h[],      // compiler objects to link together
+    const int numHandles);
+
+//
+// ShSetEncryptionMethod is a place-holder for specifying
+// how source code is encrypted.
+//
+GLSLANG_EXPORT void ShSetEncryptionMethod(ShHandle);
+
+//
+// All the following return 0 if the information is not
+// available in the object passed down, or the object is bad.
+//
+GLSLANG_EXPORT const char* ShGetInfoLog(const ShHandle);
+GLSLANG_EXPORT const void* ShGetExecutable(const ShHandle);
+GLSLANG_EXPORT int ShSetVirtualAttributeBindings(const ShHandle, const ShBindingTable*); // to detect user aliasing
+GLSLANG_EXPORT int ShSetFixedAttributeBindings(const ShHandle, const ShBindingTable*);   // to force any physical mappings
+//
+// Tell the linker to never assign a vertex attribute to this list of physical attributes
+//
+GLSLANG_EXPORT int ShExcludeAttributes(const ShHandle, int *attributes, int count);
+
+//
+// Returns the location ID of the named uniform.
+// Returns -1 if error.
+//
+GLSLANG_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char* name);
+
+#ifdef __cplusplus
+    } // end extern "C"
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Deferred-Lowering C++ Interface
+// -----------------------------------
+//
+// Below is a new alternate C++ interface, which deprecates the above
+// opaque handle-based interface.
+//
+// The below is further designed to handle multiple compilation units per stage, where
+// the intermediate results, including the parse tree, are preserved until link time,
+// rather than the above interface which is designed to have each compilation unit
+// lowered at compile time. In the above model, linking occurs on the lowered results,
+// whereas in this model intra-stage linking can occur at the parse tree
+// (treeRoot in TIntermediate) level, and then a full stage can be lowered.
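+//
+// For orientation, a minimal sketch of the older handle-based flow declared
+// above (illustrative only; assumes a populated TBuiltInResource named
+// 'Resources' and a fragment-shader source string 'src'):
+//
+//     ShInitialize();
+//     ShHandle compiler = ShConstructCompiler(EShLangFragment, 0);
+//     const char* strings[] = { src };
+//     if (!ShCompile(compiler, strings, 1, nullptr, EShOptNone, &Resources, 0))
+//         puts(ShGetInfoLog(compiler));
+//     ShDestruct(compiler);
+//     ShFinalize();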
+//
+
+#include <list>
+#include <string>
+#include <utility>
+
+class TCompiler;
+class TInfoSink;
+
+namespace glslang {
+
+struct Version {
+    int major;
+    int minor;
+    int patch;
+    const char* flavor;
+};
+
+GLSLANG_EXPORT Version GetVersion();
+GLSLANG_EXPORT const char* GetEsslVersionString();
+GLSLANG_EXPORT const char* GetGlslVersionString();
+GLSLANG_EXPORT int GetKhronosToolId();
+
+class TIntermediate;
+class TProgram;
+class TPoolAllocator;
+
+// Call this exactly once per process before using anything else
+GLSLANG_EXPORT bool InitializeProcess();
+
+// Call once per process to tear down everything
+GLSLANG_EXPORT void FinalizeProcess();
+
+// Resource type for IO resolver
+enum TResourceType {
+    EResSampler,
+    EResTexture,
+    EResImage,
+    EResUbo,
+    EResSsbo,
+    EResUav,
+    EResCount
+};
+
+enum TBlockStorageClass
+{
+    EbsUniform = 0,
+    EbsStorageBuffer,
+    EbsPushConstant,
+    EbsNone,    // not a uniform or buffer variable
+    EbsCount,
+};
+
+// Make one TShader per shader that you will link into a program. Then
+//  - provide the shader through setStrings() or setStringsWithLengths()
+//  - optionally call setEnv*(), see below for more detail
+//  - optionally use setPreamble() to set a special shader string that will be
+//    processed before all others but won't affect the validity of #version
+//  - optionally call addProcesses() for each setting/transform,
+//    see comment for class TProcesses
+//  - call parse(): source language and target environment must be selected
+//    either by correct setting of EShMessages sent to parse(), or by
+//    explicitly calling setEnv*()
+//  - query the info logs
+//
+// N.B.: Does not yet support having the same TShader instance being linked into
+// multiple programs.
+//
+// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
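+//
+// A minimal sketch of that sequence (illustrative only; assumes a populated
+// TBuiltInResource named 'Resources' and a GLSL-for-Vulkan vertex-shader
+// source string 'src'):
+//
+//     glslang::InitializeProcess();
+//     glslang::TShader shader(EShLangVertex);
+//     const char* strings[] = { src };
+//     shader.setStrings(strings, 1);
+//     shader.setEnvInput(glslang::EShSourceGlsl, EShLangVertex,
+//                        glslang::EShClientVulkan, 100);
+//     shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetVulkan_1_1);
+//     shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
+//     if (!shader.parse(&Resources, 110, false, EShMsgDefault))
+//         puts(shader.getInfoLog());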
+//
+class TShader {
+public:
+    GLSLANG_EXPORT explicit TShader(EShLanguage);
+    GLSLANG_EXPORT virtual ~TShader();
+    GLSLANG_EXPORT void setStrings(const char* const* s, int n);
+    GLSLANG_EXPORT void setStringsWithLengths(
+        const char* const* s, const int* l, int n);
+    GLSLANG_EXPORT void setStringsWithLengthsAndNames(
+        const char* const* s, const int* l, const char* const* names, int n);
+    void setPreamble(const char* s) { preamble = s; }
+    GLSLANG_EXPORT void setEntryPoint(const char* entryPoint);
+    GLSLANG_EXPORT void setSourceEntryPoint(const char* sourceEntryPointName);
+    GLSLANG_EXPORT void addProcesses(const std::vector<std::string>&);
+    GLSLANG_EXPORT void setUniqueId(unsigned long long id);
+    GLSLANG_EXPORT void setOverrideVersion(int version);
+    GLSLANG_EXPORT void setDebugInfo(bool debugInfo);
+
+    // IO resolver binding data: see comments in ShaderLang.cpp
+    GLSLANG_EXPORT void setShiftBinding(TResourceType res, unsigned int base);
+    GLSLANG_EXPORT void setShiftSamplerBinding(unsigned int base);  // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftTextureBinding(unsigned int base);  // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftImageBinding(unsigned int base);    // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftUboBinding(unsigned int base);      // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftUavBinding(unsigned int base);      // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftCbufferBinding(unsigned int base);  // synonym for setShiftUboBinding
+    GLSLANG_EXPORT void setShiftSsboBinding(unsigned int base);     // DEPRECATED: use setShiftBinding
+    GLSLANG_EXPORT void setShiftBindingForSet(TResourceType res, unsigned int base, unsigned int set);
+    GLSLANG_EXPORT void setResourceSetBinding(const std::vector<std::string>& base);
+    GLSLANG_EXPORT void setAutoMapBindings(bool map);
+    GLSLANG_EXPORT void setAutoMapLocations(bool map);
+    GLSLANG_EXPORT void addUniformLocationOverride(const char* name, int loc);
+    GLSLANG_EXPORT void setUniformLocationBase(int base);
+    GLSLANG_EXPORT void setInvertY(bool invert);
+    GLSLANG_EXPORT void setDxPositionW(bool dxPosW);
+    GLSLANG_EXPORT void setEnhancedMsgs();
+#ifdef ENABLE_HLSL
+    GLSLANG_EXPORT void setHlslIoMapping(bool hlslIoMap);
+    GLSLANG_EXPORT void setFlattenUniformArrays(bool flatten);
+#endif
+    GLSLANG_EXPORT void setNoStorageFormat(bool useUnknownFormat);
+    GLSLANG_EXPORT void setNanMinMaxClamp(bool nanMinMaxClamp);
+    GLSLANG_EXPORT void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode);
+    GLSLANG_EXPORT void addBlockStorageOverride(const char* nameStr, glslang::TBlockStorageClass backing);
+
+    GLSLANG_EXPORT void setGlobalUniformBlockName(const char* name);
+    GLSLANG_EXPORT void setAtomicCounterBlockName(const char* name);
+    GLSLANG_EXPORT void setGlobalUniformSet(unsigned int set);
+    GLSLANG_EXPORT void setGlobalUniformBinding(unsigned int binding);
+    GLSLANG_EXPORT void setAtomicCounterBlockSet(unsigned int set);
+    GLSLANG_EXPORT void setAtomicCounterBlockBinding(unsigned int binding);
+
+    // For setting up the environment (cleared to nothingness in the constructor).
+    // These must be called so that parsing is done for the right source language and
+    // target environment, either indirectly through TranslateEnvironment() based on
+    // EShMessages et al., or directly by the user.
+    //
+    // setEnvInput: The input source language and stage.
If generating code for a + // specific client, the input client semantics to use and the + // version of that client's input semantics to use, otherwise + // use EShClientNone and version of 0, e.g. for validation mode. + // Note 'version' does not describe the target environment, + // just the version of the source dialect to compile under. + // For example, to choose the Vulkan dialect of GLSL defined by + // version 100 of the KHR_vulkan_glsl extension: lang = EShSourceGlsl, + // dialect = EShClientVulkan, and version = 100. + // + // See the definitions of TEnvironment, EShSource, EShLanguage, + // and EShClient for choices and more detail. + // + // setEnvClient: The client that will be hosting the execution, and its version. + // Note 'version' is not the version of the languages involved, but + // the version of the client environment. + // Use EShClientNone and version of 0 if there is no client, e.g. + // for validation mode. + // + // See EShTargetClientVersion for choices. + // + // setEnvTarget: The language to translate to when generating code, and that + // language's version. + // Use EShTargetNone and version of 0 if there is no client, e.g. + // for validation mode. + // + void setEnvInput(EShSource lang, EShLanguage envStage, EShClient client, int version) + { + environment.input.languageFamily = lang; + environment.input.stage = envStage; + environment.input.dialect = client; + environment.input.dialectVersion = version; + } + void setEnvClient(EShClient client, EShTargetClientVersion version) + { + environment.client.client = client; + environment.client.version = version; + } + void setEnvTarget(EShTargetLanguage lang, EShTargetLanguageVersion version) + { + environment.target.language = lang; + environment.target.version = version; + } + + void getStrings(const char* const* &s, int& n) { s = strings; n = numStrings; } + +#ifdef ENABLE_HLSL + void setEnvTargetHlslFunctionality1() { environment.target.hlslFunctionality1 = true; } + bool getEnvTargetHlslFunctionality1() const { return environment.target.hlslFunctionality1; } +#else + bool getEnvTargetHlslFunctionality1() const { return false; } +#endif + + void setEnvInputVulkanRulesRelaxed() { environment.input.vulkanRulesRelaxed = true; } + bool getEnvInputVulkanRulesRelaxed() const { return environment.input.vulkanRulesRelaxed; } + + // Interface to #include handlers. + // + // To support #include, a client of Glslang does the following: + // 1. Call setStringsWithNames to set the source strings and associated + // names. For example, the names could be the names of the files + // containing the shader sources. + // 2. Call parse with an Includer. + // + // When the Glslang parser encounters an #include directive, it calls + // the Includer's include method with the requested include name + // together with the current string name. The returned IncludeResult + // contains the fully resolved name of the included source, together + // with the source text that should replace the #include directive + // in the source stream. After parsing that source, Glslang will + // release the IncludeResult object. + class Includer { + public: + // An IncludeResult contains the resolved name and content of a source + // inclusion. 
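+    // A sketch of a trivial includer (illustrative, not part of the upstream
+    // header): it resolves every local include from an in-memory map named
+    // 'sources' (a hypothetical std::map<std::string, std::string>) and
+    // declines all system includes:
+    //
+    //     class MapIncluder : public glslang::TShader::Includer {
+    //     public:
+    //         IncludeResult* includeLocal(const char* headerName,
+    //                                     const char*, size_t) override {
+    //             auto it = sources.find(headerName);
+    //             if (it == sources.end())
+    //                 return nullptr;  // not found; parser reports the error
+    //             return new IncludeResult(it->first, it->second.data(),
+    //                                      it->second.size(), nullptr);
+    //         }
+    //         void releaseInclude(IncludeResult* result) override { delete result; }
+    //         std::map<std::string, std::string> sources;
+    //     };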
+ struct IncludeResult { + IncludeResult(const std::string& headerName, const char* const headerData, const size_t headerLength, void* userData) : + headerName(headerName), headerData(headerData), headerLength(headerLength), userData(userData) { } + // For a successful inclusion, the fully resolved name of the requested + // include. For example, in a file system-based includer, full resolution + // should convert a relative path name into an absolute path name. + // For a failed inclusion, this is an empty string. + const std::string headerName; + // The content and byte length of the requested inclusion. The + // Includer producing this IncludeResult retains ownership of the + // storage. + // For a failed inclusion, the header + // field points to a string containing error details. + const char* const headerData; + const size_t headerLength; + // Include resolver's context. + void* userData; + protected: + IncludeResult& operator=(const IncludeResult&); + IncludeResult(); + }; + + // For both include methods below: + // + // Resolves an inclusion request by name, current source name, + // and include depth. + // On success, returns an IncludeResult containing the resolved name + // and content of the include. + // On failure, returns a nullptr, or an IncludeResult + // with an empty string for the headerName and error details in the + // header field. + // The Includer retains ownership of the contents + // of the returned IncludeResult value, and those contents must + // remain valid until the releaseInclude method is called on that + // IncludeResult object. + // + // Note "local" vs. "system" is not an "either/or": "local" is an + // extra thing to do over "system". Both might get called, as per + // the C++ specification. + + // For the "system" or <>-style includes; search the "system" paths. + virtual IncludeResult* includeSystem(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // For the "local"-only aspect of a "" include. Should not search in the + // "system" paths, because on returning a failure, the parser will + // call includeSystem() to look in the "system" locations. + virtual IncludeResult* includeLocal(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // Signals that the parser will no longer use the contents of the + // specified IncludeResult. + virtual void releaseInclude(IncludeResult*) = 0; + virtual ~Includer() {} + }; + + // Fail all Includer searches + class ForbidIncluder : public Includer { + public: + virtual void releaseInclude(IncludeResult*) override { } + }; + + GLSLANG_EXPORT bool parse( + const TBuiltInResource*, int defaultVersion, EProfile defaultProfile, + bool forceDefaultVersionAndProfile, bool forwardCompatible, + EShMessages, Includer&); + + bool parse(const TBuiltInResource* res, int defaultVersion, EProfile defaultProfile, bool forceDefaultVersionAndProfile, + bool forwardCompatible, EShMessages messages) + { + TShader::ForbidIncluder includer; + return parse(res, defaultVersion, defaultProfile, forceDefaultVersionAndProfile, forwardCompatible, messages, includer); + } + + // Equivalent to parse() without a default profile and without forcing defaults. 
+    bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages)
+    {
+        return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages);
+    }
+
+    bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages,
+               Includer& includer)
+    {
+        return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages, includer);
+    }
+
+    // NOTE: Doing just preprocessing to obtain a correct preprocessed shader string
+    // is not an officially supported or fully working path.
+    GLSLANG_EXPORT bool preprocess(
+        const TBuiltInResource* builtInResources, int defaultVersion,
+        EProfile defaultProfile, bool forceDefaultVersionAndProfile,
+        bool forwardCompatible, EShMessages message, std::string* outputString,
+        Includer& includer);
+
+    GLSLANG_EXPORT const char* getInfoLog();
+    GLSLANG_EXPORT const char* getInfoDebugLog();
+    EShLanguage getStage() const { return stage; }
+    TIntermediate* getIntermediate() const { return intermediate; }
+
+protected:
+    TPoolAllocator* pool;
+    EShLanguage stage;
+    TCompiler* compiler;
+    TIntermediate* intermediate;
+    TInfoSink* infoSink;
+    // strings and lengths follow the standard for glShaderSource:
+    //     strings is an array of numStrings pointers to string data.
+    //     lengths can be null, but if not it is an array of numStrings
+    //     integers containing the length of the associated strings.
+    //     if lengths is null or lengths[n] < 0 the associated strings[n] is
+    //     assumed to be null-terminated.
+    // stringNames is the optional names for all the strings. If stringNames
+    // is null, then none of the strings has a name. If a certain element in
+    // stringNames is null, then the corresponding string does not have a name.
+    const char* const* strings;      // explicit code to compile, see previous comment
+    const int* lengths;
+    const char* const* stringNames;
+    int numStrings;                  // size of the above arrays
+    const char* preamble;            // string of implicit code to compile before the explicitly provided code
+
+    // a function in the source string can be renamed FROM this TO the name given in setEntryPoint.
+    std::string sourceEntryPointName;
+
+    // overrides #version in shader source or default version if #version isn't present
+    int overrideVersion;
+
+    TEnvironment environment;
+
+    friend class TProgram;
+
+private:
+    TShader& operator=(TShader&);
+};
+
+#if !defined(GLSLANG_WEB)
+
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+//
+
+// Data needed for just a single object at the granularity exchanged by the reflection API
+class TObjectReflection {
+public:
+    GLSLANG_EXPORT TObjectReflection(const std::string& pName, const TType& pType, int pOffset, int pGLDefineType, int pSize, int pIndex);
+
+    const TType* getType() const { return type; }
+    GLSLANG_EXPORT int getBinding() const;
+    GLSLANG_EXPORT void dump() const;
+    static TObjectReflection badReflection() { return TObjectReflection(); }
+
+    std::string name;
+    int offset;
+    int glDefineType;
+    int size;                // data size in bytes for a block, array size for a (non-block) object that's an array
+    int index;
+    int counterIndex;
+    int numMembers;
+    int arrayStride;         // stride of an array variable
+    int topLevelArraySize;   // size of the top-level variable in a storage buffer member
+    int topLevelArrayStride; // stride of the top-level variable in a storage buffer member
+    EShLanguageMask stages;
+
+protected:
+    TObjectReflection()
+        : offset(-1), glDefineType(-1), size(-1), index(-1), counterIndex(-1), numMembers(-1), arrayStride(0),
+          topLevelArrayStride(0), stages(EShLanguageMask(0)), type(nullptr)
+    {
+    }
+
+    const TType* type;
+};
+
+class TReflection;
+class TIoMapper;
+struct TVarEntryInfo;
+
+// Allows customizing the binding layout after linking.
+// All used uniform variables will invoke at least validateBinding.
+// If validateBinding returned true then the other resolveBinding,
+// resolveSet, and resolveLocation are invoked to resolve the binding
+// and descriptor set index respectively.
+//
+// Invocations happen in a particular order:
+// 1) all shader inputs
+// 2) all shader outputs
+// 3) all uniforms with binding and set already defined
+// 4) all uniforms with binding but no set defined
+// 5) all uniforms with set but no binding defined
+// 6) all uniforms with no binding and no set defined
+//
+// mapIO will use this resolver in two phases. The first
+// phase is a notification phase, calling the corresponding
+// notify callbacks; this phase ends with a call to endNotifications.
+// Phase two starts directly after the call to endNotifications
+// and calls all other callbacks to validate and to get the
+// bindings, sets, locations, component and color indices.
+//
+// NOTE: limit checks are still applied to bindings and sets
+// and may result in an error.
+class TIoMapResolver
+{
+public:
+    virtual ~TIoMapResolver() {}
+
+    // Should return true if the resulting/current binding would be okay.
+    // Basic idea is to do aliasing binding checks with this.
+    virtual bool validateBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current binding should be overridden.
+    // Return -1 if the current binding (including no binding) should be kept.
+    virtual int resolveBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current set should be overridden.
+    // Return -1 if the current set (including no set) should be kept.
+    virtual int resolveSet(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current location should be overridden.
+    // Return -1 if the current location (including no location) should be kept.
+    virtual int resolveUniformLocation(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return true if the resulting/current setup would be okay.
+    // Basic idea is to do aliasing checks and reject invalid semantic names.
+    virtual bool validateInOut(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current location should be overridden.
+    // Return -1 if the current location (including no location) should be kept.
+    virtual int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current component index should be overridden.
+    // Return -1 if the current component index (including no index) should be kept.
+    virtual int resolveInOutComponent(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current color index should be overridden.
+    // Return -1 if the current color index (including no index) should be kept.
+    virtual int resolveInOutIndex(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Notification of a uniform variable
+    virtual void notifyBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Notification of an in or out variable
+    virtual void notifyInOut(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Called by mapIO when it starts its notify pass for the given stage
+    virtual void beginNotifications(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the notify pass
+    virtual void endNotifications(EShLanguage stage) = 0;
+    // Called by mapIO when it starts its resolve pass for the given stage
+    virtual void beginResolve(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the resolve pass
+    virtual void endResolve(EShLanguage stage) = 0;
+    // Called by mapIO when it starts its symbol collection for the given stage
+    virtual void beginCollect(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the symbol collection
+    virtual void endCollect(EShLanguage stage) = 0;
+    // Called by TSlotCollector to resolve storage locations or bindings
+    virtual void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0;
+    // Called by TSlotCollector to resolve resource locations or bindings
+    virtual void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0;
+    // Called by mapIO.addStage to set the shader stage mask marking a stage to be added to this pipeline
+    virtual void addStage(EShLanguage stage, TIntermediate& stageIntermediate) = 0;
+};
+
+#endif // !GLSLANG_WEB
+
+// Make one TProgram per set of shaders that will get linked together. Add all
+// the shaders that are to be linked together. After calling shader.parse()
+// for all shaders, call link().
+//
+// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
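+//
+// A minimal sketch of that flow (illustrative only; 'vs' and 'fs' are
+// TShader objects whose parse() calls already succeeded):
+//
+//     glslang::TProgram program;
+//     program.addShader(&vs);
+//     program.addShader(&fs);
+//     if (!program.link(EShMsgDefault))
+//         puts(program.getInfoLog());
+//     else if (program.buildReflection()) {
+//         for (int i = 0; i < program.getNumUniformVariables(); ++i)
+//             puts(program.getUniform(i).name.c_str());
+//     }
+//     // per the note above, destruct 'program' before 'vs' and 'fs'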
+//
+class TProgram {
+public:
+    GLSLANG_EXPORT TProgram();
+    GLSLANG_EXPORT virtual ~TProgram();
+    void addShader(TShader* shader) { stages[shader->stage].push_back(shader); }
+    std::list<TShader*>& getShaders(EShLanguage stage) { return stages[stage]; }
+    // Link Validation interface
+    GLSLANG_EXPORT bool link(EShMessages);
+    GLSLANG_EXPORT const char* getInfoLog();
+    GLSLANG_EXPORT const char* getInfoDebugLog();
+
+    TIntermediate* getIntermediate(EShLanguage stage) const { return intermediate[stage]; }
+
+#if !defined(GLSLANG_WEB)
+
+    // Reflection Interface
+
+    // call first, to do liveness analysis, index mapping, etc.; returns false on failure
+    GLSLANG_EXPORT bool buildReflection(int opts = EShReflectionDefault);
+    GLSLANG_EXPORT unsigned getLocalSize(int dim) const;                  // return dim'th local size
+    GLSLANG_EXPORT int getReflectionIndex(const char *name) const;
+    GLSLANG_EXPORT int getReflectionPipeIOIndex(const char* name, const bool inOrOut) const;
+    GLSLANG_EXPORT int getNumUniformVariables() const;
+    GLSLANG_EXPORT const TObjectReflection& getUniform(int index) const;
+    GLSLANG_EXPORT int getNumUniformBlocks() const;
+    GLSLANG_EXPORT const TObjectReflection& getUniformBlock(int index) const;
+    GLSLANG_EXPORT int getNumPipeInputs() const;
+    GLSLANG_EXPORT const TObjectReflection& getPipeInput(int index) const;
+    GLSLANG_EXPORT int getNumPipeOutputs() const;
+    GLSLANG_EXPORT const TObjectReflection& getPipeOutput(int index) const;
+    GLSLANG_EXPORT int getNumBufferVariables() const;
+    GLSLANG_EXPORT const TObjectReflection& getBufferVariable(int index) const;
+    GLSLANG_EXPORT int getNumBufferBlocks() const;
+    GLSLANG_EXPORT const TObjectReflection& getBufferBlock(int index) const;
+    GLSLANG_EXPORT int getNumAtomicCounters() const;
+    GLSLANG_EXPORT const TObjectReflection& getAtomicCounter(int index) const;
+
+    // Legacy Reflection Interface - expressed in terms of above interface
+
+    // can be used for glGetProgramiv(GL_ACTIVE_UNIFORMS)
+    int getNumLiveUniformVariables() const             { return getNumUniformVariables(); }
+
+    // can be used for glGetProgramiv(GL_ACTIVE_UNIFORM_BLOCKS)
+    int getNumLiveUniformBlocks() const                { return getNumUniformBlocks(); }
+
+    // can be used for glGetProgramiv(GL_ACTIVE_ATTRIBUTES)
+    int getNumLiveAttributes() const                   { return getNumPipeInputs(); }
+
+    // can be used for glGetUniformIndices()
+    int getUniformIndex(const char *name) const        { return getReflectionIndex(name); }
+
+    int getPipeIOIndex(const char *name, const bool inOrOut) const
+                                                       { return getReflectionPipeIOIndex(name, inOrOut); }
+
+    // can be used for "name" part of glGetActiveUniform()
+    const char *getUniformName(int index) const        { return getUniform(index).name.c_str(); }
+
+    // returns the binding number
+    int getUniformBinding(int index) const             { return getUniform(index).getBinding(); }
+
+    // returns the shader stages where a uniform is present
+    EShLanguageMask getUniformStages(int index) const  { return getUniform(index).stages; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_BLOCK_INDEX)
+    int getUniformBlockIndex(int index) const          { return getUniform(index).index; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_TYPE)
+    int getUniformType(int index) const                { return getUniform(index).glDefineType; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_OFFSET)
+    int getUniformBufferOffset(int index) const        { return getUniform(index).offset; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_SIZE)
+    int getUniformArraySize(int index) const           { return getUniform(index).size; }
+
+    // returns a TType*
+    const TType *getUniformTType(int index) const      { return getUniform(index).getType(); }
+
+    // can be used for glGetActiveUniformBlockName()
+    const char *getUniformBlockName(int index) const   { return getUniformBlock(index).name.c_str(); }
+
+    // can be used for glGetActiveUniformBlockiv(UNIFORM_BLOCK_DATA_SIZE)
+    int getUniformBlockSize(int index) const           { return getUniformBlock(index).size; }
+
+    // returns the block binding number
+    int getUniformBlockBinding(int index) const        { return getUniformBlock(index).getBinding(); }
+
+    // returns the block index of the associated counter.
+    int getUniformBlockCounterIndex(int index) const   { return getUniformBlock(index).counterIndex; }
+
+    // returns a TType*
+    const TType *getUniformBlockTType(int index) const { return getUniformBlock(index).getType(); }
+
+    // can be used for glGetActiveAttrib()
+    const char *getAttributeName(int index) const      { return getPipeInput(index).name.c_str(); }
+
+    // can be used for glGetActiveAttrib()
+    int getAttributeType(int index) const              { return getPipeInput(index).glDefineType; }
+
+    // returns a TType*
+    const TType *getAttributeTType(int index) const    { return getPipeInput(index).getType(); }
+
+    GLSLANG_EXPORT void dumpReflection();
+
+    // I/O mapping: apply base offsets and map live unbound variables
+    // If resolver is not provided it uses the previous approach
+    // and respects auto assignment and offsets.
+    GLSLANG_EXPORT bool mapIO(TIoMapResolver* pResolver = nullptr, TIoMapper* pIoMapper = nullptr);
+#endif // !GLSLANG_WEB
+
+protected:
+    GLSLANG_EXPORT bool linkStage(EShLanguage, EShMessages);
+    GLSLANG_EXPORT bool crossStageCheck(EShMessages);
+
+    TPoolAllocator* pool;
+    std::list<TShader*> stages[EShLangCount];
+    TIntermediate* intermediate[EShLangCount];
+    bool newedIntermediate[EShLangCount];  // track which intermediates were "new" versus reusing a singleton unit in a stage
+    TInfoSink* infoSink;
+#if !defined(GLSLANG_WEB)
+    TReflection* reflection;
+#endif
+    bool linked;
+
+private:
+    TProgram(TProgram&);
+    TProgram& operator=(TProgram&);
+};
+
+} // end namespace glslang
+
+#endif // _COMPILER_INTERFACE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/resource_limits_c.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/resource_limits_c.h
new file mode 100644
index 0000000..05aa8eb
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/Public/resource_limits_c.h
@@ -0,0 +1,57 @@
+/**
+BSD 2-Clause License
+
+Copyright (c) 2020, Travis Fort
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**/
+
+#ifndef _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
+#define _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
+
+#include "../Include/glslang_c_interface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Returns a struct that can be used to create custom resource values.
+glslang_resource_t* glslang_resource(void);
+
+// These are the default resources for TBuiltInResources, used for both
+//  - parsing this string for the case where the user didn't supply one,
+//  - dumping out a template for user construction of a config file.
+const glslang_resource_t* glslang_default_resource(void);
+
+// Returns the DefaultTBuiltInResource as a human-readable string.
+// NOTE: User is responsible for freeing this string.
+const char* glslang_default_resource_string();
+
+// Decodes the resource limits from |config| to |resources|.
+void glslang_decode_resource_limits(glslang_resource_t* resources, char* config);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.AMD.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.AMD.h
new file mode 100644
index 0000000..009d2f1
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.AMD.h
@@ -0,0 +1,108 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/ + +#ifndef GLSLextAMD_H +#define GLSLextAMD_H + +static const int GLSLextAMDVersion = 100; +static const int GLSLextAMDRevision = 7; + +// SPV_AMD_shader_ballot +static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; + +enum ShaderBallotAMD { + ShaderBallotBadAMD = 0, // Don't use + + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4, + + ShaderBallotCountAMD +}; + +// SPV_AMD_shader_trinary_minmax +static const char* const E_SPV_AMD_shader_trinary_minmax = "SPV_AMD_shader_trinary_minmax"; + +enum ShaderTrinaryMinMaxAMD { + ShaderTrinaryMinMaxBadAMD = 0, // Don't use + + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9, + + ShaderTrinaryMinMaxCountAMD +}; + +// SPV_AMD_shader_explicit_vertex_parameter +static const char* const E_SPV_AMD_shader_explicit_vertex_parameter = "SPV_AMD_shader_explicit_vertex_parameter"; + +enum ShaderExplicitVertexParameterAMD { + ShaderExplicitVertexParameterBadAMD = 0, // Don't use + + InterpolateAtVertexAMD = 1, + + ShaderExplicitVertexParameterCountAMD +}; + +// SPV_AMD_gcn_shader +static const char* const E_SPV_AMD_gcn_shader = "SPV_AMD_gcn_shader"; + +enum GcnShaderAMD { + GcnShaderBadAMD = 0, // Don't use + + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3, + + GcnShaderCountAMD +}; + +// SPV_AMD_gpu_shader_half_float +static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; + +// SPV_AMD_texture_gather_bias_lod +static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_gather_bias_lod"; + +// SPV_AMD_gpu_shader_int16 +static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"; + +// SPV_AMD_shader_image_load_store_lod +static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod"; + +// SPV_AMD_shader_fragment_mask +static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask"; + +// SPV_AMD_gpu_shader_half_float_fetch +static const char* const E_SPV_AMD_gpu_shader_half_float_fetch = "SPV_AMD_gpu_shader_half_float_fetch"; + +#endif // #ifndef GLSLextAMD_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.EXT.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.EXT.h new file mode 100644 index 0000000..a247b4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.EXT.h @@ -0,0 +1,44 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextEXT_H +#define GLSLextEXT_H + +static const int GLSLextEXTVersion = 100; +static const int GLSLextEXTRevision = 2; + +static const char* const E_SPV_EXT_shader_stencil_export = "SPV_EXT_shader_stencil_export"; +static const char* const E_SPV_EXT_shader_viewport_index_layer = "SPV_EXT_shader_viewport_index_layer"; +static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered"; +static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density"; +static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation"; +static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add"; +static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add"; +static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max"; +static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64"; +static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader"; + +#endif // #ifndef GLSLextEXT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.KHR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.KHR.h new file mode 100644 index 0000000..d5c670f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.KHR.h @@ -0,0 +1,58 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. +** Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextKHR_H +#define GLSLextKHR_H + +static const int GLSLextKHRVersion = 100; +static const int GLSLextKHRRevision = 3; + +static const char* const E_SPV_KHR_shader_ballot = "SPV_KHR_shader_ballot"; +static const char* const E_SPV_KHR_subgroup_vote = "SPV_KHR_subgroup_vote"; +static const char* const E_SPV_KHR_device_group = "SPV_KHR_device_group"; +static const char* const E_SPV_KHR_multiview = "SPV_KHR_multiview"; +static const char* const E_SPV_KHR_shader_draw_parameters = "SPV_KHR_shader_draw_parameters"; +static const char* const E_SPV_KHR_16bit_storage = "SPV_KHR_16bit_storage"; +static const char* const E_SPV_KHR_8bit_storage = "SPV_KHR_8bit_storage"; +static const char* const E_SPV_KHR_storage_buffer_storage_class = "SPV_KHR_storage_buffer_storage_class"; +static const char* const E_SPV_KHR_post_depth_coverage = "SPV_KHR_post_depth_coverage"; +static const char* const E_SPV_KHR_vulkan_memory_model = "SPV_KHR_vulkan_memory_model"; +static const char* const E_SPV_EXT_physical_storage_buffer = "SPV_EXT_physical_storage_buffer"; +static const char* const E_SPV_KHR_physical_storage_buffer = "SPV_KHR_physical_storage_buffer"; +static const char* const E_SPV_EXT_fragment_shader_interlock = "SPV_EXT_fragment_shader_interlock"; +static const char* const E_SPV_KHR_shader_clock = "SPV_KHR_shader_clock"; +static const char* const E_SPV_KHR_non_semantic_info = "SPV_KHR_non_semantic_info"; +static const char* const E_SPV_KHR_ray_tracing = "SPV_KHR_ray_tracing"; +static const char* const E_SPV_KHR_ray_query = "SPV_KHR_ray_query"; +static const char* const E_SPV_KHR_fragment_shading_rate = "SPV_KHR_fragment_shading_rate"; +static const char* const E_SPV_KHR_terminate_invocation = "SPV_KHR_terminate_invocation"; +static const char* const E_SPV_KHR_workgroup_memory_explicit_layout = "SPV_KHR_workgroup_memory_explicit_layout"; +static const char* const E_SPV_KHR_subgroup_uniform_control_flow = "SPV_KHR_subgroup_uniform_control_flow"; +static const char* const E_SPV_KHR_fragment_shader_barycentric = "SPV_KHR_fragment_shader_barycentric"; +static const char* const E_SPV_AMD_shader_early_and_late_fragment_tests = "SPV_AMD_shader_early_and_late_fragment_tests"; + +#endif // #ifndef GLSLextKHR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.NV.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.NV.h new file mode 100644 index 0000000..93c98bf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.ext.NV.h @@ -0,0 +1,84 @@ +/* +** Copyright (c) 2014-2017 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextNV_H +#define GLSLextNV_H + +enum BuiltIn; +enum Decoration; +enum Op; +enum Capability; + +static const int GLSLextNVVersion = 100; +static const int GLSLextNVRevision = 11; + +//SPV_NV_sample_mask_override_coverage +const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage"; + +//SPV_NV_geometry_shader_passthrough +const char* const E_SPV_NV_geometry_shader_passthrough = "SPV_NV_geometry_shader_passthrough"; + +//SPV_NV_viewport_array2 +const char* const E_SPV_NV_viewport_array2 = "SPV_NV_viewport_array2"; +const char* const E_ARB_shader_viewport_layer_array = "SPV_ARB_shader_viewport_layer_array"; + +//SPV_NV_stereo_view_rendering +const char* const E_SPV_NV_stereo_view_rendering = "SPV_NV_stereo_view_rendering"; + +//SPV_NVX_multiview_per_view_attributes +const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes"; + +//SPV_NV_shader_subgroup_partitioned +const char* const E_SPV_NV_shader_subgroup_partitioned = "SPV_NV_shader_subgroup_partitioned"; + +//SPV_NV_fragment_shader_barycentric +const char* const E_SPV_NV_fragment_shader_barycentric = "SPV_NV_fragment_shader_barycentric"; + +//SPV_NV_compute_shader_derivatives +const char* const E_SPV_NV_compute_shader_derivatives = "SPV_NV_compute_shader_derivatives"; + +//SPV_NV_shader_image_footprint +const char* const E_SPV_NV_shader_image_footprint = "SPV_NV_shader_image_footprint"; + +//SPV_NV_mesh_shader +const char* const E_SPV_NV_mesh_shader = "SPV_NV_mesh_shader"; + +//SPV_NV_raytracing +const char* const E_SPV_NV_ray_tracing = "SPV_NV_ray_tracing"; + +//SPV_NV_ray_tracing_motion_blur +const char* const E_SPV_NV_ray_tracing_motion_blur = "SPV_NV_ray_tracing_motion_blur"; + +//SPV_NV_shading_rate +const char* const E_SPV_NV_shading_rate = "SPV_NV_shading_rate"; + +//SPV_NV_cooperative_matrix +const char* const E_SPV_NV_cooperative_matrix = "SPV_NV_cooperative_matrix"; + +//SPV_NV_shader_sm_builtins +const char* const E_SPV_NV_shader_sm_builtins = "SPV_NV_shader_sm_builtins"; + +#endif // #ifndef GLSLextNV_H diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.std.450.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.std.450.h new file mode 100644 index 0000000..df31092 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GlslangToSpv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GlslangToSpv.h new file mode 100644 index 0000000..3907be4 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/GlslangToSpv.h @@ -0,0 +1,61 @@ +// +// Copyright (C) 2014 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
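The enumerants above are the literal numbers that land in the fifth word of an OpExtInst. A hedged sketch of the binary encoding, assuming the <id>s were allocated elsewhere in the module:

    #include <cstdint>
    #include <vector>
    #include "GLSL.std.450.h"

    // Encode "%r = OpExtInst %f32 %set FMax %a %b" as raw SPIR-V words.
    // OpExtInst is opcode 12; word 0 packs (word count << 16) | opcode.
    std::vector<std::uint32_t> encodeFMax(std::uint32_t f32, std::uint32_t r,
                                          std::uint32_t set, std::uint32_t a,
                                          std::uint32_t b) {
        std::vector<std::uint32_t> words;
        words.push_back((7u << 16) | 12u);  // 7 words total, OpExtInst
        words.push_back(f32);               // result type <id>
        words.push_back(r);                 // result <id>
        words.push_back(set);               // <id> of OpExtInstImport "GLSL.std.450"
        words.push_back(GLSLstd450FMax);    // literal instruction number: 40
        words.push_back(a);                 // operand x
        words.push_back(b);                 // operand y
        return words;
    }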
+// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + #pragma warning(disable : 4464) // relative include path contains '..' +#endif + +#include "SpvTools.h" +#include "glslang/Include/intermediate.h" + +#include <string> +#include <vector> + +#include "Logger.h" + +namespace glslang { + +void GetSpirvVersion(std::string&); +int GetSpirvGeneratorVersion(); +void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv, + SpvOptions* options = nullptr); +void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv, + spv::SpvBuildLogger* logger, SpvOptions* options = nullptr); +void OutputSpvBin(const std::vector<unsigned int>& spirv, const char* baseName); +void OutputSpvHex(const std::vector<unsigned int>& spirv, const char* baseName, const char* varName); + +} diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/Logger.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/Logger.h new file mode 100644 index 0000000..411367c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/Logger.h @@ -0,0 +1,83 @@ +// +// Copyright (C) 2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
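A hedged sketch of the usual path from a linked program to a SPIR-V word stream via the two GlslangToSpv overloads above; TShader/TProgram come from ShaderLang.h earlier in this header set. The program is assumed to have gone through addShader()/link() already, and SpvOptions fields such as generateDebugInfo are assumptions to check against SpvTools.h.

    #include <vector>
    #include "glslang/Public/ShaderLang.h"
    #include "glslang/SPIRV/GlslangToSpv.h"

    std::vector<unsigned int> compileToSpv(glslang::TProgram& program, EShLanguage stage) {
        std::vector<unsigned int> spirv;
        spv::SpvBuildLogger logger;
        glslang::SpvOptions options;
        options.generateDebugInfo = false;  // assumed field name; see SpvTools.h
        glslang::GlslangToSpv(*program.getIntermediate(stage), spirv, &logger, &options);
        return spirv;
    }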
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef GLSLANG_SPIRV_LOGGER_H +#define GLSLANG_SPIRV_LOGGER_H + +#include <string> +#include <vector> + +namespace spv { + +// A class for holding all SPIR-V build status messages, including +// missing/TBD functionalities, warnings, and errors. +class SpvBuildLogger { +public: + SpvBuildLogger() {} + +#ifdef GLSLANG_WEB + void tbdFunctionality(const std::string& f) { } + void missingFunctionality(const std::string& f) { } + void warning(const std::string& w) { } + void error(const std::string& e) { errors.push_back(e); } + std::string getAllMessages() { return ""; } +#else + + // Registers a TBD functionality. + void tbdFunctionality(const std::string& f); + // Registers a missing functionality. + void missingFunctionality(const std::string& f); + + // Logs a warning. + void warning(const std::string& w) { warnings.push_back(w); } + // Logs an error. + void error(const std::string& e) { errors.push_back(e); } + + // Returns all messages accumulated in the order of: + // TBD functionalities, missing functionalities, warnings, errors. + std::string getAllMessages() const; +#endif + +private: + SpvBuildLogger(const SpvBuildLogger&); + + std::vector<std::string> tbdFeatures; + std::vector<std::string> missingFeatures; + std::vector<std::string> warnings; + std::vector<std::string> errors; +}; + +} // end spv namespace + +#endif // GLSLANG_SPIRV_LOGGER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/NonSemanticDebugPrintf.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/NonSemanticDebugPrintf.h new file mode 100644 index 0000000..83796d7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/NonSemanticDebugPrintf.h @@ -0,0 +1,50 @@ +// Copyright (c) 2020 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and/or associated documentation files (the +// "Materials"), to deal in the Materials without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Materials, and to +// permit persons to whom the Materials are furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS +// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS +// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT +// https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
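The logger above is purely an accumulator: nothing is printed until getAllMessages() is drained. A minimal hedged sketch of the intended flow after a GlslangToSpv call:

    #include <cstdio>
    #include <string>
    #include "glslang/SPIRV/Logger.h"

    void reportSpvMessages(const spv::SpvBuildLogger& logger) {
        // Order is fixed by the header: TBD, missing, warnings, then errors.
        const std::string messages = logger.getAllMessages();
        if (!messages.empty())
            std::fprintf(stderr, "SPIR-V build messages:\n%s", messages.c_str());
    }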
+// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +// + +#ifndef SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ +#define SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticDebugPrintfRevision = 1, + NonSemanticDebugPrintfRevision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticDebugPrintfInstructions { + NonSemanticDebugPrintfDebugPrintf = 1, + NonSemanticDebugPrintfInstructionsMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h new file mode 100644 index 0000000..c52f32f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h @@ -0,0 +1,171 @@ +// Copyright (c) 2018 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. 
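A hedged sketch of how the two DebugPrintf enums above get used on the consuming side: find the <id> that OpExtInstImport bound to "NonSemantic.DebugPrintf", then match OpExtInst uses whose literal number is NonSemanticDebugPrintfDebugPrintf. A well-formed module (non-zero word counts, 5-word header) is assumed.

    #include <cstdint>
    #include <cstring>
    #include <vector>

    std::uint32_t findDebugPrintfImport(const std::vector<std::uint32_t>& spv) {
        const std::uint32_t OpExtInstImport = 11;      // per the SPIR-V spec
        for (std::size_t i = 5; i < spv.size(); i += spv[i] >> 16) {
            if ((spv[i] & 0xffffu) == OpExtInstImport &&
                std::strcmp(reinterpret_cast<const char*>(&spv[i + 2]),
                            "NonSemantic.DebugPrintf") == 0)
                return spv[i + 1];                     // the imported set's <id>
        }
        return 0;                                      // 0 is never a valid <id>
    }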
+ +#ifndef SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ +#define SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticShaderDebugInfo100Version = 100, + NonSemanticShaderDebugInfo100Version_BitWidthPadding = 0x7fffffff +}; +enum { + NonSemanticShaderDebugInfo100Revision = 6, + NonSemanticShaderDebugInfo100Revision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100Instructions { + NonSemanticShaderDebugInfo100DebugInfoNone = 0, + NonSemanticShaderDebugInfo100DebugCompilationUnit = 1, + NonSemanticShaderDebugInfo100DebugTypeBasic = 2, + NonSemanticShaderDebugInfo100DebugTypePointer = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifier = 4, + NonSemanticShaderDebugInfo100DebugTypeArray = 5, + NonSemanticShaderDebugInfo100DebugTypeVector = 6, + NonSemanticShaderDebugInfo100DebugTypedef = 7, + NonSemanticShaderDebugInfo100DebugTypeFunction = 8, + NonSemanticShaderDebugInfo100DebugTypeEnum = 9, + NonSemanticShaderDebugInfo100DebugTypeComposite = 10, + NonSemanticShaderDebugInfo100DebugTypeMember = 11, + NonSemanticShaderDebugInfo100DebugTypeInheritance = 12, + NonSemanticShaderDebugInfo100DebugTypePtrToMember = 13, + NonSemanticShaderDebugInfo100DebugTypeTemplate = 14, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameter = 15, + NonSemanticShaderDebugInfo100DebugTypeTemplateTemplateParameter = 16, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameterPack = 17, + NonSemanticShaderDebugInfo100DebugGlobalVariable = 18, + NonSemanticShaderDebugInfo100DebugFunctionDeclaration = 19, + NonSemanticShaderDebugInfo100DebugFunction = 20, + NonSemanticShaderDebugInfo100DebugLexicalBlock = 21, + NonSemanticShaderDebugInfo100DebugLexicalBlockDiscriminator = 22, + NonSemanticShaderDebugInfo100DebugScope = 23, + NonSemanticShaderDebugInfo100DebugNoScope = 24, + NonSemanticShaderDebugInfo100DebugInlinedAt = 25, + NonSemanticShaderDebugInfo100DebugLocalVariable = 26, + NonSemanticShaderDebugInfo100DebugInlinedVariable = 27, + NonSemanticShaderDebugInfo100DebugDeclare = 28, + NonSemanticShaderDebugInfo100DebugValue = 29, + NonSemanticShaderDebugInfo100DebugOperation = 30, + NonSemanticShaderDebugInfo100DebugExpression = 31, + NonSemanticShaderDebugInfo100DebugMacroDef = 32, + NonSemanticShaderDebugInfo100DebugMacroUndef = 33, + NonSemanticShaderDebugInfo100DebugImportedEntity = 34, + NonSemanticShaderDebugInfo100DebugSource = 35, + NonSemanticShaderDebugInfo100DebugFunctionDefinition = 101, + NonSemanticShaderDebugInfo100DebugSourceContinued = 102, + NonSemanticShaderDebugInfo100DebugLine = 103, + NonSemanticShaderDebugInfo100DebugNoLine = 104, + NonSemanticShaderDebugInfo100DebugBuildIdentifier = 105, + NonSemanticShaderDebugInfo100DebugStoragePath = 106, + NonSemanticShaderDebugInfo100DebugEntryPoint = 107, + NonSemanticShaderDebugInfo100DebugTypeMatrix = 108, + NonSemanticShaderDebugInfo100InstructionsMax = 0x7fffffff +}; + + +enum NonSemanticShaderDebugInfo100DebugInfoFlags { + NonSemanticShaderDebugInfo100None = 0x0000, + NonSemanticShaderDebugInfo100FlagIsProtected = 0x01, + NonSemanticShaderDebugInfo100FlagIsPrivate = 0x02, + NonSemanticShaderDebugInfo100FlagIsPublic = 0x03, + NonSemanticShaderDebugInfo100FlagIsLocal = 0x04, + NonSemanticShaderDebugInfo100FlagIsDefinition = 0x08, + NonSemanticShaderDebugInfo100FlagFwdDecl = 0x10, + NonSemanticShaderDebugInfo100FlagArtificial = 0x20, + NonSemanticShaderDebugInfo100FlagExplicit = 0x40, + NonSemanticShaderDebugInfo100FlagPrototyped = 0x80, + 
NonSemanticShaderDebugInfo100FlagObjectPointer = 0x100, + NonSemanticShaderDebugInfo100FlagStaticMember = 0x200, + NonSemanticShaderDebugInfo100FlagIndirectVariable = 0x400, + NonSemanticShaderDebugInfo100FlagLValueReference = 0x800, + NonSemanticShaderDebugInfo100FlagRValueReference = 0x1000, + NonSemanticShaderDebugInfo100FlagIsOptimized = 0x2000, + NonSemanticShaderDebugInfo100FlagIsEnumClass = 0x4000, + NonSemanticShaderDebugInfo100FlagTypePassByValue = 0x8000, + NonSemanticShaderDebugInfo100FlagTypePassByReference = 0x10000, + NonSemanticShaderDebugInfo100FlagUnknownPhysicalLayout = 0x20000, + NonSemanticShaderDebugInfo100DebugInfoFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100BuildIdentifierFlags { + NonSemanticShaderDebugInfo100IdentifierPossibleDuplicates = 0x01, + NonSemanticShaderDebugInfo100BuildIdentifierFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncoding { + NonSemanticShaderDebugInfo100Unspecified = 0, + NonSemanticShaderDebugInfo100Address = 1, + NonSemanticShaderDebugInfo100Boolean = 2, + NonSemanticShaderDebugInfo100Float = 3, + NonSemanticShaderDebugInfo100Signed = 4, + NonSemanticShaderDebugInfo100SignedChar = 5, + NonSemanticShaderDebugInfo100Unsigned = 6, + NonSemanticShaderDebugInfo100UnsignedChar = 7, + NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncodingMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugCompositeType { + NonSemanticShaderDebugInfo100Class = 0, + NonSemanticShaderDebugInfo100Structure = 1, + NonSemanticShaderDebugInfo100Union = 2, + NonSemanticShaderDebugInfo100DebugCompositeTypeMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugTypeQualifier { + NonSemanticShaderDebugInfo100ConstType = 0, + NonSemanticShaderDebugInfo100VolatileType = 1, + NonSemanticShaderDebugInfo100RestrictType = 2, + NonSemanticShaderDebugInfo100AtomicType = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifierMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugOperation { + NonSemanticShaderDebugInfo100Deref = 0, + NonSemanticShaderDebugInfo100Plus = 1, + NonSemanticShaderDebugInfo100Minus = 2, + NonSemanticShaderDebugInfo100PlusUconst = 3, + NonSemanticShaderDebugInfo100BitPiece = 4, + NonSemanticShaderDebugInfo100Swap = 5, + NonSemanticShaderDebugInfo100Xderef = 6, + NonSemanticShaderDebugInfo100StackValue = 7, + NonSemanticShaderDebugInfo100Constu = 8, + NonSemanticShaderDebugInfo100Fragment = 9, + NonSemanticShaderDebugInfo100DebugOperationMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugImportedEntity { + NonSemanticShaderDebugInfo100ImportedModule = 0, + NonSemanticShaderDebugInfo100ImportedDeclaration = 1, + NonSemanticShaderDebugInfo100DebugImportedEntityMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SPVRemapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SPVRemapper.h new file mode 100644 index 0000000..d216946 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SPVRemapper.h @@ -0,0 +1,312 @@ +// +// Copyright (C) 2015 LunarG, Inc. +// +// All rights reserved. 
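One detail worth calling out in the DebugInfoFlags above: IsProtected/IsPrivate/IsPublic occupy a two-bit visibility field (0x01/0x02/0x03), so they select a value rather than OR independently, while IsLocal and everything above it combine as an ordinary bitmask. A small sketch:

    // Visibility is a 2-bit field in bits [1:0]; the remaining flags OR together.
    const unsigned memberFlags =
        NonSemanticShaderDebugInfo100FlagIsPublic        // field value 0x03
        | NonSemanticShaderDebugInfo100FlagIsDefinition  // 0x08
        | NonSemanticShaderDebugInfo100FlagPrototyped;   // 0x80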
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef SPIRVREMAPPER_H +#define SPIRVREMAPPER_H + +#include <string> +#include <vector> +#include <cstdlib> +#include <exception> + +namespace spv { + +// MSVC defines __cplusplus as an older value, even when it supports almost all of 11. +// We handle that here by making our own symbol.
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1700) +# define use_cpp11 1 +#endif + +class spirvbin_base_t +{ +public: + enum Options { + NONE = 0, + STRIP = (1<<0), + MAP_TYPES = (1<<1), + MAP_NAMES = (1<<2), + MAP_FUNCS = (1<<3), + DCE_FUNCS = (1<<4), + DCE_VARS = (1<<5), + DCE_TYPES = (1<<6), + OPT_LOADSTORE = (1<<7), + OPT_FWD_LS = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV + MAP_ALL = (MAP_TYPES | MAP_NAMES | MAP_FUNCS), + DCE_ALL = (DCE_FUNCS | DCE_VARS | DCE_TYPES), + OPT_ALL = (OPT_LOADSTORE), + + ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL), + DO_EVERYTHING = (STRIP | ALL_BUT_STRIP) + }; +}; + +} // namespace SPV + +#if !defined (use_cpp11) +#include <cstdio> +#include <cstdint> + +namespace spv { +class spirvbin_t : public spirvbin_base_t +{ +public: + spirvbin_t(int /*verbose = 0*/) { } + + void remap(std::vector<std::uint32_t>& /*spv*/, unsigned int /*opts = 0*/) + { + printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n"); + exit(5); + } +}; + +} // namespace SPV + +#else // defined (use_cpp11) + +#include <map> +#include <set> +#include <unordered_map> +#include <unordered_set> +#include <functional> +#include <cstdint> +#include <cassert> + +#include "spirv.hpp" +#include "spvIR.h" + +namespace spv { + +// class to hold SPIR-V binary data for remapping, DCE, and debug stripping +class spirvbin_t : public spirvbin_base_t +{ +public: + spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose), errorLatch(false) + { } + + virtual ~spirvbin_t() { } + + // remap on an existing binary in memory + void remap(std::vector<std::uint32_t>& spv, const std::vector<std::string>& whiteListStrings, + std::uint32_t opts = DO_EVERYTHING); + + // remap on an existing binary in memory - legacy interface without white list + void remap(std::vector<std::uint32_t>& spv, std::uint32_t opts = DO_EVERYTHING); + + // Type for error/log handler functions + typedef std::function<void(const std::string&)> errorfn_t; + typedef std::function<void(const std::string&)> logfn_t; + + // Register error/log handling functions (can be lambda fn / functor / etc) + static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; } + static void registerLogHandler(logfn_t handler) { logHandler = handler; } + +protected: + // This can be overridden to provide other message behavior if needed + virtual void msg(int minVerbosity, int indent, const std::string& txt) const; + +private: + // Local to global, or global to local ID map + typedef std::unordered_map<spv::Id, spv::Id> idmap_t; + typedef std::unordered_set<spv::Id> idset_t; + typedef std::unordered_map<spv::Id, int> blockmap_t; + + void remap(std::uint32_t opts = DO_EVERYTHING); + + // Map of names to IDs + typedef std::unordered_map<std::string, spv::Id> namemap_t; + + typedef std::uint32_t spirword_t; + + typedef std::pair<unsigned, unsigned> range_t; + typedef std::function<void(spv::Id&)> idfn_t; + typedef std::function<bool(spv::Op, unsigned start)> instfn_t; + + // Special Values for ID map: + static const spv::Id unmapped; // unchanged from default value + static const spv::Id unused; // unused ID + static const int header_size; // SPIR header = 5 words + + class id_iterator_t; + + // For mapping type entries between different shaders + typedef std::vector<spirword_t> typeentry_t; + typedef std::map<spv::Id, typeentry_t> globaltypes_t; + + // A set that preserves position order, and a reverse map + typedef std::set<int> posmap_t; + typedef std::unordered_map<spv::Id, int> posmap_rev_t; + + // Maps an ID to the size of its base type, if known.
+ typedef std::unordered_map<spv::Id, unsigned> typesize_map_t; + + // handle error + void error(const std::string& txt) const { errorLatch = true; errorHandler(txt); } + + bool isConstOp(spv::Op opCode) const; + bool isTypeOp(spv::Op opCode) const; + bool isStripOp(spv::Op opCode) const; + bool isFlowCtrl(spv::Op opCode) const; + range_t literalRange(spv::Op opCode) const; + range_t typeRange(spv::Op opCode) const; + range_t constRange(spv::Op opCode) const; + unsigned typeSizeInWords(spv::Id id) const; + unsigned idTypeSizeInWords(spv::Id id) const; + + bool isStripOp(spv::Op opCode, unsigned start) const; + + spv::Id& asId(unsigned word) { return spv[word]; } + const spv::Id& asId(unsigned word) const { return spv[word]; } + spv::Op asOpCode(unsigned word) const { return opOpCode(spv[word]); } + std::uint32_t asOpCodeHash(unsigned word); + spv::Decoration asDecoration(unsigned word) const { return spv::Decoration(spv[word]); } + unsigned asWordCount(unsigned word) const { return opWordCount(spv[word]); } + spv::Id asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 1 : 2)); } + unsigned idPos(spv::Id id) const; + + static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; } + static spv::Op opOpCode(spirword_t data) { return spv::Op(data & spv::OpCodeMask); } + + // Header access & set methods + spirword_t magic() const { return spv[0]; } // return magic number + spirword_t bound() const { return spv[3]; } // return Id bound from header + spirword_t bound(spirword_t b) { return spv[3] = b; } + spirword_t genmagic() const { return spv[2]; } // generator magic + spirword_t genmagic(spirword_t m) { return spv[2] = m; } + spirword_t schemaNum() const { return spv[4]; } // schema number from header + + // Mapping fns: get + spv::Id localId(spv::Id id) const { return idMapL[id]; } + + // Mapping fns: set + inline spv::Id localId(spv::Id id, spv::Id newId); + void countIds(spv::Id id); + + // Return next unused new local ID. + // NOTE: boost::dynamic_bitset would be more efficient due to find_next(), + // which std::vector doesn't have.
+ inline spv::Id nextUnusedId(spv::Id id); + + void buildLocalMaps(); + std::string literalString(unsigned word) const; // Return literal as a std::string + int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; } + + bool isNewIdMapped(spv::Id newId) const { return isMapped(newId); } + bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; } + bool isOldIdUnused(spv::Id oldId) const { return localId(oldId) == unused; } + bool isOldIdMapped(spv::Id oldId) const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); } + bool isFunction(spv::Id oldId) const { return fnPos.find(oldId) != fnPos.end(); } + + // bool matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const; + // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const; + std::uint32_t hashType(unsigned typeStart) const; + + spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0); + int processInstruction(unsigned word, instfn_t, idfn_t); + + void validate() const; + void mapTypeConst(); + void mapFnBodies(); + void optLoadStore(); + void dceFuncs(); + void dceVars(); + void dceTypes(); + void mapNames(); + void foldIds(); // fold IDs to smallest space + void forwardLoadStores(); // load store forwarding (EXPERIMENTAL) + void offsetIds(); // create relative offset IDs + + void applyMap(); // remap per local name map + void mapRemainder(); // map any IDs we haven't touched yet + void stripDebug(); // strip all debug info + void stripDeadRefs(); // strips debug info for now-dead references after DCE + void strip(); // remove debug symbols + + std::vector<spirword_t> spv; // SPIR words + + std::vector<std::string> stripWhiteList; + + namemap_t nameMap; // ID names from OpName + + // Since we want to also do binary ops, we can't use std::vector<bool>. We could use + // boost::dynamic_bitset, but we're trying to avoid a boost dependency. + typedef std::uint64_t bits_t; + std::vector<bits_t> mapped; // which new IDs have been mapped + static const int mBits = sizeof(bits_t) * 4; + + bool isMapped(spv::Id id) const { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); } + void setMapped(spv::Id id) { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); } + void resizeMapped(spv::Id id) { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); } + size_t maxMappedId() const { return mapped.size() * mBits; } + + // Add a strip range for a given instruction starting at 'start' + // Note: avoiding brace initializers to please older versions of MSVC. + void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); } + + // Function start and end. Use unordered_map because we'll have + // many fewer functions than IDs. + std::unordered_map<spv::Id, range_t> fnPos; + + // Which functions are called, anywhere in the module, with a call count + std::unordered_map<spv::Id, int> fnCalls; + + posmap_t typeConstPos; // word positions that define types & consts (ordered) + posmap_rev_t idPosR; // reverse map from IDs to positions + typesize_map_t idTypeSizeMap; // maps each ID to its type size, if known. + + std::vector<spv::Id> idMapL; // ID {M}ap from {L}ocal to {G}lobal IDs + + spv::Id entryPoint; // module entry point + spv::Id largestNewId; // biggest new ID we have mapped anything to + + // Sections of the binary to strip, given as [begin,end) + std::vector<range_t> stripRange; + + // processing options: + std::uint32_t options; + int verbose; // verbosity level + + // Error latch: this is set if the error handler is ever executed.
It would be better to + // use a try/catch block and throw, but that's not desired for certain environments, so + // this is the alternative. + mutable bool errorLatch; + + static errorfn_t errorHandler; + static logfn_t logHandler; +}; + +} // namespace SPV + +#endif // defined (use_cpp11) +#endif // SPIRVREMAPPER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SpvBuilder.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SpvBuilder.h new file mode 100644 index 0000000..f7fdc6a --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SpvBuilder.h @@ -0,0 +1,959 @@ +// +// Copyright (C) 2014-2015 LunarG, Inc. +// Copyright (C) 2015-2020 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// "Builder" is an interface to fully build SPIR-V IR. Allocate one of +// these to build (a thread safe) internal SPIR-V representation (IR), +// and then dump it as a binary stream according to the SPIR-V specification. +// +// A Builder has a 1:1 relationship with a SPIR-V module. 
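Stepping back to SPVRemapper.h, which is now complete above: a hedged sketch of typical use of spirvbin_t, remapping a module in place with the DO_EVERYTHING bundle (strip + map + DCE) and routing errors through the static handler.

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>
    #include "glslang/SPIRV/SPVRemapper.h"

    void canonicalize(std::vector<std::uint32_t>& spirv) {
        spv::spirvbin_t::registerErrorHandler([](const std::string& msg) {
            std::fprintf(stderr, "remap error: %s\n", msg.c_str());
        });
        spv::spirvbin_t remapper(/*verbose=*/0);
        // Legacy interface without a debug-name white list.
        remapper.remap(spirv, spv::spirvbin_t::DO_EVERYTHING);
    }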
+// + +#pragma once +#ifndef SpvBuilder_H +#define SpvBuilder_H + +#include "Logger.h" +#include "spirv.hpp" +#include "spvIR.h" +namespace spv { + #include "GLSL.ext.KHR.h" + #include "NonSemanticShaderDebugInfo100.h" +} + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace spv { + +typedef enum { + Spv_1_0 = (1 << 16), + Spv_1_1 = (1 << 16) | (1 << 8), + Spv_1_2 = (1 << 16) | (2 << 8), + Spv_1_3 = (1 << 16) | (3 << 8), + Spv_1_4 = (1 << 16) | (4 << 8), + Spv_1_5 = (1 << 16) | (5 << 8), +} SpvVersion; + +class Builder { +public: + Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger); + virtual ~Builder(); + + static const int maxMatrixSize = 4; + + unsigned int getSpvVersion() const { return spvVersion; } + + void setSource(spv::SourceLanguage lang, int version) + { + sourceLang = lang; + sourceVersion = version; + } + spv::Id getStringId(const std::string& str) + { + auto sItr = stringIds.find(str); + if (sItr != stringIds.end()) + return sItr->second; + spv::Id strId = getUniqueId(); + Instruction* fileString = new Instruction(strId, NoType, OpString); + const char* file_c_str = str.c_str(); + fileString->addStringOperand(file_c_str); + strings.push_back(std::unique_ptr(fileString)); + module.mapInstruction(fileString); + stringIds[file_c_str] = strId; + return strId; + } + spv::Id getSourceFile() const + { + return sourceFileStringId; + } + void setSourceFile(const std::string& file) + { + sourceFileStringId = getStringId(file); + currentFileId = sourceFileStringId; + } + void setSourceText(const std::string& text) { sourceText = text; } + void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); } + void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); } + void setEmitOpLines() { emitOpLines = true; } + void setEmitNonSemanticShaderDebugInfo(bool const emit) + { + emitNonSemanticShaderDebugInfo = emit; + + if(emit) + { + importNonSemanticShaderDebugInfoInstructions(); + } + } + void setEmitNonSemanticShaderDebugSource(bool const src) + { + emitNonSemanticShaderDebugSource = src; + } + void addExtension(const char* ext) { extensions.insert(ext); } + void removeExtension(const char* ext) + { + extensions.erase(ext); + } + void addIncorporatedExtension(const char* ext, SpvVersion incorporatedVersion) + { + if (getSpvVersion() < static_cast(incorporatedVersion)) + addExtension(ext); + } + void promoteIncorporatedExtension(const char* baseExt, const char* promoExt, SpvVersion incorporatedVersion) + { + removeExtension(baseExt); + addIncorporatedExtension(promoExt, incorporatedVersion); + } + void addInclude(const std::string& name, const std::string& text) + { + spv::Id incId = getStringId(name); + includeFiles[incId] = &text; + } + Id import(const char*); + void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem) + { + addressModel = addr; + memoryModel = mem; + } + + void addCapability(spv::Capability cap) { capabilities.insert(cap); } + + // To get a new for anything needing a new one. + Id getUniqueId() { return ++uniqueId; } + + // To get a set of new s, e.g., for a set of function parameters + Id getUniqueIds(int numIds) + { + Id id = uniqueId + 1; + uniqueId += numIds; + return id; + } + + // Generate OpLine for non-filename-based #line directives (ie no filename + // seen yet): Log the current line, and if different than the last one, + // issue a new OpLine using the new line and current source file name. 
+ void setLine(int line); + + // If filename null, generate OpLine for non-filename-based line directives, + // else do filename-based: Log the current line and file, and if different + // than the last one, issue a new OpLine using the new line and file + // name. + void setLine(int line, const char* filename); + // Low-level OpLine. See setLine() for a layered helper. + void addLine(Id fileName, int line, int column); + void addDebugScopeAndLine(Id fileName, int line, int column); + + // For creating new types (will return old type if the requested one was already made). + Id makeVoidType(); + Id makeBoolType(bool const compilerGenerated = true); + Id makePointer(StorageClass, Id pointee); + Id makeForwardPointer(StorageClass); + Id makePointerFromForwardPointer(StorageClass, Id forwardPointerType, Id pointee); + Id makeIntegerType(int width, bool hasSign); // generic + Id makeIntType(int width) { return makeIntegerType(width, true); } + Id makeUintType(int width) { return makeIntegerType(width, false); } + Id makeFloatType(int width); + Id makeStructType(const std::vector& members, const char* name, bool const compilerGenerated = true); + Id makeStructResultType(Id type0, Id type1); + Id makeVectorType(Id component, int size); + Id makeMatrixType(Id component, int cols, int rows); + Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration + Id makeRuntimeArray(Id element); + Id makeFunctionType(Id returnType, const std::vector& paramTypes); + Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format); + Id makeSamplerType(); + Id makeSampledImageType(Id imageType); + Id makeCooperativeMatrixType(Id component, Id scope, Id rows, Id cols); + Id makeGenericType(spv::Op opcode, std::vector& operands); + + // SPIR-V NonSemantic Shader DebugInfo Instructions + struct DebugTypeLoc { + std::string name {}; + int line {0}; + int column {0}; + }; + std::unordered_map debugTypeLocs; + Id makeDebugInfoNone(); + Id makeBoolDebugType(int const size); + Id makeIntegerDebugType(int const width, bool const hasSign); + Id makeFloatDebugType(int const width); + Id makeSequentialDebugType(Id const baseType, Id const componentCount, NonSemanticShaderDebugInfo100Instructions const sequenceType); + Id makeArrayDebugType(Id const baseType, Id const componentCount); + Id makeVectorDebugType(Id const baseType, int const componentCount); + Id makeMatrixDebugType(Id const vectorType, int const vectorCount, bool columnMajor = true); + Id makeMemberDebugType(Id const memberType, DebugTypeLoc const& debugTypeLoc); + Id makeCompositeDebugType(std::vector const& memberTypes, char const*const name, + NonSemanticShaderDebugInfo100DebugCompositeType const tag, bool const isOpaqueType = false); + Id makeDebugSource(const Id fileName); + Id makeDebugCompilationUnit(); + Id createDebugGlobalVariable(Id const type, char const*const name, Id const variable); + Id createDebugLocalVariable(Id type, char const*const name, size_t const argNumber = 0); + Id makeDebugExpression(); + Id makeDebugDeclare(Id const debugLocalVariable, Id const localVariable); + Id makeDebugValue(Id const debugLocalVariable, Id const value); + Id makeDebugFunctionType(Id returnType, const std::vector& paramTypes); + Id makeDebugFunction(Function* function, Id nameId, Id funcTypeId); + Id makeDebugLexicalBlock(uint32_t line); + std::string unmangleFunctionName(std::string const& name) const; + + // accelerationStructureNV type + Id makeAccelerationStructureType(); + // 
rayQueryEXT type + Id makeRayQueryType(); + + // For querying about types. + Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } + Id getDerefTypeId(Id resultId) const; + Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } + Op getTypeClass(Id typeId) const { return getOpCode(typeId); } + Op getMostBasicTypeClass(Id typeId) const; + int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } + int getNumTypeConstituents(Id typeId) const; + int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } + Id getScalarTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId, int) const; + StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } + ImageFormat getImageTypeFormat(Id typeId) const + { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } + Id getResultingAccessChainType() const; + + bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } + bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } + bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } + bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } + bool isCooperativeMatrix(Id resultId)const { return isCooperativeMatrixType(getTypeId(resultId)); } + bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } + bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } + + bool isBoolType(Id typeId) + { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } + bool isIntType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) != 0; } + bool isUintType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) == 0; } + bool isFloatType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat; } + bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } + bool isScalarType(Id typeId) const + { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || + getTypeClass(typeId) == OpTypeBool; } + bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } + bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } + bool isStructType(Id typeId) const { return getTypeClass(typeId) == OpTypeStruct; } + bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } +#ifdef GLSLANG_WEB + bool isCooperativeMatrixType(Id typeId)const { return false; } +#else + bool isCooperativeMatrixType(Id typeId)const { return getTypeClass(typeId) == OpTypeCooperativeMatrixNV; } +#endif + bool isAggregateType(Id typeId) const + { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); } + bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } + bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } + bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } + bool containsType(Id typeId, Op typeOp, unsigned int width) const; + bool containsPhysicalStorageBufferOrArray(Id typeId) const; + + bool isConstantOpCode(Op opcode) const; + bool isSpecConstantOpCode(Op opcode) const; 
+    bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); }
+    bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; }
+    bool isSpecConstant(Id resultId) const { return isSpecConstantOpCode(getOpCode(resultId)); }
+    unsigned int getConstantScalar(Id resultId) const
+        { return module.getInstruction(resultId)->getImmediateOperand(0); }
+    StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); }
+
+    bool isVariableOpCode(Op opcode) const { return opcode == OpVariable; }
+    bool isVariable(Id resultId) const { return isVariableOpCode(getOpCode(resultId)); }
+    bool isGlobalStorage(Id resultId) const { return getStorageClass(resultId) != StorageClassFunction; }
+    bool isGlobalVariable(Id resultId) const { return isVariable(resultId) && isGlobalStorage(resultId); }
+    // See if a resultId is valid for use as an initializer.
+    bool isValidInitializer(Id resultId) const { return isConstant(resultId) || isGlobalVariable(resultId); }
+
+    bool isRayTracingOpCode(Op opcode) const;
+
+    int getScalarTypeWidth(Id typeId) const
+    {
+        Id scalarTypeId = getScalarTypeId(typeId);
+        assert(getTypeClass(scalarTypeId) == OpTypeInt || getTypeClass(scalarTypeId) == OpTypeFloat);
+        return module.getInstruction(scalarTypeId)->getImmediateOperand(0);
+    }
+
+    int getTypeNumColumns(Id typeId) const
+    {
+        assert(isMatrixType(typeId));
+        return getNumTypeConstituents(typeId);
+    }
+    int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); }
+    int getTypeNumRows(Id typeId) const
+    {
+        assert(isMatrixType(typeId));
+        return getNumTypeComponents(getContainedTypeId(typeId));
+    }
+    int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); }
+
+    Dim getTypeDimensionality(Id typeId) const
+    {
+        assert(isImageType(typeId));
+        return (Dim)module.getInstruction(typeId)->getImmediateOperand(1);
+    }
+    Id getImageType(Id resultId) const
+    {
+        Id typeId = getTypeId(resultId);
+        assert(isImageType(typeId) || isSampledImageType(typeId));
+        return isSampledImageType(typeId) ? module.getInstruction(typeId)->getIdOperand(0) : typeId;
+    }
+    bool isArrayedImageType(Id typeId) const
+    {
+        assert(isImageType(typeId));
+        return module.getInstruction(typeId)->getImmediateOperand(3) != 0;
+    }
+
+    // For making new constants (will return old constant if the requested one was already made).
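+    // E.g. (illustrative; 'b' is a hypothetical Builder) repeated requests
+    // yield the same Id rather than a duplicate instruction:
+    //
+    //     Id a = b.makeIntConstant(7);
+    //     Id c = b.makeIntConstant(7);   // a == c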
+    Id makeNullConstant(Id typeId);
+    Id makeBoolConstant(bool b, bool specConstant = false);
+    Id makeInt8Constant(int i, bool specConstant = false)
+        { return makeIntConstant(makeIntType(8), (unsigned)i, specConstant); }
+    Id makeUint8Constant(unsigned u, bool specConstant = false)
+        { return makeIntConstant(makeUintType(8), u, specConstant); }
+    Id makeInt16Constant(int i, bool specConstant = false)
+        { return makeIntConstant(makeIntType(16), (unsigned)i, specConstant); }
+    Id makeUint16Constant(unsigned u, bool specConstant = false)
+        { return makeIntConstant(makeUintType(16), u, specConstant); }
+    Id makeIntConstant(int i, bool specConstant = false)
+        { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); }
+    Id makeUintConstant(unsigned u, bool specConstant = false)
+        { return makeIntConstant(makeUintType(32), u, specConstant); }
+    Id makeInt64Constant(long long i, bool specConstant = false)
+        { return makeInt64Constant(makeIntType(64), (unsigned long long)i, specConstant); }
+    Id makeUint64Constant(unsigned long long u, bool specConstant = false)
+        { return makeInt64Constant(makeUintType(64), u, specConstant); }
+    Id makeFloatConstant(float f, bool specConstant = false);
+    Id makeDoubleConstant(double d, bool specConstant = false);
+    Id makeFloat16Constant(float f16, bool specConstant = false);
+    Id makeFpConstant(Id type, double d, bool specConstant = false);
+
+    Id importNonSemanticShaderDebugInfoInstructions();
+
+    // Turn the array of constants into a proper spv constant of the requested type.
+    Id makeCompositeConstant(Id type, const std::vector<Id>& comps, bool specConst = false);
+
+    // Methods for adding information outside the CFG.
+    Instruction* addEntryPoint(ExecutionModel, Function*, const char* name);
+    void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1);
+    void addExecutionMode(Function*, ExecutionMode mode, const std::vector<unsigned>& literals);
+    void addExecutionModeId(Function*, ExecutionMode mode, const std::vector<Id>& operandIds);
+    void addName(Id, const char* name);
+    void addMemberName(Id, int member, const char* name);
+    void addDecoration(Id, Decoration, int num = -1);
+    void addDecoration(Id, Decoration, const char*);
+    void addDecoration(Id, Decoration, const std::vector<unsigned>& literals);
+    void addDecoration(Id, Decoration, const std::vector<const char*>& strings);
+    void addDecorationId(Id id, Decoration, Id idDecoration);
+    void addDecorationId(Id id, Decoration, const std::vector<Id>& operandIds);
+    void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1);
+    void addMemberDecoration(Id, unsigned int member, Decoration, const char*);
+    void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector<unsigned>& literals);
+    void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector<const char*>& strings);
+
+    // At the end of what block do the next create*() instructions go?
+    // Also reset current last DebugScope and current source line to unknown
+    void setBuildPoint(Block* bp) {
+        buildPoint = bp;
+        lastDebugScopeId = NoResult;
+        currentLine = 0;
+    }
+    Block* getBuildPoint() const { return buildPoint; }
+
+    // Make the entry-point function. The returned pointer is only valid
+    // for the lifetime of this builder.
+    Function* makeEntryPoint(const char*);
+
+    // Make a shader-style function, and create its entry block if entry is non-zero.
+    // Return the function, pass back the entry.
+    // The returned pointer is only valid for the lifetime of this builder.
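+    // Illustrative call shape (all names hypothetical):
+    //
+    //     Block* entry = nullptr;
+    //     Function* f = b.makeFunctionEntry(NoPrecision, retType, "fn",
+    //                                       paramTypes, paramNames, precisions, &entry);
+    //     b.setBuildPoint(entry);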
+    Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name,
+        const std::vector<Id>& paramTypes, const std::vector<char const*>& paramNames,
+        const std::vector<std::vector<Decoration>>& precisions, Block **entry = 0);
+
+    // Create a return. An 'implicit' return is one not appearing in the source
+    // code. In the case of an implicit return, no post-return block is inserted.
+    void makeReturn(bool implicit, Id retVal = 0);
+
+    // Initialize state and generate instructions for new lexical scope
+    void enterScope(uint32_t line);
+
+    // Set state and generate instructions to exit current lexical scope
+    void leaveScope();
+
+    // Prepare builder for generation of instructions for a function.
+    void enterFunction(Function const* function);
+
+    // Generate all the code needed to finish up a function.
+    void leaveFunction();
+
+    // Create block terminator instruction for certain statements like
+    // discard, terminate-invocation, terminateRayEXT, or ignoreIntersectionEXT
+    void makeStatementTerminator(spv::Op opcode, const char *name);
+
+    // Create block terminator instruction for statements that have input operands
+    // such as OpEmitMeshTasksEXT
+    void makeStatementTerminator(spv::Op opcode, const std::vector<Id>& operands, const char* name);
+
+    // Create a global or function local or IO variable.
+    Id createVariable(Decoration precision, StorageClass storageClass, Id type, const char* name = nullptr,
+        Id initializer = NoResult, bool const compilerGenerated = true);
+
+    // Create an intermediate with an undefined value.
+    Id createUndefined(Id type);
+
+    // Store into an Id and return the l-value
+    void createStore(Id rValue, Id lValue, spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone,
+        spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0);
+
+    // Load from an Id and return it
+    Id createLoad(Id lValue, spv::Decoration precision,
+        spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone,
+        spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0);
+
+    // Create an OpAccessChain instruction
+    Id createAccessChain(StorageClass, Id base, const std::vector<Id>& offsets);
+
+    // Create an OpArrayLength instruction
+    Id createArrayLength(Id base, unsigned int member);
+
+    // Create an OpCooperativeMatrixLengthNV instruction
+    Id createCooperativeMatrixLength(Id type);
+
+    // Create an OpCompositeExtract instruction
+    Id createCompositeExtract(Id composite, Id typeId, unsigned index);
+    Id createCompositeExtract(Id composite, Id typeId, const std::vector<unsigned>& indexes);
+    Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index);
+    Id createCompositeInsert(Id object, Id composite, Id typeId, const std::vector<unsigned>& indexes);
+
+    Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex);
+    Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex);
+
+    void createNoResultOp(Op);
+    void createNoResultOp(Op, Id operand);
+    void createNoResultOp(Op, const std::vector<Id>& operands);
+    void createNoResultOp(Op, const std::vector<IdImmediate>& operands);
+    void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask);
+    void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics);
+    Id createUnaryOp(Op, Id typeId, Id operand);
+    Id createBinOp(Op, Id typeId, Id operand1, Id operand2);
+    Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3);
+    Id createOp(Op, Id typeId, const std::vector<Id>& operands);
+    Id createOp(Op, Id typeId, const std::vector<IdImmediate>& operands);
+    Id createFunctionCall(spv::Function*, const std::vector<spv::Id>&);
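+
+    // Illustrative flow through the create* helpers above (hypothetical ids):
+    //
+    //     Id x   = b.createLoad(ptrX, NoPrecision);
+    //     Id sum = b.createBinOp(OpFAdd, floatTypeId, x, y);
+    //     b.createStore(sum, ptrOut);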
+    Id createSpecConstantOp(Op, Id typeId, const std::vector<Id>& operands, const std::vector<unsigned>& literals);
+
+    // Take an rvalue (source) and a set of channels to extract from it to
+    // make a new rvalue, which is returned.
+    Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector<unsigned>& channels);
+
+    // Take a copy of an lvalue (target) and a source of components, and set the
+    // source components into the lvalue where the 'channels' say to put them.
+    // An updated version of the target is returned.
+    // (No true lvalue or stores are used.)
+    Id createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector<unsigned>& channels);
+
+    // If both the id and precision are valid, the id
+    // gets tagged with the requested precision.
+    // The passed in id is always the returned id, to simplify use patterns.
+    Id setPrecision(Id id, Decoration precision)
+    {
+        if (precision != NoPrecision && id != NoResult)
+            addDecoration(id, precision);
+
+        return id;
+    }
+
+    // Can smear a scalar to a vector for the following forms:
+    //   - promoteScalar(scalar, vector)  // smear scalar to width of vector
+    //   - promoteScalar(vector, scalar)  // smear scalar to width of vector
+    //   - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to
+    //   - promoteScalar(scalar, scalar)  // do nothing
+    // Other forms are not allowed.
+    //
+    // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'.
+    // The type of the created vector is a vector of components of the same type as the scalar.
+    //
+    // Note: One of the arguments will change, with the result coming back that way rather than
+    // through the return value.
+    void promoteScalar(Decoration precision, Id& left, Id& right);
+
+    // Make a value by smearing the scalar to fill the type.
+    // vectorType should be the correct type for making a vector of scalarVal.
+    // (No conversions are done.)
+    Id smearScalar(Decoration precision, Id scalarVal, Id vectorType);
+
+    // Create a call to a built-in function.
+    Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector<Id>& args);
+
+    // List of parameters used to create a texture operation
+    struct TextureParameters {
+        Id sampler;
+        Id coords;
+        Id bias;
+        Id lod;
+        Id Dref;
+        Id offset;
+        Id offsets;
+        Id gradX;
+        Id gradY;
+        Id sample;
+        Id component;
+        Id texelOut;
+        Id lodClamp;
+        Id granularity;
+        Id coarse;
+        bool nonprivate;
+        bool volatil;
+    };
+
+    // Select the correct texture operation based on all inputs, and emit the correct instruction
+    Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather,
+        bool noImplicit, const TextureParameters&, ImageOperandsMask);
+
+    // Emit the OpTextureQuery* instruction that was passed in.
+    // Figure out the right return value and type, and return it.
+    Id createTextureQueryCall(Op, const TextureParameters&, bool isUnsignedResult);
+
+    Id createSamplePositionCall(Decoration precision, Id, Id);
+
+    Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned);
+    Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id);
+
+    // Reduction comparison for composites: For equal and not-equal resulting in a scalar.
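+    // (I.e., the constituents are compared pairwise and the per-member
+    // results are reduced down to a single scalar bool.)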
+    Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */);
+
+    // OpCompositeConstruct
+    Id createCompositeConstruct(Id typeId, const std::vector<Id>& constituents);
+
+    // vector or scalar constructor
+    Id createConstructor(Decoration precision, const std::vector<Id>& sources, Id resultTypeId);
+
+    // matrix constructor
+    Id createMatrixConstructor(Decoration precision, const std::vector<Id>& sources, Id constructee);
+
+    // Helper to use for building nested control flow with if-then-else.
+    class If {
+    public:
+        If(Id condition, unsigned int ctrl, Builder& builder);
+        ~If() {}
+
+        void makeBeginElse();
+        void makeEndIf();
+
+    private:
+        If(const If&);
+        If& operator=(If&);
+
+        Builder& builder;
+        Id condition;
+        unsigned int control;
+        Function* function;
+        Block* headerBlock;
+        Block* thenBlock;
+        Block* elseBlock;
+        Block* mergeBlock;
+    };
+
+    // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing
+    // any case/default labels, all separated by one or more case/default labels. Each possible
+    // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this
+    // number space. How to compute the value is given by 'condition', as in switch(condition).
+    //
+    // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches.
+    //
+    // Use a defaultSegment < 0 if there is no default segment (to branch to post switch).
+    //
+    // Returns the right set of basic blocks to start each code segment with, so that the caller's
+    // recursion stack can hold the memory for it.
+    //
+    void makeSwitch(Id condition, unsigned int control, int numSegments, const std::vector<int>& caseValues,
+        const std::vector<int>& valueToSegment, int defaultSegment, std::vector<Block*>& segmentBB);
+
+    // Add a branch to the innermost switch's merge block.
+    void addSwitchBreak();
+
+    // Move to the next code segment, passing in the return argument in makeSwitch()
+    void nextSwitchSegment(std::vector<Block*>& segmentBB, int segment);
+
+    // Finish off the innermost switch.
+    void endSwitch(std::vector<Block*>& segmentBB);
+
+    struct LoopBlocks {
+        LoopBlocks(Block& head, Block& body, Block& merge, Block& continue_target) :
+            head(head), body(body), merge(merge), continue_target(continue_target) { }
+        Block &head, &body, &merge, &continue_target;
+    private:
+        LoopBlocks();
+        LoopBlocks& operator=(const LoopBlocks&) = delete;
+    };
+
+    // Start a new loop and prepare the builder to generate code for it. Until
+    // closeLoop() is called for this loop, createLoopContinue() and
+    // createLoopExit() will target its corresponding blocks.
+    LoopBlocks& makeNewLoop();
+
+    // Create a new block in the function containing the build point. Memory is
+    // owned by the function object.
+    Block& makeNewBlock();
+
+    // Add a branch to the continue_target of the current (innermost) loop.
+    void createLoopContinue();
+
+    // Add an exit (e.g. "break") from the innermost loop that we're currently
+    // in.
+    void createLoopExit();
+
+    // Close the innermost loop that you're in
+    void closeLoop();
+
+    //
+    // Access chain design for an R-Value vs. L-Value:
+    //
+    // There is a single access chain the builder is building at
+    // any particular time. Such a chain can be used for either a load or
+    // a store, when desired.
+    //
+    // Expressions can be r-values, l-values, or both, or only r-values:
+    //    a[b.c].d = ....  // l-value
+    //    ... = a[b.c].d;  // r-value, that also looks like an l-value
+    //    ++a[b.c].d;      // r-value and l-value
+    //    (x + y)[2];      // r-value only, can't possibly be l-value
+    //
+    // Computing an r-value means generating code. Hence,
+    // r-values should only be computed when they are needed, not speculatively.
+    //
+    // Computing an l-value means saving away information for later use in the compiler,
+    // no code is generated until the l-value is later dereferenced. It is okay
+    // to speculatively generate an l-value, just not okay to speculatively dereference it.
+    //
+    // The base of the access chain (the left-most variable or expression
+    // from which everything is based) can be set either as an l-value
+    // or as an r-value. Most efficient would be to set an l-value if one
+    // is available. If an expression was evaluated, the resulting r-value
+    // can be set as the chain base.
+    //
+    // The users of this single access chain can save and restore if they
+    // want to nest or manage multiple chains.
+    //
+
+    struct AccessChain {
+        Id base;                       // for l-values, pointer to the base object, for r-values, the base object
+        std::vector<Id> indexChain;
+        Id instr;                      // cache the instruction that generates this access chain
+        std::vector<unsigned> swizzle; // each std::vector element selects the next GLSL component number
+        Id component;                  // a dynamic component index, can coexist with a swizzle,
+                                       // done after the swizzle, NoResult if not present
+        Id preSwizzleBaseType;         // dereferenced type, before swizzle or component is applied;
+                                       // NoType unless a swizzle or component is present
+        bool isRValue;                 // true if 'base' is an r-value, otherwise, base is an l-value
+        unsigned int alignment;        // bitwise OR of alignment values passed in. Accumulates worst alignment.
+                                       // Only tracks base and (optional) component selection alignment.
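+
+        // Illustrative example (hypothetical ids): after pushing the index 'i'
+        // and the swizzle '.y' for the expression a[i].y, indexChain holds
+        // { iId } and swizzle holds { 1 }; no code is generated until the
+        // chain is actually dereferenced by a load or store.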
+        // Accumulate whether anything in the chain of structures has coherent decorations.
+        struct CoherentFlags {
+            CoherentFlags() { clear(); }
+#ifdef GLSLANG_WEB
+            void clear() { }
+            bool isVolatile() const { return false; }
+            CoherentFlags operator |=(const CoherentFlags &other) { return *this; }
+#else
+            bool isVolatile() const { return volatil; }
+            bool isNonUniform() const { return nonUniform; }
+            bool anyCoherent() const {
+                return coherent || devicecoherent || queuefamilycoherent || workgroupcoherent ||
+                    subgroupcoherent || shadercallcoherent;
+            }
+
+            unsigned coherent : 1;
+            unsigned devicecoherent : 1;
+            unsigned queuefamilycoherent : 1;
+            unsigned workgroupcoherent : 1;
+            unsigned subgroupcoherent : 1;
+            unsigned shadercallcoherent : 1;
+            unsigned nonprivate : 1;
+            unsigned volatil : 1;
+            unsigned isImage : 1;
+            unsigned nonUniform : 1;
+
+            void clear() {
+                coherent = 0;
+                devicecoherent = 0;
+                queuefamilycoherent = 0;
+                workgroupcoherent = 0;
+                subgroupcoherent = 0;
+                shadercallcoherent = 0;
+                nonprivate = 0;
+                volatil = 0;
+                isImage = 0;
+                nonUniform = 0;
+            }
+
+            CoherentFlags operator |=(const CoherentFlags &other) {
+                coherent |= other.coherent;
+                devicecoherent |= other.devicecoherent;
+                queuefamilycoherent |= other.queuefamilycoherent;
+                workgroupcoherent |= other.workgroupcoherent;
+                subgroupcoherent |= other.subgroupcoherent;
+                shadercallcoherent |= other.shadercallcoherent;
+                nonprivate |= other.nonprivate;
+                volatil |= other.volatil;
+                isImage |= other.isImage;
+                nonUniform |= other.nonUniform;
+                return *this;
+            }
+#endif
+        };
+        CoherentFlags coherentFlags;
+    };
+
+    //
+    // the SPIR-V builder maintains a single active chain that
+    // the following methods operate on
+    //
+
+    // for external save and restore
+    AccessChain getAccessChain() { return accessChain; }
+    void setAccessChain(AccessChain newChain) { accessChain = newChain; }
+
+    // clear accessChain
+    void clearAccessChain();
+
+    // set new base as an l-value base
+    void setAccessChainLValue(Id lValue)
+    {
+        assert(isPointer(lValue));
+        accessChain.base = lValue;
+    }
+
+    // set new base value as an r-value
+    void setAccessChainRValue(Id rValue)
+    {
+        accessChain.isRValue = true;
+        accessChain.base = rValue;
+    }
+
+    // push offset onto the end of the chain
+    void accessChainPush(Id offset, AccessChain::CoherentFlags coherentFlags, unsigned int alignment)
+    {
+        accessChain.indexChain.push_back(offset);
+        accessChain.coherentFlags |= coherentFlags;
+        accessChain.alignment |= alignment;
+    }
+
+    // push new swizzle onto the end of any existing swizzle, merging into a single swizzle
+    void accessChainPushSwizzle(std::vector<unsigned>& swizzle, Id preSwizzleBaseType,
+        AccessChain::CoherentFlags coherentFlags, unsigned int alignment);
+
+    // push a dynamic component selection onto the access chain, only applicable with a
+    // non-trivial swizzle or no swizzle
+    void accessChainPushComponent(Id component, Id preSwizzleBaseType, AccessChain::CoherentFlags coherentFlags,
+        unsigned int alignment)
+    {
+        if (accessChain.swizzle.size() != 1) {
+            accessChain.component = component;
+            if (accessChain.preSwizzleBaseType == NoType)
+                accessChain.preSwizzleBaseType = preSwizzleBaseType;
+        }
+        accessChain.coherentFlags |= coherentFlags;
+        accessChain.alignment |= alignment;
+    }
+
+    // use accessChain and swizzle to store value
+    void accessChainStore(Id rvalue, Decoration nonUniform,
+        spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone,
+        spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0);
+
+    // use accessChain and swizzle to load an r-value
+    Id accessChainLoad(Decoration precision, Decoration l_nonUniform, Decoration r_nonUniform, Id ResultType,
+        spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, spv::Scope scope = spv::ScopeMax,
+        unsigned int alignment = 0);
+
+    // Return whether or not the access chain can be represented in SPIR-V
+    // as an l-value.
+    // E.g., a[3].yx cannot be, while a[3].y and a[3].y[x] can be.
+    bool isSpvLvalue() const { return accessChain.swizzle.size() <= 1; }
+
+    // get the direct pointer for an l-value
+    Id accessChainGetLValue();
+
+    // Get the inferred SPIR-V type of the result of the current access chain,
+    // based on the type of the base and the chain of dereferences.
+    Id accessChainGetInferredType();
+
+    // Add capabilities, extensions, remove unneeded decorations, etc.,
+    // based on the resulting SPIR-V.
+    void postProcess();
+
+    // Prune unreachable blocks in the CFG and remove unneeded decorations.
+    void postProcessCFG();
+
+#ifndef GLSLANG_WEB
+    // Add capabilities, extensions based on instructions in the module.
+    void postProcessFeatures();
+    // Hook to visit each instruction in a block in a function
+    void postProcess(Instruction&);
+    // Hook to visit each non-32-bit sized float/int operation in a block.
+    void postProcessType(const Instruction&, spv::Id typeId);
+#endif
+
+    void dump(std::vector<unsigned int>&) const;
+
+    void createBranch(Block* block);
+    void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock);
+    void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control,
+        const std::vector<unsigned int>& operands);
+
+    // Sets to generate opcode for specialization constants.
+    void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; }
+    // Sets to generate opcode for non-specialization constants (normal mode).
+    void setToNormalCodeGenMode() { generatingOpCodeForSpecConst = false; }
+    // Check if the builder is generating code for spec constants.
+    bool isInSpecConstCodeGenMode() { return generatingOpCodeForSpecConst; }
+
+ protected:
+    Id makeIntConstant(Id typeId, unsigned value, bool specConstant);
+    Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant);
+    Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value);
+    Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2);
+    Id findCompositeConstant(Op typeClass, Id typeId, const std::vector<Id>& comps);
+    Id findStructConstant(Id typeId, const std::vector<Id>& comps);
+    Id collapseAccessChain();
+    void remapDynamicSwizzle();
+    void transferAccessChainSwizzle(bool dynamic);
+    void simplifyAccessChainSwizzle();
+    void createAndSetNoPredecessorBlock(const char*);
+    void createSelectionMerge(Block* mergeBlock, unsigned int control);
+    void dumpSourceInstructions(std::vector<unsigned int>&) const;
+    void dumpSourceInstructions(const spv::Id fileId, const std::string& text, std::vector<unsigned int>&) const;
+    void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const;
+    void dumpModuleProcesses(std::vector<unsigned int>&) const;
+    spv::MemoryAccessMask sanitizeMemoryAccessForStorageClass(spv::MemoryAccessMask memoryAccess, StorageClass sc)
+        const;
+
+    unsigned int spvVersion; // the version of SPIR-V to emit in the header
+    SourceLanguage sourceLang;
+    int sourceVersion;
+    spv::Id sourceFileStringId;
+    spv::Id nonSemanticShaderCompilationUnitId {0};
+    spv::Id nonSemanticShaderDebugInfo {0};
+    spv::Id debugInfoNone {0};
+    spv::Id debugExpression {0}; // Debug expression with zero operations.
+    std::string sourceText;
+    int currentLine;
+    const char* currentFile;
+    spv::Id currentFileId;
+    std::stack<spv::Id> currentDebugScopeId;
+    spv::Id lastDebugScopeId;
+    bool emitOpLines;
+    bool emitNonSemanticShaderDebugInfo;
+    bool restoreNonSemanticShaderDebugInfo;
+    bool emitNonSemanticShaderDebugSource;
+    std::set<std::string> extensions;
+    std::vector<const char*> sourceExtensions;
+    std::vector<const char*> moduleProcesses;
+    AddressingModel addressModel;
+    MemoryModel memoryModel;
+    std::set<spv::Capability> capabilities;
+    int builderNumber;
+    Module module;
+    Block* buildPoint;
+    Id uniqueId;
+    Function* entryPointFunction;
+    bool generatingOpCodeForSpecConst;
+    AccessChain accessChain;
+
+    // special blocks of instructions for output
+    std::vector<std::unique_ptr<Instruction> > strings;
+    std::vector<std::unique_ptr<Instruction> > imports;
+    std::vector<std::unique_ptr<Instruction> > entryPoints;
+    std::vector<std::unique_ptr<Instruction> > executionModes;
+    std::vector<std::unique_ptr<Instruction> > names;
+    std::vector<std::unique_ptr<Instruction> > decorations;
+    std::vector<std::unique_ptr<Instruction> > constantsTypesGlobals;
+    std::vector<std::unique_ptr<Instruction> > externals;
+    std::vector<std::unique_ptr<Function> > functions;
+
+    // not output, internally used for quick & dirty canonical (unique) creation
+
+    // map type opcodes to constant inst.
+    std::unordered_map<unsigned int, std::vector<Instruction*>> groupedConstants;
+    // map struct-id to constant instructions
+    std::unordered_map<unsigned int, std::vector<Instruction*>> groupedStructConstants;
+    // map type opcodes to type instructions
+    std::unordered_map<unsigned int, std::vector<Instruction*>> groupedTypes;
+    // map type opcodes to debug type instructions
+    std::unordered_map<unsigned int, std::vector<Instruction*>> groupedDebugTypes;
+    // list of OpConstantNull instructions
+    std::vector<Instruction*> nullConstants;
+
+    // stack of switches
+    std::stack<Block*> switchMerges;
+
+    // Our loop stack.
+    std::stack<LoopBlocks*> loops;
+
+    // map from strings to their string ids
+    std::unordered_map<std::string, spv::Id> stringIds;
+
+    // map from include file name ids to their contents
+    std::map<spv::Id, const std::string*> includeFiles;
+
+    // map from core id to debug id
+    std::map<spv::Id, spv::Id> debugId;
+
+    // map from file name string id to DebugSource id
+    std::unordered_map<spv::Id, spv::Id> debugSourceId;
+
+    // The stream for outputting warnings and errors.
+    SpvBuildLogger* logger;
+}; // end Builder class
+
+}; // end spv namespace
+
+#endif // SpvBuilder_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SpvTools.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SpvTools.h
new file mode 100644
index 0000000..5386048
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/SpvTools.h
@@ -0,0 +1,93 @@
+//
+// Copyright (C) 2014-2016 LunarG, Inc.
+// Copyright (C) 2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Call into SPIRV-Tools to disassemble, validate, and optimize.
+//
+
+#pragma once
+#ifndef GLSLANG_SPV_TOOLS_H
+#define GLSLANG_SPV_TOOLS_H
+
+#if ENABLE_OPT
+#include <vector>
+#include <ostream>
+#include "spirv-tools/libspirv.h"
+#endif
+
+#include "glslang/MachineIndependent/localintermediate.h"
+#include "Logger.h"
+
+namespace glslang {
+
+struct SpvOptions {
+    bool generateDebugInfo {false};
+    bool stripDebugInfo {false};
+    bool disableOptimizer {true};
+    bool optimizeSize {false};
+    bool disassemble {false};
+    bool validate {false};
+    bool emitNonSemanticShaderDebugInfo {false};
+    bool emitNonSemanticShaderDebugSource {false};
+};
+
+#if ENABLE_OPT
+
+// Use the SPIRV-Tools disassembler to print SPIR-V using a SPV_ENV_UNIVERSAL_1_3 environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv);
+
+// Use the SPIRV-Tools disassembler to print SPIR-V with a provided SPIR-V environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv,
+                           spv_target_env requested_context);
+
+// Apply the SPIRV-Tools validator to generated SPIR-V.
+void SpirvToolsValidate(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                        spv::SpvBuildLogger*, bool prelegalization);
+
+// Apply the SPIRV-Tools optimizer to generated SPIR-V. HLSL SPIR-V is legalized in the process.
+void SpirvToolsTransform(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                         spv::SpvBuildLogger*, const SpvOptions*);
+
+// Apply the SPIRV-Tools optimizer to strip debug info from SPIR-V. This is implicitly done by
+// SpirvToolsTransform if spvOptions->stripDebugInfo is set, but can be called separately if
+// optimization is disabled.
+void SpirvToolsStripDebugInfo(const glslang::TIntermediate& intermediate,
+                              std::vector<unsigned int>& spirv, spv::SpvBuildLogger*);
+
+#endif
+
+} // end namespace glslang
+
+#endif // GLSLANG_SPV_TOOLS_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/bitutils.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/bitutils.h
new file mode 100644
index 0000000..22e44ce
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/bitutils.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
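+
+// (Illustrative: BitwiseCast below reinterprets the bits of a value without
+//  undefined behavior, e.g. BitwiseCast<uint32_t>(1.0f) == 0x3F800000u.)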
+
+#ifndef LIBSPIRV_UTIL_BITUTILS_H_
+#define LIBSPIRV_UTIL_BITUTILS_H_
+
+#include <cstdint>
+#include <cstring>
+
+namespace spvutils {
+
+// Performs a bitwise copy of source to the destination type Dest.
+template <typename Dest, typename Src>
+Dest BitwiseCast(Src source) {
+  Dest dest;
+  static_assert(sizeof(source) == sizeof(dest),
+                "BitwiseCast: Source and destination must have the same size");
+  std::memcpy(static_cast<void*>(&dest), &source, sizeof(dest));
+  return dest;
+}
+
+// SetBits<T, First, Num> returns an integer of type <T> with bits set
+// for position <First> through <First + Num - 1>, counting from the least
+// significant bit. In particular when Num == 0, no positions are set to 1.
+// A static assert will be triggered if First + Num > sizeof(T) * 8, that is,
+// a bit that will not fit in the underlying type is set.
+template <typename T, size_t First = 0, size_t Num = 0>
+struct SetBits {
+  static_assert(First < sizeof(T) * 8,
+                "Tried to set a bit that is shifted too far.");
+  const static T get = (T(1) << First) | SetBits<T, First + 1, Num - 1>::get;
+};
+
+template <typename T, size_t Last>
+struct SetBits<T, Last, 0> {
+  const static T get = T(0);
+};
+
+// This is all compile-time so we can put our tests right here.
+static_assert(SetBits<uint32_t, 0, 0>::get == uint32_t(0x00000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 1>::get == uint32_t(0x00000001),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 31, 1>::get == uint32_t(0x80000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 1, 2>::get == uint32_t(0x00000006),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 30, 2>::get == uint32_t(0xc0000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 31>::get == uint32_t(0x7FFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 32>::get == uint32_t(0xFFFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 16, 16>::get == uint32_t(0xFFFF0000),
+              "SetBits failed");
+
+static_assert(SetBits<uint64_t, 0, 1>::get == uint64_t(0x0000000000000001LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 63, 1>::get == uint64_t(0x8000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 62, 2>::get == uint64_t(0xc000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 31, 1>::get == uint64_t(0x0000000080000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 16, 16>::get == uint64_t(0x00000000FFFF0000LL),
+              "SetBits failed");
+
+}  // namespace spvutils
+
+#endif  // LIBSPIRV_UTIL_BITUTILS_H_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/disassemble.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/disassemble.h
new file mode 100644
index 0000000..b6a4635
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/disassemble.h
@@ -0,0 +1,53 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Disassembler for SPIR-V.
+//
+
+#pragma once
+#ifndef disassembler_H
+#define disassembler_H
+
+#include <iostream>
+#include <vector>
+
+namespace spv {
+
+    // disassemble with glslang custom disassembler
+    void Disassemble(std::ostream& out, const std::vector<unsigned int>&);
+
+} // end namespace spv
+
+#endif // disassembler_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/doc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/doc.h
new file mode 100644
index 0000000..2a0b28c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/doc.h
@@ -0,0 +1,259 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Parameterize the SPIR-V enumerants.
+//
+
+#pragma once
+
+#include "spirv.hpp"
+
+#include <vector>
+
+namespace spv {
+
+// Fill in all the parameters
+void Parameterize();
+
+// Return the English names of all the enums.
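+// (E.g., illustrative: const char* name = spv::OpcodeString(op); is handy
+// when logging or printing disassembly.)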
+const char* SourceString(int);
+const char* AddressingString(int);
+const char* MemoryString(int);
+const char* ExecutionModelString(int);
+const char* ExecutionModeString(int);
+const char* StorageClassString(int);
+const char* DecorationString(int);
+const char* BuiltInString(int);
+const char* DimensionString(int);
+const char* SelectControlString(int);
+const char* LoopControlString(int);
+const char* FunctionControlString(int);
+const char* SamplerAddressingModeString(int);
+const char* SamplerFilterModeString(int);
+const char* ImageFormatString(int);
+const char* ImageChannelOrderString(int);
+const char* ImageChannelTypeString(int);
+const char* ImageChannelDataTypeString(int type);
+const char* ImageOperandsString(int format);
+const char* ImageOperands(int);
+const char* FPFastMathString(int);
+const char* FPRoundingModeString(int);
+const char* LinkageTypeString(int);
+const char* FuncParamAttrString(int);
+const char* AccessQualifierString(int);
+const char* MemorySemanticsString(int);
+const char* MemoryAccessString(int);
+const char* ExecutionScopeString(int);
+const char* GroupOperationString(int);
+const char* KernelEnqueueFlagsString(int);
+const char* KernelProfilingInfoString(int);
+const char* CapabilityString(int);
+const char* OpcodeString(int);
+const char* ScopeString(int mem);
+
+// For grouping opcodes into subsections
+enum OpcodeClass {
+    OpClassMisc,
+    OpClassDebug,
+    OpClassAnnotate,
+    OpClassExtension,
+    OpClassMode,
+    OpClassType,
+    OpClassConstant,
+    OpClassMemory,
+    OpClassFunction,
+    OpClassImage,
+    OpClassConvert,
+    OpClassComposite,
+    OpClassArithmetic,
+    OpClassBit,
+    OpClassRelationalLogical,
+    OpClassDerivative,
+    OpClassFlowControl,
+    OpClassAtomic,
+    OpClassPrimitive,
+    OpClassBarrier,
+    OpClassGroup,
+    OpClassDeviceSideEnqueue,
+    OpClassPipe,
+
+    OpClassCount,
+    OpClassMissing  // all instructions start out as missing
+};
+
+// For parameterizing operands.
+enum OperandClass {
+    OperandNone,
+    OperandId,
+    OperandVariableIds,
+    OperandOptionalLiteral,
+    OperandOptionalLiteralString,
+    OperandVariableLiterals,
+    OperandVariableIdLiteral,
+    OperandVariableLiteralId,
+    OperandLiteralNumber,
+    OperandLiteralString,
+    OperandVariableLiteralStrings,
+    OperandSource,
+    OperandExecutionModel,
+    OperandAddressing,
+    OperandMemory,
+    OperandExecutionMode,
+    OperandStorage,
+    OperandDimensionality,
+    OperandSamplerAddressingMode,
+    OperandSamplerFilterMode,
+    OperandSamplerImageFormat,
+    OperandImageChannelOrder,
+    OperandImageChannelDataType,
+    OperandImageOperands,
+    OperandFPFastMath,
+    OperandFPRoundingMode,
+    OperandLinkageType,
+    OperandAccessQualifier,
+    OperandFuncParamAttr,
+    OperandDecoration,
+    OperandBuiltIn,
+    OperandSelect,
+    OperandLoop,
+    OperandFunction,
+    OperandMemorySemantics,
+    OperandMemoryAccess,
+    OperandScope,
+    OperandGroupOperation,
+    OperandKernelEnqueueFlags,
+    OperandKernelProfilingInfo,
+    OperandCapability,
+
+    OperandOpcode,
+
+    OperandCount
+};
+
+// Any specific enum can have a set of capabilities that allow it:
+typedef std::vector<Capability> EnumCaps;
+
+// Parameterize a set of operands with their OperandClass(es) and descriptions.
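+// (Illustrative use, as in the doc tables:
+//     operands.push(OperandId, "'Pointer'");
+//     operands.push(OperandMemoryAccess, "", true);   // trailing optional operand
+// )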
+class OperandParameters {
+public:
+    OperandParameters() { }
+    void push(OperandClass oc, const char* d, bool opt = false)
+    {
+        opClass.push_back(oc);
+        desc.push_back(d);
+        optional.push_back(opt);
+    }
+    void setOptional();
+    OperandClass getClass(int op) const { return opClass[op]; }
+    const char* getDesc(int op) const { return desc[op]; }
+    bool isOptional(int op) const { return optional[op]; }
+    int getNum() const { return (int)opClass.size(); }
+
+protected:
+    std::vector<OperandClass> opClass;
+    std::vector<const char*> desc;
+    std::vector<bool> optional;
+};
+
+// Parameterize an enumerant
+class EnumParameters {
+public:
+    EnumParameters() : desc(0) { }
+    const char* desc;
+};
+
+// Parameterize a set of enumerants that form an enum
+class EnumDefinition : public EnumParameters {
+public:
+    EnumDefinition() :
+        ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { }
+    void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false)
+    {
+        ceiling = ceil;
+        getName = name;
+        bitmask = mask;
+        enumParams = ep;
+    }
+    void setOperands(OperandParameters* op) { operandParams = op; }
+    int ceiling;   // ceiling of enumerants
+    bool bitmask;  // true if these enumerants combine into a bitmask
+    const char* (*getName)(int);      // a function that returns the name for each enumerant value (or shift)
+    EnumParameters* enumParams;       // parameters for each individual enumerant
+    OperandParameters* operandParams; // sets of operands
+};
+
+// Parameterize an instruction's logical format, including its known set of operands,
+// per OperandParameters above.
+class InstructionParameters {
+public:
+    InstructionParameters() :
+        opDesc("TBD"),
+        opClass(OpClassMissing),
+        typePresent(true),   // most normal, only exceptions have to be spelled out
+        resultPresent(true)  // most normal, only exceptions have to be spelled out
+    { }
+
+    void setResultAndType(bool r, bool t)
+    {
+        resultPresent = r;
+        typePresent = t;
+    }
+
+    bool hasResult() const { return resultPresent != 0; }
+    bool hasType() const { return typePresent != 0; }
+
+    const char* opDesc;
+    OpcodeClass opClass;
+    OperandParameters operands;
+
+protected:
+    int typePresent : 1;
+    int resultPresent : 1;
+};
+
+// The set of objects that hold all the instruction/operand
+// parameterization information.
+extern InstructionParameters InstructionDesc[];
+
+// These hold definitions of the enumerants used for operands
+extern EnumDefinition OperandClassParams[];
+
+const char* GetOperandDesc(OperandClass operand);
+void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false);
+const char* AccessQualifierString(int attr);
+
+void PrintOperands(const OperandParameters& operands, int reservedOperands);
+
+} // end namespace spv
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/hex_float.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/hex_float.h
new file mode 100644
index 0000000..8be8e9f
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/hex_float.h
@@ -0,0 +1,1078 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_
+#define LIBSPIRV_UTIL_HEX_FLOAT_H_
+
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+namespace std {
+bool isnan(double f)
+{
+  return ::_isnan(f) != 0;
+}
+bool isinf(double f)
+{
+  return ::_finite(f) == 0;
+}
+}
+#endif
+
+#include "bitutils.h"
+
+namespace spvutils {
+
+class Float16 {
+ public:
+  Float16(uint16_t v) : val(v) {}
+  Float16() {}
+  static bool isNan(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0);
+  }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0);
+  }
+  Float16(const Float16& other) { val = other.val; }
+  uint16_t get_value() const { return val; }
+
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16(0x7bff); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16(0xfbff); }
+
+ private:
+  uint16_t val;
+};
+
+// To specialize this type, you must override uint_type to define
+// an unsigned integer that can fit your floating point type.
+// You must also add a isNan function that returns true if
+// a value is Nan.
+template <typename T>
+struct FloatProxyTraits {
+  typedef void uint_type;
+};
+
+template <>
+struct FloatProxyTraits<float> {
+  typedef uint32_t uint_type;
+  static bool isNan(float f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(float f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static float max() { return std::numeric_limits<float>::max(); }
+  // Returns the lowest normal value.
+  static float lowest() { return std::numeric_limits<float>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<double> {
+  typedef uint64_t uint_type;
+  static bool isNan(double f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(double f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static double max() { return std::numeric_limits<double>::max(); }
+  // Returns the lowest normal value.
+  static double lowest() { return std::numeric_limits<double>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<Float16> {
+  typedef uint16_t uint_type;
+  static bool isNan(Float16 f) { return Float16::isNan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(Float16 f) { return Float16::isInfinity(f); }
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16::max(); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16::lowest(); }
+};
+
+// Since copying a floating point number (especially if it is NaN)
+// does not guarantee that bits are preserved, this class lets us
+// store the type and use it as a float when necessary.
+template <typename T>
+class FloatProxy {
+ public:
+  typedef typename FloatProxyTraits<T>::uint_type uint_type;
+
+  // Since this is to act similar to the normal floats,
+  // do not initialize the data by default.
+  FloatProxy() {}
+
+  // Intentionally non-explicit. This is a proxy type so
+  // implicit conversions allow us to use it more transparently.
+  FloatProxy(T val) { data_ = BitwiseCast<uint_type>(val); }
+
+  // Intentionally non-explicit. This is a proxy type so
+  // implicit conversions allow us to use it more transparently.
+  FloatProxy(uint_type val) { data_ = val; }
+
+  // This is helpful to have and is guaranteed not to stomp bits.
+  FloatProxy<T> operator-() const {
+    return static_cast<uint_type>(data_ ^
+                                  (uint_type(0x1) << (sizeof(T) * 8 - 1)));
+  }
+
+  // Returns the data as a floating point value.
+  T getAsFloat() const { return BitwiseCast<T>(data_); }
+
+  // Returns the raw data.
+  uint_type data() const { return data_; }
+
+  // Returns true if the value represents any type of NaN.
+  bool isNan() { return FloatProxyTraits<T>::isNan(getAsFloat()); }
+  // Returns true if the value represents any type of infinity.
+  bool isInfinity() { return FloatProxyTraits<T>::isInfinity(getAsFloat()); }
+
+  // Returns the maximum normal value.
+  static FloatProxy<T> max() {
+    return FloatProxy<T>(FloatProxyTraits<T>::max());
+  }
+  // Returns the lowest normal value.
+  static FloatProxy<T> lowest() {
+    return FloatProxy<T>(FloatProxyTraits<T>::lowest());
+  }
+
+ private:
+  uint_type data_;
+};
+
+template <typename T>
+bool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) {
+  return first.data() == second.data();
+}
+
+// Reads a FloatProxy value as a normal float from a stream.
+template <typename T>
+std::istream& operator>>(std::istream& is, FloatProxy<T>& value) {
+  T float_val;
+  is >> float_val;
+  value = FloatProxy<T>(float_val);
+  return is;
+}
+
+// This is an example traits. It is not meant to be used in practice, but will
+// be the default for any non-specialized type.
+template <typename T>
+struct HexFloatTraits {
+  // Integer type that can store this hex-float.
+  typedef void uint_type;
+  // Signed integer type that can store this hex-float.
+  typedef void int_type;
+  // The numerical type that this HexFloat represents.
+  typedef void underlying_type;
+  // The type needed to construct the underlying type.
+  typedef void native_type;
+  // The number of bits that are actually relevant in the uint_type.
+  // This allows us to deal with, for example, 24-bit values in a 32-bit
+  // integer.
+  static const uint32_t num_used_bits = 0;
+  // Number of bits that represent the exponent.
+  static const uint32_t num_exponent_bits = 0;
+  // Number of bits that represent the fractional part.
+  static const uint32_t num_fraction_bits = 0;
+  // The bias of the exponent. (How much we need to subtract from the stored
+  // value to get the correct value.)
+  static const uint32_t exponent_bias = 0;
+};
+
+// Traits for IEEE float.
+// 1 sign bit, 8 exponent bits, 23 fractional bits.
+template <>
+struct HexFloatTraits<FloatProxy<float>> {
+  typedef uint32_t uint_type;
+  typedef int32_t int_type;
+  typedef FloatProxy<float> underlying_type;
+  typedef float native_type;
+  static const uint_type num_used_bits = 32;
+  static const uint_type num_exponent_bits = 8;
+  static const uint_type num_fraction_bits = 23;
+  static const uint_type exponent_bias = 127;
+};
+
+// Traits for IEEE double.
+// 1 sign bit, 11 exponent bits, 52 fractional bits.
+template <>
+struct HexFloatTraits<FloatProxy<double>> {
+  typedef uint64_t uint_type;
+  typedef int64_t int_type;
+  typedef FloatProxy<double> underlying_type;
+  typedef double native_type;
+  static const uint_type num_used_bits = 64;
+  static const uint_type num_exponent_bits = 11;
+  static const uint_type num_fraction_bits = 52;
+  static const uint_type exponent_bias = 1023;
+};
+
+// Traits for IEEE half.
+// 1 sign bit, 5 exponent bits, 10 fractional bits.
+template <>
+struct HexFloatTraits<FloatProxy<Float16>> {
+  typedef uint16_t uint_type;
+  typedef int16_t int_type;
+  typedef uint16_t underlying_type;
+  typedef uint16_t native_type;
+  static const uint_type num_used_bits = 16;
+  static const uint_type num_exponent_bits = 5;
+  static const uint_type num_fraction_bits = 10;
+  static const uint_type exponent_bias = 15;
+};
+
+enum round_direction {
+  kRoundToZero,
+  kRoundToNearestEven,
+  kRoundToPositiveInfinity,
+  kRoundToNegativeInfinity
+};
+
+// Template class that houses a floating point number.
+// It exposes a number of constants based on the provided traits to
+// assist in interpreting the bits of the value.
+template <typename T, typename Traits = HexFloatTraits<T>>
+class HexFloat {
+ public:
+  typedef typename Traits::uint_type uint_type;
+  typedef typename Traits::int_type int_type;
+  typedef typename Traits::underlying_type underlying_type;
+  typedef typename Traits::native_type native_type;
+
+  explicit HexFloat(T f) : value_(f) {}
+
+  T value() const { return value_; }
+  void set_value(T f) { value_ = f; }
+
+  // These are all written like this because it is convenient to have
+  // compile-time constants for all of these values.
+
+  // Pass-through values to save typing.
+  static const uint32_t num_used_bits = Traits::num_used_bits;
+  static const uint32_t exponent_bias = Traits::exponent_bias;
+  static const uint32_t num_exponent_bits = Traits::num_exponent_bits;
+  static const uint32_t num_fraction_bits = Traits::num_fraction_bits;
+
+  // Number of bits to shift left to set the highest relevant bit.
+  static const uint32_t top_bit_left_shift = num_used_bits - 1;
+  // How many nibbles (hex characters) the fractional part takes up.
+  static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4;
+  // If the fractional part does not fit evenly into a hex character (4-bits)
+  // then we have to left-shift to get rid of leading 0s. This is the amount
+  // we have to shift (might be 0).
+  static const uint32_t num_overflow_bits =
+      fraction_nibbles * 4 - num_fraction_bits;
+
+  // The representation of the fraction, not the actual bits. This
+  // includes the leading bit that is usually implicit.
+  static const uint_type fraction_represent_mask =
+      spvutils::SetBits<uint_type, 0,
+                        num_fraction_bits + num_overflow_bits>::get;
+
+  // The topmost bit in the nibble-aligned fraction.
+  static const uint_type fraction_top_bit =
+      uint_type(1) << (num_fraction_bits + num_overflow_bits - 1);
+
+  // The least significant bit in the exponent, which is also the bit
+  // immediately to the left of the significand.
+  static const uint_type first_exponent_bit = uint_type(1)
+                                              << (num_fraction_bits);
+
+  // The mask for the encoded fraction. It does not include the
+  // implicit bit.
+  static const uint_type fraction_encode_mask =
+      spvutils::SetBits<uint_type, 0, num_fraction_bits>::get;
+
+  // The bit that is used as a sign.
+  static const uint_type sign_mask = uint_type(1) << top_bit_left_shift;
+
+  // The bits that represent the exponent.
+  static const uint_type exponent_mask =
+      spvutils::SetBits<uint_type, num_fraction_bits, num_exponent_bits>::get;
+
+  // How far left the exponent is shifted.
+  static const uint32_t exponent_left_shift = num_fraction_bits;
+
+  // How far from the right edge the fraction is shifted.
+  static const uint32_t fraction_right_shift =
+      static_cast<uint32_t>(sizeof(uint_type) * 8) - num_fraction_bits;
+
+  // The maximum representable unbiased exponent.
+  static const int_type max_exponent =
+      (exponent_mask >> num_fraction_bits) - exponent_bias;
+  // The minimum representable exponent for normalized numbers.
+  static const int_type min_exponent = -static_cast<int_type>(exponent_bias);
+
+  // Returns the bits associated with the value.
+  uint_type getBits() const { return spvutils::BitwiseCast<uint_type>(value_); }
+
+  // Returns the bits associated with the value, without the leading sign bit.
+  uint_type getUnsignedBits() const {
+    return static_cast<uint_type>(spvutils::BitwiseCast<uint_type>(value_) &
+                                  ~sign_mask);
+  }
+
+  // Returns the bits associated with the exponent, shifted to start at the
+  // lsb of the type.
+  const uint_type getExponentBits() const {
+    return static_cast<uint_type>((getBits() & exponent_mask) >>
+                                  num_fraction_bits);
+  }
+
+  // Returns the exponent in unbiased form. This is the exponent in the
+  // human-friendly form.
+  const int_type getUnbiasedExponent() const {
+    return static_cast<int_type>(getExponentBits() - exponent_bias);
+  }
+
+  // Returns just the significand bits from the value.
+  const uint_type getSignificandBits() const {
+    return getBits() & fraction_encode_mask;
+  }
+
+  // If the number was normalized, returns the unbiased exponent.
+  // If the number was denormal, normalize the exponent first.
+  const int_type getUnbiasedNormalizedExponent() const {
+    if ((getBits() & ~sign_mask) == 0) {  // special case if everything is 0
+      return 0;
+    }
+    int_type exp = getUnbiasedExponent();
+    if (exp == min_exponent) {  // We are in denorm land.
+      uint_type significand_bits = getSignificandBits();
+      while ((significand_bits & (first_exponent_bit >> 1)) == 0) {
+        significand_bits = static_cast<uint_type>(significand_bits << 1);
+        exp = static_cast<int_type>(exp - 1);
+      }
+      significand_bits &= fraction_encode_mask;
+    }
+    return exp;
+  }
+
+  // Returns the significand after it has been normalized.
+  const uint_type getNormalizedSignificand() const {
+    int_type unbiased_exponent = getUnbiasedNormalizedExponent();
+    uint_type significand = getSignificandBits();
+    for (int_type i = unbiased_exponent; i <= min_exponent; ++i) {
+      significand = static_cast<uint_type>(significand << 1);
+    }
+    significand &= fraction_encode_mask;
+    return significand;
+  }
+
+  // Returns true if this number represents a negative value.
+  bool isNegative() const { return (getBits() & sign_mask) != 0; }
+
+  // Sets this HexFloat from the individual components.
+  // Note this assumes EVERY significand is normalized, and has an implicit
+  // leading one. This means that the only way that this method will set 0,
+  // is if you set a number so denormalized that it underflows.
+  // Do not use this method with raw bits extracted from a subnormal number,
+  // since subnormals do not have an implicit leading 1 in the significand.
+  // The significand is also expected to be in the
+  // lowest-most num_fraction_bits of the uint_type.
+  // The exponent is expected to be unbiased, meaning an exponent of
+  // 0 actually means 0.
+  // If underflow_round_up is set, then on underflow, if a number is non-0
+  // and would underflow, we round up to the smallest denorm.
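+  // (Illustrative: for IEEE float, negative=false, exponent=0, significand=0
+  //  encodes 1.0f (bits 0x3F800000), because the leading 1 bit of the
+  //  significand is implicit.)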
+  void setFromSignUnbiasedExponentAndNormalizedSignificand(
+      bool negative, int_type exponent, uint_type significand,
+      bool round_denorm_up) {
+    bool significand_is_zero = significand == 0;
+
+    if (exponent <= min_exponent) {
+      // If this was denormalized, then we have to shift the bit on, meaning
+      // the significand is not zero.
+      significand_is_zero = false;
+      significand |= first_exponent_bit;
+      significand = static_cast<uint_type>(significand >> 1);
+    }
+
+    while (exponent < min_exponent) {
+      significand = static_cast<uint_type>(significand >> 1);
+      ++exponent;
+    }
+
+    if (exponent == min_exponent) {
+      if (significand == 0 && !significand_is_zero && round_denorm_up) {
+        significand = static_cast<uint_type>(0x1);
+      }
+    }
+
+    uint_type new_value = 0;
+    if (negative) {
+      new_value = static_cast<uint_type>(new_value | sign_mask);
+    }
+    exponent = static_cast<int_type>(exponent + exponent_bias);
+    assert(exponent >= 0);
+
+    // put it all together
+    exponent = static_cast<int_type>((exponent << exponent_left_shift) &
+                                     exponent_mask);
+    significand = static_cast<uint_type>(significand & fraction_encode_mask);
+    new_value = static_cast<uint_type>(new_value | (exponent | significand));
+    value_ = BitwiseCast<T>(new_value);
+  }
+
+  // Increments the significand of this number by the given amount.
+  // If this would spill the significand into the implicit bit,
+  // carry is set to true and the significand is shifted to fit into
+  // the correct location, otherwise carry is set to false.
+  // All significands and to_increment are assumed to be within the bounds
+  // for a valid significand.
+  static uint_type incrementSignificand(uint_type significand,
+                                        uint_type to_increment, bool* carry) {
+    significand = static_cast<uint_type>(significand + to_increment);
+    *carry = false;
+    if (significand & first_exponent_bit) {
+      *carry = true;
+      // The implicit 1-bit will have carried, so we should zero-out the
+      // top bit and shift back.
+      significand = static_cast<uint_type>(significand & ~first_exponent_bit);
+      significand = static_cast<uint_type>(significand >> 1);
+    }
+    return significand;
+  }
+
+  // These exist because MSVC throws warnings on negative right-shifts
+  // even if they are not going to be executed. Eg:
+  // constant_number < 0? 0: constant_number
+  // These convert the negative left-shifts into right shifts.
+
+  template <typename int_type>
+  uint_type negatable_left_shift(int_type N, uint_type val)
+  {
+    if(N >= 0)
+      return val << N;
+
+    return val >> -N;
+  }
+
+  template <typename int_type>
+  uint_type negatable_right_shift(int_type N, uint_type val)
+  {
+    if(N >= 0)
+      return val >> N;
+
+    return val << -N;
+  }
+
+  // Returns the significand, rounded to fit in a significand in
+  // other_T. This is shifted so that the most significant
+  // bit of the rounded number lines up with the most significant bit
+  // of the returned significand.
+  template <typename other_T>
+  typename other_T::uint_type getRoundedNormalizedSignificand(
+      round_direction dir, bool* carry_bit) {
+    typedef typename other_T::uint_type other_uint_type;
+    static const int_type num_throwaway_bits =
+        static_cast<int_type>(num_fraction_bits) -
+        static_cast<int_type>(other_T::num_fraction_bits);
+
+    static const uint_type last_significant_bit =
+        (num_throwaway_bits < 0)
+            ? 0
+            : negatable_left_shift(num_throwaway_bits, 1u);
+    static const uint_type first_rounded_bit =
+        (num_throwaway_bits < 1)
+            ? 0
+            : negatable_left_shift(num_throwaway_bits - 1, 1u);
+
+    static const uint_type throwaway_mask_bits =
+        num_throwaway_bits > 0 ? num_throwaway_bits : 0;
+    static const uint_type throwaway_mask =
+        spvutils::SetBits<uint_type, 0, throwaway_mask_bits>::get;
+
+    *carry_bit = false;
+    other_uint_type out_val = 0;
+    uint_type significand = getNormalizedSignificand();
+    // If we are up-casting, then we just have to shift to the right location.
+    if (num_throwaway_bits <= 0) {
+      out_val = static_cast<other_uint_type>(significand);
+      uint_type shift_amount = static_cast<uint_type>(-num_throwaway_bits);
+      out_val = static_cast<other_uint_type>(out_val << shift_amount);
+      return out_val;
+    }
+
+    // If every non-representable bit is 0, then we don't have any casting to
+    // do.
+    if ((significand & throwaway_mask) == 0) {
+      return static_cast<other_uint_type>(
+          negatable_right_shift(num_throwaway_bits, significand));
+    }
+
+    bool round_away_from_zero = false;
+    // We actually have to narrow the significand here, so we have to follow
+    // the rounding rules.
+    switch (dir) {
+      case kRoundToZero:
+        break;
+      case kRoundToPositiveInfinity:
+        round_away_from_zero = !isNegative();
+        break;
+      case kRoundToNegativeInfinity:
+        round_away_from_zero = isNegative();
+        break;
+      case kRoundToNearestEven:
+        // Have to round down, round bit is 0
+        if ((first_rounded_bit & significand) == 0) {
+          break;
+        }
+        if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) {
+          // If any subsequent bit of the rounded portion is non-0 then we
+          // round up.
+          round_away_from_zero = true;
+          break;
+        }
+        // We are exactly half-way between 2 numbers, pick even.
+        if ((significand & last_significant_bit) != 0) {
+          // 1 for our last bit, round up.
+          round_away_from_zero = true;
+          break;
+        }
+        break;
+    }
+
+    if (round_away_from_zero) {
+      return static_cast<other_uint_type>(negatable_right_shift(
+          num_throwaway_bits,
+          incrementSignificand(significand, last_significant_bit, carry_bit)));
+    } else {
+      return static_cast<other_uint_type>(
+          negatable_right_shift(num_throwaway_bits, significand));
+    }
+  }
+
+  // Casts this value to another HexFloat. If the cast is widening,
+  // then round_dir is ignored. If the cast is narrowing, then
+  // the result is rounded in the direction specified.
+  // This number will retain Nan and Inf values.
+  // It will also saturate to Inf if the number overflows, and
+  // underflow to (0 or min depending on rounding) if the number underflows.
+  template <typename other_T>
+  void castTo(other_T& other, round_direction round_dir) {
+    other = other_T(static_cast<typename other_T::native_type>(0));
+    bool negate = isNegative();
+    if (getUnsignedBits() == 0) {
+      if (negate) {
+        other.set_value(-other.value());
+      }
+      return;
+    }
+    uint_type significand = getSignificandBits();
+    bool carried = false;
+    typename other_T::uint_type rounded_significand =
+        getRoundedNormalizedSignificand<other_T>(round_dir, &carried);
+
+    int_type exponent = getUnbiasedExponent();
+    if (exponent == min_exponent) {
+      // If we are denormal, normalize the exponent, so that we can encode
+      // easily.
+      exponent = static_cast<int_type>(exponent + 1);
+      for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0;
+           check_bit = static_cast<uint_type>(check_bit >> 1)) {
+        exponent = static_cast<int_type>(exponent - 1);
+        if (check_bit & significand) break;
+      }
+    }
+
+    bool is_nan =
+        (getBits() & exponent_mask) == exponent_mask && significand != 0;
+    bool is_inf =
+        !is_nan &&
+        ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) ||
+         (significand == 0 && (getBits() & exponent_mask) == exponent_mask));
+
+    // If we are Nan or Inf we should pass that through.
+    if (is_inf) {
+      other.set_value(BitwiseCast<typename other_T::underlying_type>(
+          static_cast<typename other_T::uint_type>(
+              (negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
+      return;
+    }
+    if (is_nan) {
+      typename other_T::uint_type shifted_significand;
+      shifted_significand = static_cast<typename other_T::uint_type>(
+          negatable_left_shift(
+              static_cast<int_type>(other_T::num_fraction_bits) -
+                  static_cast<int_type>(num_fraction_bits),
+              significand));
+
+      // We are some sort of Nan. We try to keep the bit-pattern of the Nan
+      // as close as possible. If we had to shift off bits so we are 0, then we
+      // just set the last bit.
+      other.set_value(BitwiseCast<typename other_T::underlying_type>(
+          static_cast<typename other_T::uint_type>(
+              (negate ? other_T::sign_mask : 0) | other_T::exponent_mask |
+              (shifted_significand == 0 ? 0x1 : shifted_significand))));
+      return;
+    }
+
+    bool round_underflow_up =
+        isNegative() ? round_dir == kRoundToNegativeInfinity
+                     : round_dir == kRoundToPositiveInfinity;
+    typedef typename other_T::int_type other_int_type;
+    // setFromSignUnbiasedExponentAndNormalizedSignificand will
+    // zero out any underflowing value (but retain the sign).
+    other.setFromSignUnbiasedExponentAndNormalizedSignificand(
+        negate, static_cast<other_int_type>(exponent), rounded_significand,
+        round_underflow_up);
+    return;
+  }
+
+ private:
+  T value_;
+
+  static_assert(num_used_bits ==
+                    Traits::num_exponent_bits + Traits::num_fraction_bits + 1,
+                "The number of bits do not fit");
+  static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match");
+};
+
+// Returns 4 bits represented by the hex character.
+inline uint8_t get_nibble_from_character(int character) {
+  const char* dec = "0123456789";
+  const char* lower = "abcdef";
+  const char* upper = "ABCDEF";
+  const char* p = nullptr;
+  if ((p = strchr(dec, character))) {
+    return static_cast<uint8_t>(p - dec);
+  } else if ((p = strchr(lower, character))) {
+    return static_cast<uint8_t>(p - lower + 0xa);
+  } else if ((p = strchr(upper, character))) {
+    return static_cast<uint8_t>(p - upper + 0xa);
+  }
+
+  assert(false && "This was called with a non-hex character");
+  return 0;
+}
+
+// Outputs the given HexFloat to the stream.
+template <typename T, typename Traits>
+std::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) {
+  typedef HexFloat<T, Traits> HF;
+  typedef typename HF::uint_type uint_type;
+  typedef typename HF::int_type int_type;
+
+  static_assert(HF::num_used_bits != 0,
+                "num_used_bits must be non-zero for a valid float");
+  static_assert(HF::num_exponent_bits != 0,
+                "num_exponent_bits must be non-zero for a valid float");
+  static_assert(HF::num_fraction_bits != 0,
+                "num_fraction_bits must be non-zero for a valid float");
+
+  const uint_type bits = spvutils::BitwiseCast<uint_type>(value.value());
+  const char* const sign = (bits & HF::sign_mask) ? "-" : "";
+  const uint_type exponent = static_cast<uint_type>(
+      (bits & HF::exponent_mask) >> HF::num_fraction_bits);
+
+  uint_type fraction = static_cast<uint_type>((bits & HF::fraction_encode_mask)
+                                              << HF::num_overflow_bits);
+
+  const bool is_zero = exponent == 0 && fraction == 0;
+  const bool is_denorm = exponent == 0 && !is_zero;
+
+  // exponent contains the biased exponent; we have to convert it back into
+  // the normal range.
+  int_type int_exponent = static_cast<int_type>(exponent - HF::exponent_bias);
+  // If the number is all zeros, then we actually have to NOT shift the
+  // exponent.
+  int_exponent = is_zero ? 0 : int_exponent;
+
+  // If we are denorm, then start shifting, and decreasing the exponent until
+  // our leading bit is 1.
+
+  if (is_denorm) {
+    while ((fraction & HF::fraction_top_bit) == 0) {
+      fraction = static_cast<uint_type>(fraction << 1);
+      int_exponent = static_cast<int_type>(int_exponent - 1);
+    }
+    // Since this is denormalized, we have to consume the leading 1 since it
+    // will end up being implicit.
+    fraction = static_cast<uint_type>(fraction << 1);  // eat the leading 1
+    fraction &= HF::fraction_represent_mask;
+  }
+
+  uint_type fraction_nibbles = HF::fraction_nibbles;
+  // We do not have to display any trailing 0s, since this represents the
+  // fractional part.
+  while (fraction_nibbles > 0 && (fraction & 0xF) == 0) {
+    // Shift off any trailing values.
+    fraction = static_cast<uint_type>(fraction >> 4);
+    --fraction_nibbles;
+  }
+
+  const auto saved_flags = os.flags();
+  const auto saved_fill = os.fill();
+
+  os << sign << "0x" << (is_zero ? '0' : '1');
+  if (fraction_nibbles) {
+    // Make sure to keep the leading 0s in place, since this is the fractional
+    // part.
+    os << "." << std::setw(static_cast<int>(fraction_nibbles))
+       << std::setfill('0') << std::hex << fraction;
+  }
+  os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent;
+
+  os.flags(saved_flags);
+  os.fill(saved_fill);
+
+  return os;
+}
+
+// Returns true if negate_value is true and the next character on the
+// input stream is a plus or minus sign. In that case we also set the fail bit
+// on the stream and set the value to the zero value for its type.
+template <typename T, typename Traits>
+inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value,
+                                        HexFloat<T, Traits>& value) {
+  if (negate_value) {
+    auto next_char = is.peek();
+    if (next_char == '-' || next_char == '+') {
+      // Fail the parse. Emulate standard behaviour by setting the value to
+      // the zero value, and set the fail bit on the stream.
+      value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0));
+      is.setstate(std::ios_base::failbit);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Parses a floating point number from the given stream and stores it into the
+// value parameter.
+// If negate_value is true then the number may not have a leading minus or
+// plus, and if it successfully parses, then the number is negated before
+// being stored into the value parameter.
+// If the value cannot be correctly parsed or overflows the target floating
+// point type, then set the fail bit on the stream.
+// TODO(dneto): Promise C++11 standard behavior in how the value is set in
+// the error case, but only after all target platforms implement it correctly.
+// In particular, the Microsoft C++ runtime appears to be out of spec.
+template <typename T, typename Traits>
+inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value,
+                                      HexFloat<T, Traits>& value) {
+  if (RejectParseDueToLeadingSign(is, negate_value, value)) {
+    return is;
+  }
+  T val;
+  is >> val;
+  if (negate_value) {
+    val = -val;
+  }
+  value.set_value(val);
+  // In the failure case, map -0.0 to 0.0.
+  if (is.fail() && value.getUnsignedBits() == 0u) {
+    value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0));
+  }
+  if (val.isInfinity()) {
+    // Fail the parse. Emulate standard behaviour by setting the value to
+    // the closest normal value, and set the fail bit on the stream.
+    value.set_value((value.isNegative() || negate_value) ? T::lowest()
+                                                         : T::max());
+    is.setstate(std::ios_base::failbit);
+  }
+  return is;
+}
+
+// Specialization of ParseNormalFloat for FloatProxy<Float16> values.
+// This will parse the float as if it were a 32-bit floating point number,
+// and then round it down to fit into a Float16 value.
+// The number is rounded towards zero.
+// If negate_value is true then the number may not have a leading minus or
+// plus, and if it successfully parses, then the number is negated before
+// being stored into the value parameter.
+// If the value cannot be correctly parsed or overflows the target floating
+// point type, then set the fail bit on the stream.
+// TODO(dneto): Promise C++11 standard behavior in how the value is set in
+// the error case, but only after all target platforms implement it correctly.
+// In particular, the Microsoft C++ runtime appears to be out of spec.
+template <>
+inline std::istream&
+ParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>(
+    std::istream& is, bool negate_value,
+    HexFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>& value) {
+  // First parse as a 32-bit float.
+  HexFloat<FloatProxy<float>> float_val(0.0f);
+  ParseNormalFloat(is, negate_value, float_val);
+
+  // Then convert to 16-bit float, saturating at infinities, and
+  // rounding toward zero.
+  float_val.castTo(value, kRoundToZero);
+
+  // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the
+  // fail bit and set the lowest or highest value.
+  if (Float16::isInfinity(value.value().getAsFloat())) {
+    value.set_value(value.isNegative() ? Float16::lowest() : Float16::max());
+    is.setstate(std::ios_base::failbit);
+  }
+  return is;
+}
+
+// Reads a HexFloat from the given stream.
+// If the float is not encoded as a hex-float then it will be parsed
+// as a regular float.
+// This may fail if your stream does not support at least one unget.
+// Nan values can be encoded with "0x1.<not zero>p+exponent_bias".
+// This would normally overflow a float and round to
+// infinity but this special pattern is the exact representation for a NaN,
+// and therefore is actually encoded as the correct NaN. To encode inf,
+// either 0x0p+exponent_bias can be specified or any exponent greater than
+// exponent_bias.
+// Examples using IEEE 32-bit float encoding.
+//    0x1.0p+128 (+inf)
+//    -0x1.0p+128 (-inf)
+//
+//    0x1.1p+128 (+Nan)
+//    -0x1.1p+128 (-Nan)
+//
+//    0x1p+129 (+inf)
+//    -0x1p+129 (-inf)
+template <typename T, typename Traits>
+std::istream& operator>>(std::istream& is, HexFloat<T, Traits>& value) {
+  using HF = HexFloat<T, Traits>;
+  using uint_type = typename HF::uint_type;
+  using int_type = typename HF::int_type;
+
+  value.set_value(static_cast<T>(0.f));
+
+  if (is.flags() & std::ios::skipws) {
+    // If the user wants to skip whitespace, then we should obey that.
+    while (std::isspace(is.peek())) {
+      is.get();
+    }
+  }
+
+  auto next_char = is.peek();
+  bool negate_value = false;
+
+  if (next_char != '-' && next_char != '0') {
+    return ParseNormalFloat(is, negate_value, value);
+  }
+
+  if (next_char == '-') {
+    negate_value = true;
+    is.get();
+    next_char = is.peek();
+  }
+
+  if (next_char == '0') {
+    is.get();  // We may have to unget this.
+    auto maybe_hex_start = is.peek();
+    if (maybe_hex_start != 'x' && maybe_hex_start != 'X') {
+      is.unget();
+      return ParseNormalFloat(is, negate_value, value);
+    } else {
+      is.get();  // Throw away the 'x'.
+    }
+  } else {
+    return ParseNormalFloat(is, negate_value, value);
+  }
+
+  // This "looks" like a hex-float so treat it as one.
+  bool seen_p = false;
+  bool seen_dot = false;
+  uint_type fraction_index = 0;
+
+  uint_type fraction = 0;
+  int_type exponent = HF::exponent_bias;
+
+  // Strip off leading zeros so we don't have to special-case them later.
+  while ((next_char = is.peek()) == '0') {
+    is.get();
+  }
+
+  bool is_denorm =
+      true;  // Assume denorm "representation" until we hear otherwise.
+             // NB: This does not mean the value is actually denorm,
+             // it just means that it was written 0.
+  bool bits_written = false;  // Stays false until we write a bit.
+  while (!seen_p && !seen_dot) {
+    // Handle characters that are left of the fractional part.
+    if (next_char == '.') {
+      seen_dot = true;
+    } else if (next_char == 'p') {
+      seen_p = true;
+    } else if (::isxdigit(next_char)) {
+      // We know this is not denormalized since we have stripped all leading
+      // zeroes and we are not a ".".
+      is_denorm = false;
+      int number = get_nibble_from_character(next_char);
+      for (int i = 0; i < 4; ++i, number <<= 1) {
+        uint_type write_bit = (number & 0x8) ? 0x1 : 0x0;
+        if (bits_written) {
+          // If we are here the bits represented belong in the fractional
+          // part of the float, and we have to adjust the exponent accordingly.
+          fraction = static_cast<uint_type>(
+              fraction |
+              static_cast<uint_type>(
+                  write_bit << (HF::top_bit_left_shift - fraction_index++)));
+          exponent = static_cast<int_type>(exponent + 1);
+        }
+        bits_written |= write_bit != 0;
+      }
+    } else {
+      // We have not found our exponent yet, so we have to fail.
+      is.setstate(std::ios::failbit);
+      return is;
+    }
+    is.get();
+    next_char = is.peek();
+  }
+  bits_written = false;
+  while (seen_dot && !seen_p) {
+    // Handle only fractional parts now.
+    if (next_char == 'p') {
+      seen_p = true;
+    } else if (::isxdigit(next_char)) {
+      int number = get_nibble_from_character(next_char);
+      for (int i = 0; i < 4; ++i, number <<= 1) {
+        uint_type write_bit = (number & 0x8) ? 0x01 : 0x00;
+        bits_written |= write_bit != 0;
+        if (is_denorm && !bits_written) {
+          // Handle modifying the exponent here; this way we can handle
+          // an arbitrary number of hex values without overflowing our
+          // integer.
+          exponent = static_cast<int_type>(exponent - 1);
+        } else {
+          fraction = static_cast<uint_type>(
+              fraction |
+              static_cast<uint_type>(
+                  write_bit << (HF::top_bit_left_shift - fraction_index++)));
+        }
+      }
+    } else {
+      // We still have not found our 'p' exponent yet, so this is not a valid
+      // hex-float.
+      is.setstate(std::ios::failbit);
+      return is;
+    }
+    is.get();
+    next_char = is.peek();
+  }
+
+  bool seen_sign = false;
+  int8_t exponent_sign = 1;
+  int_type written_exponent = 0;
+  while (true) {
+    if ((next_char == '-' || next_char == '+')) {
+      if (seen_sign) {
+        is.setstate(std::ios::failbit);
+        return is;
+      }
+      seen_sign = true;
+      exponent_sign = (next_char == '-') ? -1 : 1;
+    } else if (::isdigit(next_char)) {
+      // Hex-floats express their exponent as decimal.
+      written_exponent = static_cast<int_type>(written_exponent * 10);
+      written_exponent =
+          static_cast<int_type>(written_exponent + (next_char - '0'));
+    } else {
+      break;
+    }
+    is.get();
+    next_char = is.peek();
+  }
+
+  written_exponent = static_cast<int_type>(written_exponent * exponent_sign);
+  exponent = static_cast<int_type>(exponent + written_exponent);
+
+  bool is_zero = is_denorm && (fraction == 0);
+  if (is_denorm && !is_zero) {
+    fraction = static_cast<uint_type>(fraction << 1);
+    exponent = static_cast<int_type>(exponent - 1);
+  } else if (is_zero) {
+    exponent = 0;
+  }
+
+  if (exponent <= 0 && !is_zero) {
+    fraction = static_cast<uint_type>(fraction >> 1);
+    fraction |= static_cast<uint_type>(1) << HF::top_bit_left_shift;
+  }
+
+  fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask;
+
+  const int_type max_exponent =
+      SetBits<uint_type, 0, HF::num_exponent_bits>::get;
+
+  // Handle actual denorm numbers
+  while (exponent < 0 && !is_zero) {
+    fraction = static_cast<uint_type>(fraction >> 1);
+    exponent = static_cast<int_type>(exponent + 1);
+
+    fraction &= HF::fraction_encode_mask;
+    if (fraction == 0) {
+      // We have underflowed our fraction. We should clamp to zero.
+      is_zero = true;
+      exponent = 0;
+    }
+  }
+
+  // We have overflowed so we should be inf/-inf.
+  if (exponent > max_exponent) {
+    exponent = max_exponent;
+    fraction = 0;
+  }
+
+  uint_type output_bits = static_cast<uint_type>(
+      static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift);
+  output_bits |= fraction;
+
+  uint_type shifted_exponent = static_cast<uint_type>(
+      static_cast<uint_type>(exponent << HF::exponent_left_shift) &
+      HF::exponent_mask);
+  output_bits |= shifted_exponent;
+
+  T output_float = spvutils::BitwiseCast<T>(output_bits);
+  value.set_value(output_float);
+
+  return is;
+}
+
+// Writes a FloatProxy value to a stream.
+// Zero and normal numbers are printed in the usual notation, but with
+// enough digits to fully reproduce the value. Other values (subnormal,
+// NaN, and infinity) are printed as a hex float.
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) {
+  auto float_val = value.getAsFloat();
+  switch (std::fpclassify(float_val)) {
+    case FP_ZERO:
+    case FP_NORMAL: {
+      auto saved_precision = os.precision();
+      os.precision(std::numeric_limits<T>::digits10);
+      os << float_val;
+      os.precision(saved_precision);
+    } break;
+    default:
+      os << HexFloat<FloatProxy<T>>(value);
+      break;
+  }
+  return os;
+}
+
+template <>
+inline std::ostream& operator<<<Float16>(std::ostream& os,
+                                         const FloatProxy<Float16>& value) {
+  os << HexFloat<FloatProxy<Float16>>(value);
+  return os;
+}
+}
+
+#endif  // LIBSPIRV_UTIL_HEX_FLOAT_H_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/spirv.hpp b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/spirv.hpp
new file mode 100644
index 0000000..f85469d
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/spirv.hpp
@@ -0,0 +1,2533 @@
+// Copyright (c) 2014-2020 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and/or associated documentation files (the "Materials"),
+// to deal in the Materials without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Materials, and to permit persons to whom the
+// Materials are furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+// IN THE MATERIALS.
+
+// This header is automatically generated by the same tool that creates
+// the Binary Section of the SPIR-V specification.
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageMax = 0x7fffffff, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, + ExecutionModelTaskNV = 5267, + ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, + ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 5314, + ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, + ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, + ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, + ExecutionModelMissNV = 5317, + ExecutionModelCallableKHR = 5318, + ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, + ExecutionModelMax = 0x7fffffff, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, + AddressingModelPhysicalStorageBuffer64EXT = 5348, + AddressingModelMax = 0x7fffffff, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, + MemoryModelVulkanKHR = 3, + MemoryModelMax = 0x7fffffff, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + 
ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, + ExecutionModeInitializer = 33, + ExecutionModeFinalizer = 34, + ExecutionModeSubgroupSize = 35, + ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeSubgroupsPerWorkgroupId = 37, + ExecutionModeLocalSizeId = 38, + ExecutionModeLocalSizeHintId = 39, + ExecutionModeSubgroupUniformControlFlowKHR = 4421, + ExecutionModePostDepthCoverage = 4446, + ExecutionModeDenormPreserve = 4459, + ExecutionModeDenormFlushToZero = 4460, + ExecutionModeSignedZeroInfNanPreserve = 4461, + ExecutionModeRoundingModeRTE = 4462, + ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, + ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, + ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, + ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, + ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeSharedLocalMemorySizeINTEL = 5618, + ExecutionModeRoundingModeRTPINTEL = 5620, + ExecutionModeRoundingModeRTNINTEL = 5621, + ExecutionModeFloatingPointModeALTINTEL = 5622, + ExecutionModeFloatingPointModeIEEEINTEL = 5623, + ExecutionModeMaxWorkgroupSizeINTEL = 5893, + ExecutionModeMaxWorkDimINTEL = 5894, + ExecutionModeNoGlobalOffsetINTEL = 5895, + ExecutionModeNumSIMDWorkitemsINTEL = 5896, + ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeMax = 0x7fffffff, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, + StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, + StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, + StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, + StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, + StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, + StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, + StorageClassPhysicalStorageBufferEXT = 5349, + 
StorageClassTaskPayloadWorkgroupEXT = 5402, + StorageClassCodeSectionINTEL = 5605, + StorageClassDeviceOnlyINTEL = 5936, + StorageClassHostOnlyINTEL = 5937, + StorageClassMax = 0x7fffffff, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, + DimMax = 0x7fffffff, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, + ImageFormatR64ui = 40, + ImageFormatR64i = 41, + ImageFormatMax = 0x7fffffff, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + 
ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, + ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, + ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, + ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, + ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsNontemporalShift = 14, + ImageOperandsOffsetsShift = 16, + ImageOperandsMax = 0x7fffffff, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, + ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, + ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, + ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, + ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, + ImageOperandsNontemporalMask = 0x00004000, + ImageOperandsOffsetsMask = 0x00010000, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocINTELShift = 17, + FPFastMathModeMax = 0x7fffffff, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocINTELMask = 0x00020000, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, + LinkageTypeMax = 0x7fffffff, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 
16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationUniformId = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, + DecorationMaxByteOffset = 45, + DecorationAlignmentId = 46, + DecorationMaxByteOffsetId = 47, + DecorationNoSignedWrap = 4469, + DecorationNoUnsignedWrap = 4470, + DecorationExplicitInterpAMD = 4999, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, + DecorationPerPrimitiveNV = 5271, + DecorationPerViewNV = 5272, + DecorationPerTaskNV = 5273, + DecorationPerVertexKHR = 5285, + DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, + DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, + DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, + DecorationAliasedPointerEXT = 5356, + DecorationBindlessSamplerNV = 5398, + DecorationBindlessImageNV = 5399, + DecorationBoundSamplerNV = 5400, + DecorationBoundImageNV = 5401, + DecorationSIMTCallINTEL = 5599, + DecorationReferencedIndirectlyINTEL = 5602, + DecorationClobberINTEL = 5607, + DecorationSideEffectsINTEL = 5608, + DecorationVectorComputeVariableINTEL = 5624, + DecorationFuncParamIOKindINTEL = 5625, + DecorationVectorComputeFunctionINTEL = 5626, + DecorationStackCallINTEL = 5627, + DecorationGlobalVariableOffsetINTEL = 5628, + DecorationCounterBuffer = 5634, + DecorationHlslCounterBufferGOOGLE = 5634, + DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationFunctionRoundingModeINTEL = 5822, + DecorationFunctionDenormModeINTEL = 5823, + DecorationRegisterINTEL = 5825, + DecorationMemoryINTEL = 5826, + DecorationNumbanksINTEL = 5827, + DecorationBankwidthINTEL = 5828, + DecorationMaxPrivateCopiesINTEL = 5829, + DecorationSinglepumpINTEL = 5830, + DecorationDoublepumpINTEL = 5831, + DecorationMaxReplicatesINTEL = 5832, + DecorationSimpleDualPortINTEL = 5833, + DecorationMergeINTEL = 5834, + DecorationBankBitsINTEL = 5835, + DecorationForcePow2DepthINTEL = 5836, + DecorationBurstCoalesceINTEL = 5899, + DecorationCacheSizeINTEL = 5900, + DecorationDontStaticallyCoalesceINTEL = 5901, + DecorationPrefetchINTEL = 5902, + DecorationStallEnableINTEL = 5905, + DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationBufferLocationINTEL = 5921, + DecorationIOPipeStorageINTEL = 5944, + DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationSingleElementVectorINTEL = 6085, + DecorationVectorComputeCallableFunctionINTEL = 6087, + DecorationMediaBlockIOINTEL = 6140, + DecorationMax = 0x7fffffff, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + 
BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, + BuiltInSubgroupEqMask = 4416, + BuiltInSubgroupEqMaskKHR = 4416, + BuiltInSubgroupGeMask = 4417, + BuiltInSubgroupGeMaskKHR = 4417, + BuiltInSubgroupGtMask = 4418, + BuiltInSubgroupGtMaskKHR = 4418, + BuiltInSubgroupLeMask = 4419, + BuiltInSubgroupLeMaskKHR = 4419, + BuiltInSubgroupLtMask = 4420, + BuiltInSubgroupLtMaskKHR = 4420, + BuiltInBaseVertex = 4424, + BuiltInBaseInstance = 4425, + BuiltInDrawIndex = 4426, + BuiltInPrimitiveShadingRateKHR = 4432, + BuiltInDeviceIndex = 4438, + BuiltInViewIndex = 4440, + BuiltInShadingRateKHR = 4444, + BuiltInBaryCoordNoPerspAMD = 4992, + BuiltInBaryCoordNoPerspCentroidAMD = 4993, + BuiltInBaryCoordNoPerspSampleAMD = 4994, + BuiltInBaryCoordSmoothAMD = 4995, + BuiltInBaryCoordSmoothCentroidAMD = 4996, + BuiltInBaryCoordSmoothSampleAMD = 4997, + BuiltInBaryCoordPullModelAMD = 4998, + BuiltInFragStencilRefEXT = 5014, + BuiltInViewportMaskNV = 5253, + BuiltInSecondaryPositionNV = 5257, + BuiltInSecondaryViewportMaskNV = 5258, + BuiltInPositionPerViewNV = 5261, + BuiltInViewportMaskPerViewNV = 5262, + BuiltInFullyCoveredEXT = 5264, + BuiltInTaskCountNV = 5274, + BuiltInPrimitiveCountNV = 5275, + BuiltInPrimitiveIndicesNV = 5276, + BuiltInClipDistancePerViewNV = 5277, + BuiltInCullDistancePerViewNV = 5278, + BuiltInLayerPerViewNV = 5279, + BuiltInMeshViewCountNV = 5280, + BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordKHR = 5286, + BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspKHR = 5287, + BuiltInBaryCoordNoPerspNV = 5287, + BuiltInFragSizeEXT = 5292, + BuiltInFragmentSizeNV = 5292, + BuiltInFragInvocationCountEXT = 5293, + BuiltInInvocationsPerPixelNV = 5293, + BuiltInPrimitivePointIndicesEXT = 5294, + BuiltInPrimitiveLineIndicesEXT = 5295, + BuiltInPrimitiveTriangleIndicesEXT = 5296, + BuiltInCullPrimitiveEXT = 5299, + BuiltInLaunchIdKHR = 5319, + BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeKHR = 5320, + BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginKHR = 5321, + BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionKHR = 5322, + BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginKHR = 5323, + BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionKHR = 5324, + BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminKHR = 5325, + BuiltInRayTminNV = 5325, + BuiltInRayTmaxKHR = 5326, + BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexKHR = 5327, + BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldKHR = 5330, + BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectKHR = 5331, + BuiltInWorldToObjectNV = 5331, + BuiltInHitTNV = 5332, + 
BuiltInHitKindKHR = 5333, + BuiltInHitKindNV = 5333, + BuiltInCurrentRayTimeNV = 5334, + BuiltInIncomingRayFlagsKHR = 5351, + BuiltInIncomingRayFlagsNV = 5351, + BuiltInRayGeometryIndexKHR = 5352, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, + BuiltInMax = 0x7fffffff, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, + SelectionControlMax = 0x7fffffff, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, + LoopControlDependencyInfiniteShift = 2, + LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, + LoopControlInitiationIntervalINTELShift = 16, + LoopControlMaxConcurrencyINTELShift = 17, + LoopControlDependencyArrayINTELShift = 18, + LoopControlPipelineEnableINTELShift = 19, + LoopControlLoopCoalesceINTELShift = 20, + LoopControlMaxInterleavingINTELShift = 21, + LoopControlSpeculatedIterationsINTELShift = 22, + LoopControlNoFusionINTELShift = 23, + LoopControlMax = 0x7fffffff, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, + LoopControlDependencyInfiniteMask = 0x00000004, + LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, + LoopControlInitiationIntervalINTELMask = 0x00010000, + LoopControlMaxConcurrencyINTELMask = 0x00020000, + LoopControlDependencyArrayINTELMask = 0x00040000, + LoopControlPipelineEnableINTELMask = 0x00080000, + LoopControlLoopCoalesceINTELMask = 0x00100000, + LoopControlMaxInterleavingINTELMask = 0x00200000, + LoopControlSpeculatedIterationsINTELMask = 0x00400000, + LoopControlNoFusionINTELMask = 0x00800000, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, + FunctionControlOptNoneINTELShift = 16, + FunctionControlMax = 0x7fffffff, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, + FunctionControlOptNoneINTELMask = 0x00010000, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, + MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, + MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, + MemorySemanticsMakeVisibleKHRShift = 14, + 
MemorySemanticsVolatileShift = 15, + MemorySemanticsMax = 0x7fffffff, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, + MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, + MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, + MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, + MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, + MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, + MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessMax = 0x7fffffff, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, + MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, + MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, + MemoryAccessNonPrivatePointerKHRMask = 0x00000020, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, + ScopeQueueFamily = 5, + ScopeQueueFamilyKHR = 5, + ScopeShaderCallKHR = 6, + ScopeMax = 0x7fffffff, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, + GroupOperationClusteredReduce = 3, + GroupOperationPartitionedReduceNV = 6, + GroupOperationPartitionedInclusiveScanNV = 7, + GroupOperationPartitionedExclusiveScanNV = 8, + GroupOperationMax = 0x7fffffff, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, + KernelEnqueueFlagsMax = 0x7fffffff, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, + KernelProfilingInfoMax = 0x7fffffff, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + 
CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, + CapabilitySubgroupDispatch = 58, + CapabilityNamedBarrier = 59, + CapabilityPipeStorage = 60, + CapabilityGroupNonUniform = 61, + CapabilityGroupNonUniformVote = 62, + CapabilityGroupNonUniformArithmetic = 63, + CapabilityGroupNonUniformBallot = 64, + CapabilityGroupNonUniformShuffle = 65, + CapabilityGroupNonUniformShuffleRelative = 66, + CapabilityGroupNonUniformClustered = 67, + CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, + CapabilityUniformDecoration = 71, + CapabilityFragmentShadingRateKHR = 4422, + CapabilitySubgroupBallotKHR = 4423, + CapabilityDrawParameters = 4427, + CapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, + CapabilitySubgroupVoteKHR = 4431, + CapabilityStorageBuffer16BitAccess = 4433, + CapabilityStorageUniformBufferBlock16 = 4433, + CapabilityStorageUniform16 = 4434, + CapabilityUniformAndStorageBuffer16BitAccess = 4434, + CapabilityStoragePushConstant16 = 4435, + CapabilityStorageInputOutput16 = 4436, + CapabilityDeviceGroup = 4437, + CapabilityMultiView = 4439, + CapabilityVariablePointersStorageBuffer = 4441, + CapabilityVariablePointers = 4442, + CapabilityAtomicStorageOps = 4445, + CapabilitySampleMaskPostDepthCoverage = 4447, + CapabilityStorageBuffer8BitAccess = 4448, + CapabilityUniformAndStorageBuffer8BitAccess = 4449, + CapabilityStoragePushConstant8 = 4450, + CapabilityDenormPreserve = 4464, + CapabilityDenormFlushToZero = 4465, + CapabilitySignedZeroInfNanPreserve = 4466, + CapabilityRoundingModeRTE = 4467, + CapabilityRoundingModeRTZ = 4468, + CapabilityRayQueryProvisionalKHR = 4471, + CapabilityRayQueryKHR = 4472, + CapabilityRayTraversalPrimitiveCullingKHR = 4478, + CapabilityRayTracingKHR = 4479, + CapabilityFloat16ImageAMD = 5008, + CapabilityImageGatherBiasLodAMD = 5009, + CapabilityFragmentMaskAMD = 5010, + CapabilityStencilExportEXT = 5013, + CapabilityImageReadWriteLodAMD = 5015, + CapabilityInt64ImageEXT = 5016, + CapabilityShaderClockKHR = 5055, + CapabilitySampleMaskOverrideCoverageNV = 5249, + CapabilityGeometryShaderPassthroughNV = 5251, + CapabilityShaderViewportIndexLayerEXT = 5254, + CapabilityShaderViewportIndexLayerNV = 5254, + 
CapabilityShaderViewportMaskNV = 5255, + CapabilityShaderStereoViewNV = 5259, + CapabilityPerViewAttributesNV = 5260, + CapabilityFragmentFullyCoveredEXT = 5265, + CapabilityMeshShadingNV = 5266, + CapabilityImageFootprintNV = 5282, + CapabilityMeshShadingEXT = 5283, + CapabilityFragmentBarycentricKHR = 5284, + CapabilityFragmentBarycentricNV = 5284, + CapabilityComputeDerivativeGroupQuadsNV = 5288, + CapabilityFragmentDensityEXT = 5291, + CapabilityShadingRateNV = 5291, + CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, + CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, + CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, + CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, + CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, + CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, + CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, + CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, + CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + CapabilityRayTracingNV = 5340, + CapabilityRayTracingMotionBlurNV = 5341, + CapabilityVulkanMemoryModel = 5345, + CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, + CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, + CapabilityPhysicalStorageBufferAddressesEXT = 5347, + CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityRayTracingProvisionalKHR = 5353, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocation = 5379, + CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilityBindlessTextureNV = 5390, + CapabilitySubgroupShuffleINTEL = 5568, + CapabilitySubgroupBufferBlockIOINTEL = 5569, + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityRoundToInfinityINTEL = 5582, + CapabilityFloatingPointModeINTEL = 5583, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilityFunctionPointersINTEL = 5603, + CapabilityIndirectReferencesINTEL = 5604, + CapabilityAsmINTEL = 5606, + CapabilityAtomicFloat32MinMaxEXT = 5612, + CapabilityAtomicFloat64MinMaxEXT = 5613, + CapabilityAtomicFloat16MinMaxEXT = 5616, + CapabilityVectorComputeINTEL = 5617, + CapabilityVectorAnyINTEL = 5619, + CapabilityExpectAssumeKHR = 5629, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + 
CapabilityVariableLengthArrayINTEL = 5817, + CapabilityFunctionFloatControlINTEL = 5821, + CapabilityFPGAMemoryAttributesINTEL = 5824, + CapabilityFPFastMathModeINTEL = 5837, + CapabilityArbitraryPrecisionIntegersINTEL = 5844, + CapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + CapabilityUnstructuredLoopControlsINTEL = 5886, + CapabilityFPGALoopControlsINTEL = 5888, + CapabilityKernelAttributesINTEL = 5892, + CapabilityFPGAKernelAttributesINTEL = 5897, + CapabilityFPGAMemoryAccessesINTEL = 5898, + CapabilityFPGAClusterAttributesINTEL = 5904, + CapabilityLoopFuseINTEL = 5906, + CapabilityFPGABufferLocationINTEL = 5920, + CapabilityArbitraryPrecisionFixedPointINTEL = 5922, + CapabilityUSMStorageClassesINTEL = 5935, + CapabilityIOPipesINTEL = 5943, + CapabilityBlockingPipesINTEL = 5945, + CapabilityFPGARegINTEL = 5948, + CapabilityDotProductInputAll = 6016, + CapabilityDotProductInputAllKHR = 6016, + CapabilityDotProductInput4x8Bit = 6017, + CapabilityDotProductInput4x8BitKHR = 6017, + CapabilityDotProductInput4x8BitPacked = 6018, + CapabilityDotProductInput4x8BitPackedKHR = 6018, + CapabilityDotProduct = 6019, + CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, + CapabilityBitInstructions = 6025, + CapabilityAtomicFloat32AddEXT = 6033, + CapabilityAtomicFloat64AddEXT = 6034, + CapabilityLongConstantCompositeINTEL = 6089, + CapabilityOptNoneINTEL = 6094, + CapabilityAtomicFloat16AddEXT = 6095, + CapabilityDebugInfoModuleINTEL = 6114, + CapabilityMax = 0x7fffffff, +}; + +enum RayFlagsShift { + RayFlagsOpaqueKHRShift = 0, + RayFlagsNoOpaqueKHRShift = 1, + RayFlagsTerminateOnFirstHitKHRShift = 2, + RayFlagsSkipClosestHitShaderKHRShift = 3, + RayFlagsCullBackFacingTrianglesKHRShift = 4, + RayFlagsCullFrontFacingTrianglesKHRShift = 5, + RayFlagsCullOpaqueKHRShift = 6, + RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipTrianglesKHRShift = 8, + RayFlagsSkipAABBsKHRShift = 9, + RayFlagsMax = 0x7fffffff, +}; + +enum RayFlagsMask { + RayFlagsMaskNone = 0, + RayFlagsOpaqueKHRMask = 0x00000001, + RayFlagsNoOpaqueKHRMask = 0x00000002, + RayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + RayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + RayFlagsCullOpaqueKHRMask = 0x00000040, + RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipTrianglesKHRMask = 0x00000100, + RayFlagsSkipAABBsKHRMask = 0x00000200, +};
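The RayFlagsShift and RayFlagsMask enums above come in pairs: each *Mask bit equals 1u shifted left by the matching *Shift value, and masks are OR-ed together to form the RayFlags operand of instructions such as OpTraceRayKHR. A minimal sketch of that relationship, assuming these enums live in namespace spv as in the stock Khronos spirv.hpp (combineRayFlags is a hypothetical helper, not part of the vendored header):

#include <cstdint>

// Each *Mask bit is 1u << its *Shift counterpart, per the values above.
static_assert(spv::RayFlagsCullOpaqueKHRMask == (1u << spv::RayFlagsCullOpaqueKHRShift),
              "mask/shift pairs must stay in sync");

// Hypothetical helper: OR individual masks into a single RayFlags operand word.
inline std::uint32_t combineRayFlags() {
    return spv::RayFlagsTerminateOnFirstHitKHRMask |
           spv::RayFlagsCullBackFacingTrianglesKHRMask;
}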
+ +enum RayQueryIntersection { + RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + RayQueryIntersectionMax = 0x7fffffff, +}; + +enum RayQueryCommittedIntersectionType { + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + RayQueryCommittedIntersectionTypeMax = 0x7fffffff, +}; + +enum RayQueryCandidateIntersectionType { + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + RayQueryCandidateIntersectionTypeMax = 0x7fffffff, +}; + +enum FragmentShadingRateShift { + FragmentShadingRateVertical2PixelsShift = 0, + FragmentShadingRateVertical4PixelsShift = 1, + FragmentShadingRateHorizontal2PixelsShift = 2, + FragmentShadingRateHorizontal4PixelsShift = 3, + FragmentShadingRateMax = 0x7fffffff, +}; + +enum FragmentShadingRateMask { + FragmentShadingRateMaskNone = 0, + FragmentShadingRateVertical2PixelsMask = 0x00000001, + FragmentShadingRateVertical4PixelsMask = 0x00000002, + FragmentShadingRateHorizontal2PixelsMask = 0x00000004, + FragmentShadingRateHorizontal4PixelsMask = 0x00000008, +}; + +enum FPDenormMode { + FPDenormModePreserve = 0, + FPDenormModeFlushToZero = 1, + FPDenormModeMax = 0x7fffffff, +}; + +enum FPOperationMode { + FPOperationModeIEEE = 0, + FPOperationModeALT = 1, + FPOperationModeMax = 0x7fffffff, +}; + +enum QuantizationModes { + QuantizationModesTRN = 0, + QuantizationModesTRN_ZERO = 1, + QuantizationModesRND = 2, + QuantizationModesRND_ZERO = 3, + QuantizationModesRND_INF = 4, + QuantizationModesRND_MIN_INF = 5, + QuantizationModesRND_CONV = 6, + QuantizationModesRND_CONV_ODD = 7, + QuantizationModesMax = 0x7fffffff, +}; + +enum OverflowModes { + OverflowModesWRAP = 0, + OverflowModesSAT = 1, + OverflowModesSAT_ZERO = 2, + OverflowModesSAT_SYM = 3, + OverflowModesMax = 0x7fffffff, +}; + +enum PackedVectorFormat { + PackedVectorFormatPackedVectorFormat4x8Bit = 0, + PackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + PackedVectorFormatMax = 0x7fffffff, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, +
OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + 
OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpTerminateInvocation = 4416, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpSDot = 4450, + OpSDotKHR = 4450, + OpUDot = 4451, + OpUDotKHR = 4451, + OpSUDot = 4452, + OpSUDotKHR = 4452, + OpSDotAccSat = 4453, + OpSDotAccSatKHR = 
4453, + OpUDotAccSat = 4454, + OpUDotAccSatKHR = 4454, + OpSUDotAccSat = 4455, + OpSUDotAccSatKHR = 4455, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, + OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayNV = 5336, + OpTraceNV = 5337, + OpTraceMotionNV = 5338, + OpTraceRayMotionNV = 5339, + OpTypeAccelerationStructureKHR = 5341, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocation = 5380, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpConvertUToImageNV = 5391, + OpConvertUToSamplerNV = 5392, + OpConvertImageToUNV = 5393, + OpConvertSamplerToUNV = 5394, + OpConvertUToSampledImageNV = 5395, + OpConvertSampledImageToUNV = 5396, + OpSamplerImageAddressingModeNV = 5397, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpConstantFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 5601, + OpAsmTargetINTEL = 5609, + OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, + OpDecorateString = 5632, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, + OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + 
OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + 
OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpVariableLengthArrayINTEL = 5818, + OpSaveMemoryINTEL = 5819, + OpRestoreMemoryINTEL = 5820, + OpArbitraryFloatSinCosPiINTEL = 5840, + OpArbitraryFloatCastINTEL = 5841, + OpArbitraryFloatCastFromIntINTEL = 5842, + OpArbitraryFloatCastToIntINTEL = 5843, + OpArbitraryFloatAddINTEL = 5846, + OpArbitraryFloatSubINTEL = 5847, + OpArbitraryFloatMulINTEL = 5848, + OpArbitraryFloatDivINTEL = 5849, + OpArbitraryFloatGTINTEL = 5850, + OpArbitraryFloatGEINTEL = 5851, + OpArbitraryFloatLTINTEL = 5852, + OpArbitraryFloatLEINTEL = 5853, + OpArbitraryFloatEQINTEL = 5854, + OpArbitraryFloatRecipINTEL = 5855, + OpArbitraryFloatRSqrtINTEL = 5856, + OpArbitraryFloatCbrtINTEL = 5857, + OpArbitraryFloatHypotINTEL = 5858, + OpArbitraryFloatSqrtINTEL = 5859, + OpArbitraryFloatLogINTEL = 5860, + OpArbitraryFloatLog2INTEL = 5861, + OpArbitraryFloatLog10INTEL = 5862, + OpArbitraryFloatLog1pINTEL = 5863, + OpArbitraryFloatExpINTEL = 5864, + OpArbitraryFloatExp2INTEL = 5865, + OpArbitraryFloatExp10INTEL = 5866, + OpArbitraryFloatExpm1INTEL = 5867, + 
OpArbitraryFloatSinINTEL = 5868, + OpArbitraryFloatCosINTEL = 5869, + OpArbitraryFloatSinCosINTEL = 5870, + OpArbitraryFloatSinPiINTEL = 5871, + OpArbitraryFloatCosPiINTEL = 5872, + OpArbitraryFloatASinINTEL = 5873, + OpArbitraryFloatASinPiINTEL = 5874, + OpArbitraryFloatACosINTEL = 5875, + OpArbitraryFloatACosPiINTEL = 5876, + OpArbitraryFloatATanINTEL = 5877, + OpArbitraryFloatATanPiINTEL = 5878, + OpArbitraryFloatATan2INTEL = 5879, + OpArbitraryFloatPowINTEL = 5880, + OpArbitraryFloatPowRINTEL = 5881, + OpArbitraryFloatPowNINTEL = 5882, + OpLoopControlINTEL = 5887, + OpFixedSqrtINTEL = 5923, + OpFixedRecipINTEL = 5924, + OpFixedRsqrtINTEL = 5925, + OpFixedSinINTEL = 5926, + OpFixedCosINTEL = 5927, + OpFixedSinCosINTEL = 5928, + OpFixedSinPiINTEL = 5929, + OpFixedCosPiINTEL = 5930, + OpFixedSinCosPiINTEL = 5931, + OpFixedLogINTEL = 5932, + OpFixedExpINTEL = 5933, + OpPtrCastToCrossWorkgroupINTEL = 5934, + OpCrossWorkgroupCastToPtrINTEL = 5938, + OpReadPipeBlockingINTEL = 5946, + OpWritePipeBlockingINTEL = 5947, + OpFPGARegINTEL = 5949, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + OpAtomicFAddEXT = 6035, + OpTypeBufferSurfaceINTEL = 6086, + OpTypeStructContinuedINTEL = 6090, + OpConstantCompositeContinuedINTEL = 6091, + OpSpecConstantCompositeContinuedINTEL = 6092, + OpMax = 0x7fffffff, +};
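With the Op enum complete, a raw SPIR-V binary can be walked word by word: words 0 through 4 form the module header (magic number 0x07230203, version, generator, bound, schema), and each instruction after that packs its total word count into the high 16 bits of its first word and its opcode into the low 16 bits. A minimal sketch of such a walker, assuming the declarations sit in namespace spv as in the stock Khronos spirv.hpp and that SPV_ENABLE_UTILITY_CODE was defined before including the header so the HasResultAndType helper below is available (dumpOpcodes is a hypothetical name, not part of the vendored file):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Walk a SPIR-V word stream and print each opcode with its word count.
void dumpOpcodes(const std::vector<std::uint32_t>& words) {
    if (words.size() < 5 || words[0] != 0x07230203u)
        return; // truncated header or wrong magic number: not a SPIR-V module
    std::size_t i = 5; // instructions begin right after the 5-word header
    while (i < words.size()) {
        const std::uint16_t wordCount = static_cast<std::uint16_t>(words[i] >> 16);
        const spv::Op opcode = static_cast<spv::Op>(words[i] & 0xffffu);
        bool hasResult = false, hasResultType = false;
        spv::HasResultAndType(opcode, &hasResult, &hasResultType);
        std::printf("opcode %u: %u words, result=%d, result type=%d\n",
                    static_cast<unsigned>(opcode), static_cast<unsigned>(wordCount),
                    static_cast<int>(hasResult), static_cast<int>(hasResultType));
        if (wordCount == 0)
            break; // malformed instruction; bail out instead of looping forever
        i += wordCount;
    }
}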
+ +#ifdef SPV_ENABLE_UTILITY_CODE +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case OpNop: *hasResult = false; *hasResultType = false; break; + case OpUndef: *hasResult = true; *hasResultType = true; break; + case OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case OpSource: *hasResult = false; *hasResultType = false; break; + case OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case OpName: *hasResult = false; *hasResultType = false; break; + case OpMemberName: *hasResult = false; *hasResultType = false; break; + case OpString: *hasResult = true; *hasResultType = false; break; + case OpLine: *hasResult = false; *hasResultType = false; break; + case OpExtension: *hasResult = false; *hasResultType = false; break; + case OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case OpExtInst: *hasResult = true; *hasResultType = true; break; + case OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case OpCapability: *hasResult = false; *hasResultType = false; break; + case OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case OpTypeBool: *hasResult = true; *hasResultType = false; break; + case OpTypeInt: *hasResult = true; *hasResultType = false; break; + case OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case OpTypeVector: *hasResult = true; *hasResultType = false; break; + case OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case OpTypeImage: *hasResult = true; *hasResultType = false; break; + case OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case OpTypeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case OpTypePointer: *hasResult = true; *hasResultType = false; break; + case OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case OpTypePipe: *hasResult = true; *hasResultType = false; break; + case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpConstant: *hasResult = true; *hasResultType = true; break; + case OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case OpConstantNull: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case OpFunction: *hasResult = true; *hasResultType = true; break; + case OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case OpVariable: *hasResult = true; *hasResultType = true; break; + case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case OpLoad: *hasResult = true; *hasResultType = true; break; + case OpStore: *hasResult = false; *hasResultType = false; break; + case OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case OpAccessChain: *hasResult = true; *hasResultType = true; break; + case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpArrayLength: *hasResult = true; *hasResultType = true; break; + case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpDecorate: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorate: *hasResult = false; *hasResultType = false;
break; + case OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case OpCopyObject: *hasResult = true; *hasResultType = true; break; + case OpTranspose: *hasResult = true; *hasResultType = true; break; + case OpSampledImage: *hasResult = true; *hasResultType = true; break; + case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageFetch: *hasResult = true; *hasResultType = true; break; + case OpImageGather: *hasResult = true; *hasResultType = true; break; + case OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageRead: *hasResult = true; *hasResultType = true; break; + case OpImageWrite: *hasResult = false; *hasResultType = false; break; + case OpImage: *hasResult = true; *hasResultType = true; break; + case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case OpUConvert: *hasResult = true; *hasResultType = true; break; + case OpSConvert: *hasResult = true; *hasResultType = true; break; + case OpFConvert: *hasResult = true; *hasResultType = true; break; + case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; 
break; + case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case OpBitcast: *hasResult = true; *hasResultType = true; break; + case OpSNegate: *hasResult = true; *hasResultType = true; break; + case OpFNegate: *hasResult = true; *hasResultType = true; break; + case OpIAdd: *hasResult = true; *hasResultType = true; break; + case OpFAdd: *hasResult = true; *hasResultType = true; break; + case OpISub: *hasResult = true; *hasResultType = true; break; + case OpFSub: *hasResult = true; *hasResultType = true; break; + case OpIMul: *hasResult = true; *hasResultType = true; break; + case OpFMul: *hasResult = true; *hasResultType = true; break; + case OpUDiv: *hasResult = true; *hasResultType = true; break; + case OpSDiv: *hasResult = true; *hasResultType = true; break; + case OpFDiv: *hasResult = true; *hasResultType = true; break; + case OpUMod: *hasResult = true; *hasResultType = true; break; + case OpSRem: *hasResult = true; *hasResultType = true; break; + case OpSMod: *hasResult = true; *hasResultType = true; break; + case OpFRem: *hasResult = true; *hasResultType = true; break; + case OpFMod: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case OpDot: *hasResult = true; *hasResultType = true; break; + case OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case OpAny: *hasResult = true; *hasResultType = true; break; + case OpAll: *hasResult = true; *hasResultType = true; break; + case OpIsNan: *hasResult = true; *hasResultType = true; break; + case OpIsInf: *hasResult = true; *hasResultType = true; break; + case OpIsFinite: *hasResult = true; *hasResultType = true; break; + case OpIsNormal: *hasResult = true; *hasResultType = true; break; + case OpSignBitSet: *hasResult = true; *hasResultType = true; break; + case OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case OpOrdered: *hasResult = true; *hasResultType = true; break; + case OpUnordered: *hasResult = true; *hasResultType = true; break; + case OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case OpSelect: *hasResult = true; *hasResultType = true; break; + case OpIEqual: *hasResult = true; *hasResultType = true; break; + case OpINotEqual: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpULessThan: *hasResult = 
true; *hasResultType = true; break; + case OpSLessThan: *hasResult = true; *hasResultType = true; break; + case OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpNot: *hasResult = true; *hasResultType = true; break; + case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case OpBitReverse: *hasResult = true; *hasResultType = true; break; + case OpBitCount: *hasResult = true; *hasResultType = true; break; + case OpDPdx: *hasResult = true; *hasResultType = true; break; + case OpDPdy: *hasResult = true; *hasResultType = true; break; + case OpFwidth: *hasResult = true; *hasResultType = true; break; + case OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + 
case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: *hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case 
OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; 
*hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: 
*hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case OpSDot: *hasResult = true; *hasResultType = true; break; + case OpUDot: *hasResult = true; *hasResultType = true; break; + case OpSUDot: *hasResult = true; *hasResultType = true; break; + case OpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + 
case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case OpExpectKHR: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: 
*hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = 
true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + 
case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/spvIR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/spvIR.h
new file mode 100644
index 0000000..0969127
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/SPIRV/spvIR.h
@@ -0,0 +1,520 @@
+//
+// Copyright (C) 2014 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+// SPIRV-IR
+//
+// Simple in-memory representation (IR) of SPIRV. Just for holding
+// each function's CFG of blocks. Has this hierarchy:
+//  - Module, which is a list of
+//    - Function, which is a list of
+//      - Block, which is a list of
+//        - Instruction
+//
+
+#pragma once
+#ifndef spvIR_H
+#define spvIR_H
+
+#include "spirv.hpp"
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <vector>
+
+namespace spv {
+
+class Block;
+class Function;
+class Module;
+
+const Id NoResult = 0;
+const Id NoType = 0;
+
+const Decoration NoPrecision = DecorationMax;
+
+#ifdef __GNUC__
+#   define POTENTIALLY_UNUSED __attribute__((unused))
+#else
+#   define POTENTIALLY_UNUSED
+#endif
+
+POTENTIALLY_UNUSED
+const MemorySemanticsMask MemorySemanticsAllMemory =
+    (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask |
+                          MemorySemanticsWorkgroupMemoryMask |
+                          MemorySemanticsAtomicCounterMemoryMask |
+                          MemorySemanticsImageMemoryMask);
+
+struct IdImmediate {
+    bool isId;      // true if word is an Id, false if word is an immediate
+    unsigned word;
+    IdImmediate(bool i, unsigned w) : isId(i), word(w) {}
+};
+
+//
+// SPIR-V IR instruction.
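+//
+// Editor's note (not upstream text): a hypothetical one-liner showing how the
+// ownership chain below is used in practice -- appending a terminator to an
+// already-built Block `b` hands the Instruction over as a unique_ptr:
+//
+//   b->addInstruction(std::unique_ptr<Instruction>(new Instruction(OpReturn)));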
+//
+
+class Instruction {
+public:
+    Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { }
+    explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { }
+    virtual ~Instruction() {}
+    void addIdOperand(Id id) {
+        operands.push_back(id);
+        idOperand.push_back(true);
+    }
+    void addImmediateOperand(unsigned int immediate) {
+        operands.push_back(immediate);
+        idOperand.push_back(false);
+    }
+    void setImmediateOperand(unsigned idx, unsigned int immediate) {
+        assert(!idOperand[idx]);
+        operands[idx] = immediate;
+    }
+
+    void addStringOperand(const char* str)
+    {
+        unsigned int word = 0;
+        unsigned int shiftAmount = 0;
+        char c;
+
+        do {
+            c = *(str++);
+            word |= ((unsigned int)c) << shiftAmount;
+            shiftAmount += 8;
+            if (shiftAmount == 32) {
+                addImmediateOperand(word);
+                word = 0;
+                shiftAmount = 0;
+            }
+        } while (c != 0);
+
+        // deal with partial last word
+        if (shiftAmount > 0) {
+            addImmediateOperand(word);
+        }
+    }
+    bool isIdOperand(int op) const { return idOperand[op]; }
+    void setBlock(Block* b) { block = b; }
+    Block* getBlock() const { return block; }
+    Op getOpCode() const { return opCode; }
+    int getNumOperands() const
+    {
+        assert(operands.size() == idOperand.size());
+        return (int)operands.size();
+    }
+    Id getResultId() const { return resultId; }
+    Id getTypeId() const { return typeId; }
+    Id getIdOperand(int op) const {
+        assert(idOperand[op]);
+        return operands[op];
+    }
+    unsigned int getImmediateOperand(int op) const {
+        assert(!idOperand[op]);
+        return operands[op];
+    }
+
+    // Write out the binary form.
+    void dump(std::vector<unsigned int>& out) const
+    {
+        // Compute the wordCount
+        unsigned int wordCount = 1;
+        if (typeId)
+            ++wordCount;
+        if (resultId)
+            ++wordCount;
+        wordCount += (unsigned int)operands.size();
+
+        // Write out the beginning of the instruction
+        out.push_back(((wordCount) << WordCountShift) | opCode);
+        if (typeId)
+            out.push_back(typeId);
+        if (resultId)
+            out.push_back(resultId);
+
+        // Write out the operands
+        for (int op = 0; op < (int)operands.size(); ++op)
+            out.push_back(operands[op]);
+    }
+
+protected:
+    Instruction(const Instruction&);
+    Id resultId;
+    Id typeId;
+    Op opCode;
+    std::vector<unsigned int> operands; // operands, both <id> and immediates (both are unsigned int)
+    std::vector<bool> idOperand;        // true for operands that are <id>, false for immediates
+    Block* block;
+};
+
+//
+// SPIR-V IR block.
+//
+
+class Block {
+public:
+    Block(Id id, Function& parent);
+    virtual ~Block()
+    {
+    }
+
+    Id getId() { return instructions.front()->getResultId(); }
+
+    Function& getParent() const { return parent; }
+    void addInstruction(std::unique_ptr<Instruction> inst);
+    void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
+    void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
+    const std::vector<Block*>& getPredecessors() const { return predecessors; }
+    const std::vector<Block*>& getSuccessors() const { return successors; }
+    const std::vector<std::unique_ptr<Instruction> >& getInstructions() const {
+        return instructions;
+    }
+    const std::vector<std::unique_ptr<Instruction> >& getLocalVariables() const { return localVariables; }
+    void setUnreachable() { unreachable = true; }
+    bool isUnreachable() const { return unreachable; }
+    // Returns the block's merge instruction, if one exists (otherwise null).
+    const Instruction* getMergeInstruction() const {
+        if (instructions.size() < 2) return nullptr;
+        const Instruction* nextToLast = (instructions.cend() - 2)->get();
+        switch (nextToLast->getOpCode()) {
+            case OpSelectionMerge:
+            case OpLoopMerge:
+                return nextToLast;
+            default:
+                return nullptr;
+        }
+        return nullptr;
+    }
+
+    // Change this block into a canonical dead merge block. Delete instructions
+    // as necessary. A canonical dead merge block has only an OpLabel and an
+    // OpUnreachable.
+    void rewriteAsCanonicalUnreachableMerge() {
+        assert(localVariables.empty());
+        // Delete all instructions except for the label.
+        assert(instructions.size() > 0);
+        instructions.resize(1);
+        successors.clear();
+        addInstruction(std::unique_ptr<Instruction>(new Instruction(OpUnreachable)));
+    }
+    // Change this block into a canonical dead continue target branching to the
+    // given header ID. Delete instructions as necessary. A canonical dead continue
+    // target has only an OpLabel and an unconditional branch back to the corresponding
+    // header.
+    void rewriteAsCanonicalUnreachableContinue(Block* header) {
+        assert(localVariables.empty());
+        // Delete all instructions except for the label.
+        assert(instructions.size() > 0);
+        instructions.resize(1);
+        successors.clear();
+        // Add OpBranch back to the header.
+        assert(header != nullptr);
+        Instruction* branch = new Instruction(OpBranch);
+        branch->addIdOperand(header->getId());
+        addInstruction(std::unique_ptr<Instruction>(branch));
+        successors.push_back(header);
+    }
+
+    bool isTerminated() const
+    {
+        switch (instructions.back()->getOpCode()) {
+        case OpBranch:
+        case OpBranchConditional:
+        case OpSwitch:
+        case OpKill:
+        case OpTerminateInvocation:
+        case OpReturn:
+        case OpReturnValue:
+        case OpUnreachable:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    void dump(std::vector<unsigned int>& out) const
+    {
+        instructions[0]->dump(out);
+        for (int i = 0; i < (int)localVariables.size(); ++i)
+            localVariables[i]->dump(out);
+        for (int i = 1; i < (int)instructions.size(); ++i)
+            instructions[i]->dump(out);
+    }
+
+protected:
+    Block(const Block&);
+    Block& operator=(Block&);
+
+    // To enforce keeping parent and ownership in sync:
+    friend Function;
+
+    std::vector<std::unique_ptr<Instruction> > instructions;
+    std::vector<Block*> predecessors, successors;
+    std::vector<std::unique_ptr<Instruction> > localVariables;
+    Function& parent;
+
+    // track whether this block is known to be unreachable (not necessarily
+    // true for all unreachable blocks, but should be set at least
+    // for the extraneous ones introduced by the builder).
+    bool unreachable;
+};
+
+// The different reasons for reaching a block in the inReadableOrder traversal.
+enum ReachReason {
+    // Reachable from the entry block via transfers of control, i.e. branches.
+    ReachViaControlFlow = 0,
+    // A continue target that is not reachable via control flow.
+    ReachDeadContinue,
+    // A merge block that is not reachable via control flow.
+    ReachDeadMerge
+};
+
+// Traverses the control-flow graph rooted at root in an order suited for
+// readable code generation. Invokes callback at every node in the traversal
+// order. The callback arguments are:
+// - the block,
+// - the reason we reached the block,
+// - if the reason was that block is an unreachable continue or unreachable merge block
+//   then the last parameter is the corresponding header block.
+void inReadableOrder(Block* root, std::function<void(Block*, ReachReason, Block* header)> callback);
+
+//
+// SPIR-V IR Function.
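+//
+// Editor's note (not upstream text): a sketch of driving inReadableOrder,
+// declared above, to serialize a function's blocks; it mirrors what
+// Function::dump below does, assuming `fn` is a fully built Function:
+//
+//   std::vector<unsigned int> words;
+//   inReadableOrder(fn->getEntryBlock(),
+//                   [&words](Block* b, ReachReason, Block*) { b->dump(words); });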
+//
+
+class Function {
+public:
+    Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent);
+    virtual ~Function()
+    {
+        for (int i = 0; i < (int)parameterInstructions.size(); ++i)
+            delete parameterInstructions[i];
+
+        for (int i = 0; i < (int)blocks.size(); ++i)
+            delete blocks[i];
+    }
+    Id getId() const { return functionInstruction.getResultId(); }
+    Id getParamId(int p) const { return parameterInstructions[p]->getResultId(); }
+    Id getParamType(int p) const { return parameterInstructions[p]->getTypeId(); }
+
+    void addBlock(Block* block) { blocks.push_back(block); }
+    void removeBlock(Block* block)
+    {
+        auto found = find(blocks.begin(), blocks.end(), block);
+        assert(found != blocks.end());
+        blocks.erase(found);
+        delete block;
+    }
+
+    Module& getParent() const { return parent; }
+    Block* getEntryBlock() const { return blocks.front(); }
+    Block* getLastBlock() const { return blocks.back(); }
+    const std::vector<Block*>& getBlocks() const { return blocks; }
+    void addLocalVariable(std::unique_ptr<Instruction> inst);
+    Id getReturnType() const { return functionInstruction.getTypeId(); }
+    Id getFuncId() const { return functionInstruction.getResultId(); }
+    void setReturnPrecision(Decoration precision)
+    {
+        if (precision == DecorationRelaxedPrecision)
+            reducedPrecisionReturn = true;
+    }
+    Decoration getReturnPrecision() const
+        { return reducedPrecisionReturn ? DecorationRelaxedPrecision : NoPrecision; }
+
+    void setDebugLineInfo(Id fileName, int line, int column) {
+        lineInstruction = std::unique_ptr<Instruction>{new Instruction(OpLine)};
+        lineInstruction->addIdOperand(fileName);
+        lineInstruction->addImmediateOperand(line);
+        lineInstruction->addImmediateOperand(column);
+    }
+    bool hasDebugLineInfo() const { return lineInstruction != nullptr; }
+
+    void setImplicitThis() { implicitThis = true; }
+    bool hasImplicitThis() const { return implicitThis; }
+
+    void addParamPrecision(unsigned param, Decoration precision)
+    {
+        if (precision == DecorationRelaxedPrecision)
+            reducedPrecisionParams.insert(param);
+    }
+    Decoration getParamPrecision(unsigned param) const
+    {
+        return reducedPrecisionParams.find(param) != reducedPrecisionParams.end() ?
+            DecorationRelaxedPrecision : NoPrecision;
+    }
+
+    void dump(std::vector<unsigned int>& out) const
+    {
+        // OpLine
+        if (lineInstruction != nullptr) {
+            lineInstruction->dump(out);
+        }
+
+        // OpFunction
+        functionInstruction.dump(out);
+
+        // OpFunctionParameter
+        for (int p = 0; p < (int)parameterInstructions.size(); ++p)
+            parameterInstructions[p]->dump(out);
+
+        // Blocks
+        inReadableOrder(blocks[0], [&out](const Block* b, ReachReason, Block*) { b->dump(out); });
+        Instruction end(0, 0, OpFunctionEnd);
+        end.dump(out);
+    }
+
+protected:
+    Function(const Function&);
+    Function& operator=(Function&);
+
+    Module& parent;
+    std::unique_ptr<Instruction> lineInstruction;
+    Instruction functionInstruction;
+    std::vector<Instruction*> parameterInstructions;
+    std::vector<Block*> blocks;
+    bool implicitThis;  // true if this is a member function expecting to be passed a 'this' as the first argument
+    bool reducedPrecisionReturn;
+    std::set<int> reducedPrecisionParams;   // list of parameter indexes that need a relaxed precision arg
+};
+
+//
+// SPIR-V IR Module.
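+//
+// Editor's note (not upstream text): besides owning the functions, Module is
+// the id-to-instruction lookup table the builder relies on. A sketch, assuming
+// `module` is populated and `id` is a mapped result id:
+//
+//   Instruction* inst = module.getInstruction(id);
+//   Id typeId = module.getTypeId(id);   // NoType when no instruction is mapped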
+//
+
+class Module {
+public:
+    Module() {}
+    virtual ~Module()
+    {
+        // TODO delete things
+    }
+
+    void addFunction(Function *fun) { functions.push_back(fun); }
+
+    void mapInstruction(Instruction *instruction)
+    {
+        spv::Id resultId = instruction->getResultId();
+        // map the instruction's result id
+        if (resultId >= idToInstruction.size())
+            idToInstruction.resize(resultId + 16);
+        idToInstruction[resultId] = instruction;
+    }
+
+    Instruction* getInstruction(Id id) const { return idToInstruction[id]; }
+    const std::vector<Function*>& getFunctions() const { return functions; }
+    spv::Id getTypeId(Id resultId) const {
+        return idToInstruction[resultId] == nullptr ? NoType : idToInstruction[resultId]->getTypeId();
+    }
+    StorageClass getStorageClass(Id typeId) const
+    {
+        assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer);
+        return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0);
+    }
+
+    void dump(std::vector<unsigned int>& out) const
+    {
+        for (int f = 0; f < (int)functions.size(); ++f)
+            functions[f]->dump(out);
+    }
+
+protected:
+    Module(const Module&);
+    std::vector<Function*> functions;
+
+    // map from result id to instruction having that result id
+    std::vector<Instruction*> idToInstruction;
+
+    // map from a result id to its type id
+};
+
+//
+// Implementation (it's here due to circular type definitions).
+//
+
+// Add both
+//  - the OpFunction instruction
+//  - all the OpFunctionParameter instructions
+__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent)
+    : parent(parent), lineInstruction(nullptr),
+      functionInstruction(id, resultType, OpFunction), implicitThis(false),
+      reducedPrecisionReturn(false)
+{
+    // OpFunction
+    functionInstruction.addImmediateOperand(FunctionControlMaskNone);
+    functionInstruction.addIdOperand(functionType);
+    parent.mapInstruction(&functionInstruction);
+    parent.addFunction(this);
+
+    // OpFunctionParameter
+    Instruction* typeInst = parent.getInstruction(functionType);
+    int numParams = typeInst->getNumOperands() - 1;
+    for (int p = 0; p < numParams; ++p) {
+        Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter);
+        parent.mapInstruction(param);
+        parameterInstructions.push_back(param);
+    }
+}
+
+__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst)
+{
+    Instruction* raw_instruction = inst.get();
+    blocks[0]->addLocalVariable(std::move(inst));
+    parent.mapInstruction(raw_instruction);
+}
+
+__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false)
+{
+    instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel)));
+    instructions.back()->setBlock(this);
+    parent.getParent().mapInstruction(instructions.back().get());
+}
+
+__inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
+{
+    Instruction* raw_instruction = inst.get();
+    instructions.push_back(std::move(inst));
+    raw_instruction->setBlock(this);
+    if (raw_instruction->getResultId())
+        parent.getParent().mapInstruction(raw_instruction);
+}
+
+} // end spv namespace
+
+#endif // spvIR_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/build_info.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/build_info.h
new file mode 100644
index 0000000..e711b20
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/glslang/build_info.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2020 The Khronos Group Inc.
+//
+// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of The Khronos Group Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef GLSLANG_BUILD_INFO +#define GLSLANG_BUILD_INFO + +#define GLSLANG_VERSION_MAJOR 11 +#define GLSLANG_VERSION_MINOR 12 +#define GLSLANG_VERSION_PATCH 0 +#define GLSLANG_VERSION_FLAVOR "" + +#define GLSLANG_VERSION_GREATER_THAN(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH) > (patch))))) + +#define GLSLANG_VERSION_GREATER_OR_EQUAL_TO(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH >= (patch)))))) + +#define GLSLANG_VERSION_LESS_THAN(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH) < (patch))))) + +#define GLSLANG_VERSION_LESS_OR_EQUAL_TO(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH <= (patch)))))) + +#endif // GLSLANG_BUILD_INFO diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/allocator.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/allocator.h new file mode 100644 index 0000000..3a5ebca --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/allocator.h @@ -0,0 +1,448 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_ALLOCATOR_H
+#define NCNN_ALLOCATOR_H
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#include "platform.h"
+
+#include <stdlib.h>
+
+#if NCNN_VULKAN
+#include <vulkan/vulkan.h>
+#endif // NCNN_VULKAN
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+#include <android/hardware_buffer.h>
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+// the alignment of all the allocated buffers
+#if NCNN_AVX512
+#define NCNN_MALLOC_ALIGN 64
+#elif NCNN_AVX
+#define NCNN_MALLOC_ALIGN 32
+#else
+#define NCNN_MALLOC_ALIGN 16
+#endif
+
+// we have some optimized kernels that may overread buffer a bit in loop
+// it is common to interleave next-loop data load with arithmetic instructions
+// allocating more bytes keeps us safe from SEGV_ACCERR failure
+#define NCNN_MALLOC_OVERREAD 64
+
+// Aligns a pointer to the specified number of bytes
+// ptr Aligned pointer
+// n Alignment size that must be a power of two
+template<typename _Tp>
+static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
+{
+    return (_Tp*)(((size_t)ptr + n - 1) & -n);
+}
+
+// Aligns a buffer size to the specified number of bytes
+// The function returns the minimum number that is greater or equal to sz and is divisible by n
+// sz Buffer size to align
+// n Alignment size that must be a power of two
+static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n)
+{
+    return (sz + n - 1) & -n;
+}
+
+static NCNN_FORCEINLINE void* fastMalloc(size_t size)
+{
+#if _MSC_VER
+    return _aligned_malloc(size, NCNN_MALLOC_ALIGN);
+#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
+    void* ptr = 0;
+    if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD))
+        ptr = 0;
+    return ptr;
+#elif __ANDROID__ && __ANDROID_API__ < 17
+    return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD);
+#else
+    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD);
+    if (!udata)
+        return 0;
+    unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN);
+    adata[-1] = udata;
+    return adata;
+#endif
+}
+
+static NCNN_FORCEINLINE void fastFree(void* ptr)
+{
+    if (ptr)
+    {
+#if _MSC_VER
+        _aligned_free(ptr);
+#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
+        free(ptr);
+#elif __ANDROID__ && __ANDROID_API__ < 17
+        free(ptr);
+#else
+        unsigned char* udata = ((unsigned char**)ptr)[-1];
+        free(udata);
+#endif
+    }
+}
+
+#if NCNN_THREADS
+// exchange-add operation for atomic operations on reference counters
+#if defined __riscv && !defined __riscv_atomic
+// riscv target without A extension
+static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
+{
+    int tmp = *addr;
+    *addr += delta;
+    return tmp;
+}
+#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
+// atomic increment on the linux version of the Intel(tm) compiler
+#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
+#elif defined __GNUC__
+#if defined __clang__ && __clang_major__ >= 3 &&
!defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) +#ifdef __ATOMIC_ACQ_REL +#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) +#endif +#else +#if defined __ATOMIC_ACQ_REL && !defined __clang__ +// version for gcc >= 4.7 +#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) +#endif +#endif +#elif defined _MSC_VER && !defined RC_INVOKED +#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) +#else +// thread-unsafe branch +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif +#else // NCNN_THREADS +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif // NCNN_THREADS + +class NCNN_EXPORT Allocator +{ +public: + virtual ~Allocator(); + virtual void* fastMalloc(size_t size) = 0; + virtual void fastFree(void* ptr) = 0; +}; + +class PoolAllocatorPrivate; +class NCNN_EXPORT PoolAllocator : public Allocator +{ +public: + PoolAllocator(); + ~PoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + PoolAllocator(const PoolAllocator&); + PoolAllocator& operator=(const PoolAllocator&); + +private: + PoolAllocatorPrivate* const d; +}; + +class UnlockedPoolAllocatorPrivate; +class NCNN_EXPORT UnlockedPoolAllocator : public Allocator +{ +public: + UnlockedPoolAllocator(); + ~UnlockedPoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + UnlockedPoolAllocator(const UnlockedPoolAllocator&); + UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&); + +private: + UnlockedPoolAllocatorPrivate* const d; +}; + +#if NCNN_VULKAN + +class VulkanDevice; + +class NCNN_EXPORT VkBufferMemory +{ +public: + VkBuffer buffer; + + // the base offset assigned by allocator + size_t offset; + size_t capacity; + + VkDeviceMemory memory; + void* mapped_ptr; + + // buffer state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkPipelineStageFlags stage_flags; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkImageMemory +{ +public: + VkImage image; + VkImageView imageview; + + // underlying info assigned by allocator + int width; + int height; + int depth; + VkFormat format; + + VkDeviceMemory memory; + void* mapped_ptr; + + // the base offset assigned by allocator + size_t bind_offset; + size_t bind_capacity; + + // image state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkImageLayout image_layout; + mutable VkPipelineStageFlags stage_flags; + + // in-execution state, modified by command functions internally + mutable int 
command_refcount; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkAllocator +{ +public: + explicit VkAllocator(const VulkanDevice* _vkdev); + virtual ~VkAllocator(); + + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size) = 0; + virtual void fastFree(VkBufferMemory* ptr) = 0; + virtual int flush(VkBufferMemory* ptr); + virtual int invalidate(VkBufferMemory* ptr); + + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; + virtual void fastFree(VkImageMemory* ptr) = 0; + +public: + const VulkanDevice* vkdev; + uint32_t buffer_memory_type_index; + uint32_t image_memory_type_index; + uint32_t reserved_type_index; + bool mappable; + bool coherent; + +protected: + VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); + VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); + VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); + + VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); + VkImageView create_imageview(VkImage image, VkFormat format); +}; + +class VkBlobAllocatorPrivate; +class NCNN_EXPORT VkBlobAllocator : public VkAllocator +{ +public: + explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M + virtual ~VkBlobAllocator(); + +public: + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkBlobAllocator(const VkBlobAllocator&); + VkBlobAllocator& operator=(const VkBlobAllocator&); + +private: + VkBlobAllocatorPrivate* const d; +}; + +class VkWeightAllocatorPrivate; +class NCNN_EXPORT VkWeightAllocator : public VkAllocator +{ +public: + explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M + virtual ~VkWeightAllocator(); + +public: + // release all blocks immediately + virtual void clear(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightAllocator(const VkWeightAllocator&); + VkWeightAllocator& operator=(const VkWeightAllocator&); + +private: + VkWeightAllocatorPrivate* const d; +}; + +class VkStagingAllocatorPrivate; +class NCNN_EXPORT VkStagingAllocator : public VkAllocator +{ +public: + explicit VkStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkStagingAllocator(); + +public: + // ratio range 0 ~ 1 + // default cr = 0.75 + void set_size_compare_ratio(float scr); + + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkStagingAllocator(const VkStagingAllocator&); + VkStagingAllocator& operator=(const VkStagingAllocator&); + +private: + VkStagingAllocatorPrivate* const d; +}; + +class VkWeightStagingAllocatorPrivate; +class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator +{ +public: + explicit 
VkWeightStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkWeightStagingAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightStagingAllocator(const VkWeightStagingAllocator&); + VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&); + +private: + VkWeightStagingAllocatorPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator +{ +public: + VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb); + virtual ~VkAndroidHardwareBufferImageAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&); + VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&); + +public: + int init(); + + int width() const; + int height() const; + uint64_t external_format() const; + +public: + AHardwareBuffer* hb; + AHardwareBuffer_Desc bufferDesc; + VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties; + VkAndroidHardwareBufferPropertiesANDROID bufferProperties; + VkSamplerYcbcrConversionKHR samplerYcbcrConversion; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_ALLOCATOR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/benchmark.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/benchmark.h new file mode 100644 index 0000000..3d5c0cd --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/benchmark.h @@ -0,0 +1,36 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
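+
+// Editor's note (not part of the upstream header): get_current_time() below
+// returns a wall-clock timestamp in milliseconds, so the usual pattern is a
+// start/end pair around the work being measured, e.g. (hypothetical snippet):
+//
+//   double t0 = ncnn::get_current_time();
+//   // ... run inference ...
+//   double t1 = ncnn::get_current_time();
+//   NCNN_LOGE("inference took %.2f ms", t1 - t0);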
+ +#ifndef NCNN_BENCHMARK_H +#define NCNN_BENCHMARK_H + +#include "layer.h" +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +// get now timestamp in ms +NCNN_EXPORT double get_current_time(); + +#if NCNN_BENCHMARK + +NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); +NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); + +#endif // NCNN_BENCHMARK + +} // namespace ncnn + +#endif // NCNN_BENCHMARK_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/blob.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/blob.h new file mode 100644 index 0000000..c9f144f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/blob.h @@ -0,0 +1,44 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_BLOB_H +#define NCNN_BLOB_H + +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT Blob +{ +public: + // empty + Blob(); + +public: +#if NCNN_STRING + // blob name + std::string name; +#endif // NCNN_STRING + // layer index which produce this blob as output + int producer; + // layer index which need this blob as input + int consumer; + // shape hint + Mat shape; +}; + +} // namespace ncnn + +#endif // NCNN_BLOB_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/c_api.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/c_api.h new file mode 100644 index 0000000..ebf5db5 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/c_api.h @@ -0,0 +1,336 @@ +/* Tencent is pleased to support the open source community by making ncnn available. + * + * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. + * + * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */
+
+#ifndef NCNN_C_API_H
+#define NCNN_C_API_H
+
+#include "platform.h"
+
+#if NCNN_C_API
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NCNN_EXPORT const char* ncnn_version();
+
+/* allocator api */
+typedef struct __ncnn_allocator_t* ncnn_allocator_t;
+struct NCNN_EXPORT __ncnn_allocator_t
+{
+    void* pthis;
+
+    void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size);
+    void (*fast_free)(ncnn_allocator_t allocator, void* ptr);
+};
+
+NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator();
+NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator();
+NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator);
+
+/* option api */
+typedef struct __ncnn_option_t* ncnn_option_t;
+
+NCNN_EXPORT ncnn_option_t ncnn_option_create();
+NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt);
+
+NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads);
+
+NCNN_EXPORT int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int use_local_pool_allocator);
+
+NCNN_EXPORT void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t allocator);
+NCNN_EXPORT void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocator_t allocator);
+
+NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute);
+
+/* mat api */
+typedef struct __ncnn_mat_t* ncnn_mat_t;
+
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create();
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack,
ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_destroy(ncnn_mat_t mat); + +NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v); + +NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator); + +NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat); +NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat); + +NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c); + +#if NCNN_PIXEL + +/* mat pixel api */ +#define NCNN_MAT_PIXEL_RGB 1 +#define NCNN_MAT_PIXEL_BGR 2 +#define NCNN_MAT_PIXEL_GRAY 3 +#define NCNN_MAT_PIXEL_RGBA 4 +#define NCNN_MAT_PIXEL_BGRA 5 +#define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16)) +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride); +NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); + +#endif /* NCNN_PIXEL */ + +NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals); + +NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt); +NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt); + +/* blob api */ +typedef struct __ncnn_blob_t* ncnn_blob_t; + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob); +NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob); + +NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c); + +/* paramdict api */ +typedef struct __ncnn_paramdict_t* ncnn_paramdict_t; + +NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create(); +NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd); + +NCNN_EXPORT int ncnn_paramdict_get_type(const 
ncnn_paramdict_t pd, int id); + +NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def); +NCNN_EXPORT float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float def); +NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def); + +NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i); +NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f); +NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v); + +/* datareader api */ +typedef struct __ncnn_datareader_t* ncnn_datareader_t; +struct NCNN_EXPORT __ncnn_datareader_t +{ + void* pthis; + +#if NCNN_STRING + int (*scan)(ncnn_datareader_t dr, const char* format, void* p); +#endif /* NCNN_STRING */ + size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size); +}; + +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create(); +#if NCNN_STDIO +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp); +#endif /* NCNN_STDIO */ +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem); +NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr); + +/* modelbin api */ +typedef struct __ncnn_modelbin_t* ncnn_modelbin_t; +struct NCNN_EXPORT __ncnn_modelbin_t +{ + void* pthis; + + ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type); + ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type); + ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type); +}; + +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr); +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n); +NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb); + +/* layer api */ +typedef struct __ncnn_layer_t* ncnn_layer_t; +struct NCNN_EXPORT __ncnn_layer_t +{ + void* pthis; + + int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd); + int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb); + + int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + + int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt); + int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt); + + int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt); + int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt); +}; + +NCNN_EXPORT ncnn_layer_t ncnn_layer_create(); +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex); +#if NCNN_STRING +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer); + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer); +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t 
layer); +NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer); + +NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable); + +NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i); +NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i); + +NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); +NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); + +/* layer factory function */ +typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata); +typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata); + +typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t; +struct __ncnn_net_custom_layer_factory_t +{ + ncnn_layer_creator_t creator; + ncnn_layer_destroyer_t destroyer; + void* userdata; + ncnn_net_custom_layer_factory_t next; +}; + +/* net api */ +typedef struct __ncnn_net_t* ncnn_net_t; +struct __ncnn_net_t +{ + void* pthis; + + ncnn_net_custom_layer_factory_t custom_layer_factory; +}; + +NCNN_EXPORT ncnn_net_t ncnn_net_create(); +NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net); + +NCNN_EXPORT ncnn_option_t ncnn_net_get_option(ncnn_net_t net); +NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt); + +#if NCNN_STRING +NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path); +NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path); +#endif /* NCNN_STDIO */ + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem); +#endif /* NCNN_STRING */ +#endif /* NCNN_STDIO */ +NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem); +NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem); + +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +NCNN_EXPORT int 
ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
+
+NCNN_EXPORT void ncnn_net_clear(ncnn_net_t net);
+
+NCNN_EXPORT int ncnn_net_get_input_count(const ncnn_net_t net);
+NCNN_EXPORT int ncnn_net_get_output_count(const ncnn_net_t net);
+#if NCNN_STRING
+NCNN_EXPORT const char* ncnn_net_get_input_name(const ncnn_net_t net, int i);
+NCNN_EXPORT const char* ncnn_net_get_output_name(const ncnn_net_t net, int i);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_net_get_input_index(const ncnn_net_t net, int i);
+NCNN_EXPORT int ncnn_net_get_output_index(const ncnn_net_t net, int i);
+
+/* extractor api */
+typedef struct __ncnn_extractor_t* ncnn_extractor_t;
+
+NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net);
+NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex);
+
+NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt);
+
+#if NCNN_STRING
+NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* NCNN_C_API */
+
+#endif /* NCNN_C_API_H */
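The option/net/extractor functions above mirror ncnn's C++ Net/Extractor flow: create an option, create a net, load the param and bin pair, then run one extractor per inference. A hedged sketch of that round trip; the file names and the blob names "images"/"output0" are illustrative assumptions, not values defined by this header:

#include "c_api.h"

int run_once(const unsigned char* pixels, int w, int h)
{
    ncnn_option_t opt = ncnn_option_create();
    ncnn_option_set_num_threads(opt, 4);

    ncnn_net_t net = ncnn_net_create();
    ncnn_net_set_option(net, opt);
    /* placeholder model file names */
    if (ncnn_net_load_param(net, "yolov8n.param") != 0
            || ncnn_net_load_model(net, "yolov8n.bin") != 0)
        return -1;

    /* tightly packed RGB rows, resized to the network input size */
    ncnn_mat_t in = ncnn_mat_from_pixels_resize(pixels, NCNN_MAT_PIXEL_RGB,
                                                w, h, w * 3, 640, 640, NULL);

    ncnn_extractor_t ex = ncnn_extractor_create(net);
    ncnn_extractor_input(ex, "images", in);      /* input blob name assumed */
    ncnn_mat_t out = NULL;
    ncnn_extractor_extract(ex, "output0", &out); /* output blob name assumed */

    /* ... postprocess out ... */
    ncnn_mat_destroy(out);
    ncnn_mat_destroy(in);
    ncnn_extractor_destroy(ex);
    ncnn_net_destroy(net);
    ncnn_option_destroy(opt);
    return 0;
}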
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/command.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/command.h
new file mode 100644
index 0000000..337d085
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/command.h
@@ -0,0 +1,136 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_COMMAND_H
+#define NCNN_COMMAND_H
+
+#include "platform.h"
+
+#if NCNN_VULKAN
+
+#include "mat.h"
+
+#include <vulkan/vulkan.h>
+
+namespace ncnn {
+
+class Pipeline;
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+class ImportAndroidHardwareBufferPipeline;
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+class VkComputePrivate;
+class NCNN_EXPORT VkCompute
+{
+public:
+    explicit VkCompute(const VulkanDevice* vkdev);
+    virtual ~VkCompute();
+
+public:
+    void record_upload(const Mat& src, VkMat& dst, const Option& opt);
+
+    void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);
+
+    void record_download(const VkMat& src, Mat& dst, const Option& opt);
+
+    void record_download(const VkImageMat& src, Mat& dst, const Option& opt);
+
+    void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt);
+
+    void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt);
+
+    void record_clone(const Mat& src, VkMat& dst, const Option& opt);
+
+    void record_clone(const Mat& src, VkImageMat& dst, const Option& opt);
+
+    void record_clone(const VkMat& src, Mat& dst, const Option& opt);
+
+    void record_clone(const VkImageMat& src, Mat& dst, const Option& opt);
+
+    void record_clone(const VkMat& src, VkMat& dst, const Option& opt);
+
+    void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt);
+
+    void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt);
+
+    void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt);
+
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher);
+
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
+
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher);
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher);
+
+#if NCNN_BENCHMARK
+    void record_write_timestamp(uint32_t query);
+#endif // NCNN_BENCHMARK
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+    void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst);
+
+    void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst);
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+    int submit_and_wait();
+
+    int reset();
+
+#if NCNN_BENCHMARK
+    int create_query_pool(uint32_t query_count);
+
+    int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results);
+#endif // NCNN_BENCHMARK
+
+protected:
+    const VulkanDevice* vkdev;
+
+    void barrier_readwrite(const VkMat& binding);
+    void barrier_readwrite(const VkImageMat& binding);
+    void barrier_readonly(const VkImageMat& binding);
+
+private:
+    VkComputePrivate* const d;
+};
+
+class VkTransferPrivate;
+class NCNN_EXPORT VkTransfer
+{
+public:
+    explicit VkTransfer(const VulkanDevice* vkdev);
+    virtual ~VkTransfer();
+
+public:
+    void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true);
+
+    void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);
+
+    int submit_and_wait();
+
+protected:
+    const VulkanDevice* vkdev;
+
+private:
+    VkTransferPrivate* const d;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_VULKAN
+
+#endif // NCNN_COMMAND_H
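VkCompute is a deferred recorder: every record_* call only queues work into a command buffer, and nothing reaches the GPU until submit_and_wait(). A minimal sketch of the upload/dispatch/download pattern, assuming a valid VulkanDevice* vkdev, a host input Mat in_cpu, a pre-created Pipeline* pipeline, and a pre-allocated output VkMat out_gpu (all of these are placeholders, not objects defined in this diff):

// hypothetical setup: vkdev, pipeline, in_cpu, out_gpu exist already
ncnn::Option opt;
opt.use_vulkan_compute = true;

ncnn::VkCompute cmd(vkdev);

ncnn::VkMat in_gpu;
cmd.record_upload(in_cpu, in_gpu, opt);   // host Mat -> device buffer

std::vector<ncnn::VkMat> bindings(2);
bindings[0] = in_gpu;
bindings[1] = out_gpu;                    // descriptor bindings, in shader order
std::vector<ncnn::vk_constant_type> constants(1);
constants[0].i = in_gpu.w;                // push constant consumed by the shader

cmd.record_pipeline(pipeline, bindings, constants, out_gpu); // dispatch sized by out_gpu

ncnn::Mat out_cpu;
cmd.record_download(out_gpu, out_cpu, opt); // device buffer -> host Mat

cmd.submit_and_wait();                    // all recorded work executes here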
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/cpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/cpu.h
new file mode 100644
index 0000000..d03e7e8
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/cpu.h
@@ -0,0 +1,173 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_CPU_H
+#define NCNN_CPU_H
+
+#include <stddef.h>
+
+#if (defined _WIN32 && !(defined __MINGW32__))
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+#if defined __ANDROID__ || defined __linux__
+#include <sched.h> // cpu_set_t
+#endif
+
+#include "platform.h"
+
+namespace ncnn {
+
+class NCNN_EXPORT CpuSet
+{
+public:
+    CpuSet();
+    void enable(int cpu);
+    void disable(int cpu);
+    void disable_all();
+    bool is_enabled(int cpu) const;
+    int num_enabled() const;
+
+public:
+#if (defined _WIN32 && !(defined __MINGW32__))
+    ULONG_PTR mask;
+#endif
+#if defined __ANDROID__ || defined __linux__
+    cpu_set_t cpu_set;
+#endif
+#if __APPLE__
+    unsigned int policy;
+#endif
+};
+
+// test optional cpu features
+// edsp = armv7 edsp
+NCNN_EXPORT int cpu_support_arm_edsp();
+// neon = armv7 neon or aarch64 asimd
+NCNN_EXPORT int cpu_support_arm_neon();
+// vfpv4 = armv7 fp16 + fma
+NCNN_EXPORT int cpu_support_arm_vfpv4();
+// asimdhp = aarch64 asimd half precision
+NCNN_EXPORT int cpu_support_arm_asimdhp();
+// asimddp = aarch64 asimd dot product
+NCNN_EXPORT int cpu_support_arm_asimddp();
+// asimdfhm = aarch64 asimd fhm
+NCNN_EXPORT int cpu_support_arm_asimdfhm();
+// bf16 = aarch64 bf16
+NCNN_EXPORT int cpu_support_arm_bf16();
+// i8mm = aarch64 i8mm
+NCNN_EXPORT int cpu_support_arm_i8mm();
+// sve = aarch64 sve
+NCNN_EXPORT int cpu_support_arm_sve();
+// sve2 = aarch64 sve2
+NCNN_EXPORT int cpu_support_arm_sve2();
+// svebf16 = aarch64 svebf16
+NCNN_EXPORT int cpu_support_arm_svebf16();
+// svei8mm = aarch64 svei8mm
+NCNN_EXPORT int cpu_support_arm_svei8mm();
+// svef32mm = aarch64 svef32mm
+NCNN_EXPORT int cpu_support_arm_svef32mm();
+
+// avx = x86 avx
+NCNN_EXPORT int cpu_support_x86_avx();
+// fma = x86 fma
+NCNN_EXPORT int cpu_support_x86_fma();
+// xop = x86 xop
+NCNN_EXPORT int cpu_support_x86_xop();
+// f16c = x86 f16c
+NCNN_EXPORT int cpu_support_x86_f16c();
+// avx2 = x86 avx2 + fma + f16c
+NCNN_EXPORT int cpu_support_x86_avx2();
+// avx_vnni = x86 avx vnni
+NCNN_EXPORT int cpu_support_x86_avx_vnni();
+// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
+NCNN_EXPORT int cpu_support_x86_avx512();
+// avx512_vnni = x86 avx512 vnni
+NCNN_EXPORT int cpu_support_x86_avx512_vnni();
+// avx512_bf16 = x86 avx512 bf16
+NCNN_EXPORT int cpu_support_x86_avx512_bf16();
+// avx512_fp16 = x86 avx512 fp16
+NCNN_EXPORT int cpu_support_x86_avx512_fp16();
+
+// lsx = loongarch lsx
+NCNN_EXPORT int cpu_support_loongarch_lsx();
+// lasx = loongarch lasx
+NCNN_EXPORT int cpu_support_loongarch_lasx();
+
+// msa = mips msa
+NCNN_EXPORT int cpu_support_mips_msa();
+// mmi = loongson mmi
+NCNN_EXPORT int cpu_support_loongson_mmi();
+
+// v = riscv vector
+NCNN_EXPORT int cpu_support_riscv_v();
+// zfh = riscv half-precision float
+NCNN_EXPORT int cpu_support_riscv_zfh();
+// vlenb = riscv vector length in bytes
+NCNN_EXPORT int cpu_riscv_vlenb();
+
+// cpu info
+NCNN_EXPORT int get_cpu_count();
+NCNN_EXPORT int get_little_cpu_count();
+NCNN_EXPORT int get_big_cpu_count();
+
+NCNN_EXPORT int get_physical_cpu_count();
+NCNN_EXPORT int get_physical_little_cpu_count();
+NCNN_EXPORT int get_physical_big_cpu_count();
+
+// cpu l2 varies from 64k to 1M, but l3 can be zero
+NCNN_EXPORT int get_cpu_level2_cache_size();
+NCNN_EXPORT int get_cpu_level3_cache_size();
+
+// bind all threads on little clusters if powersave enabled
+// affects HMP arch cpu like ARM big.LITTLE
+// only implemented on android at the moment
+// switching powersave is expensive and not thread-safe
+// 0 = all cores enabled(default)
+// 1 = only little clusters enabled
+// 2 = only big clusters enabled
+// return 0 if success for setter function
+NCNN_EXPORT int get_cpu_powersave();
+NCNN_EXPORT int set_cpu_powersave(int powersave);
+
+// convenient wrapper
+NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);
+
+// set explicit thread affinity
+NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);
+
+// misc function wrapper for openmp routines
+NCNN_EXPORT int get_omp_num_threads();
+NCNN_EXPORT void set_omp_num_threads(int num_threads);
+
+NCNN_EXPORT int get_omp_dynamic();
+NCNN_EXPORT void set_omp_dynamic(int dynamic);
+
+NCNN_EXPORT int get_omp_thread_num();
+
+NCNN_EXPORT int get_kmp_blocktime();
+NCNN_EXPORT void set_kmp_blocktime(int time_ms);
+
+// need to flush denormals on Intel Chipset.
+// Other architectures such as ARM can be added as needed.
+// 0 = DAZ OFF, FTZ OFF
+// 1 = DAZ ON , FTZ OFF
+// 2 = DAZ OFF, FTZ ON
+// 3 = DAZ ON, FTZ ON
+NCNN_EXPORT int get_flush_denormals();
+NCNN_EXPORT int set_flush_denormals(int flush_denormals);
+
+} // namespace ncnn
+
+#endif // NCNN_CPU_H
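On big.LITTLE Android SoCs the powersave values above are the main tuning knob: pick a cluster, then size the OpenMP team to the cores actually enabled. A small sketch, assuming big-core affinity is what the app wants:

#include "cpu.h"

// prefer the big cluster, one OpenMP thread per physical big core
ncnn::set_cpu_powersave(2);                  // 2 = only big clusters enabled
int threads = ncnn::get_physical_big_cpu_count();
if (threads <= 0)
    threads = ncnn::get_cpu_count();         // fallback on uniform CPUs
ncnn::set_omp_num_threads(threads);

// flush denormals so tiny activations avoid slow microcode paths
ncnn::set_flush_denormals(3);                // 3 = DAZ ON, FTZ ON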
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/datareader.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/datareader.h
new file mode 100644
index 0000000..ed2aba3
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/datareader.h
@@ -0,0 +1,122 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_DATAREADER_H
+#define NCNN_DATAREADER_H
+
+#include "platform.h"
+#if NCNN_STDIO
+#include <stdio.h>
+#endif
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+#include <android/asset_manager.h>
+#endif
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+// data read wrapper
+class NCNN_EXPORT DataReader
+{
+public:
+    DataReader();
+    virtual ~DataReader();
+
+#if NCNN_STRING
+    // parse plain param text
+    // return 1 if scan success
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+
+    // read binary param and model data
+    // return bytes read
+    virtual size_t read(void* buf, size_t size) const;
+
+    // get model data reference
+    // return bytes referenced
+    virtual size_t reference(size_t size, const void** buf) const;
+};
+
+#if NCNN_STDIO
+class DataReaderFromStdioPrivate;
+class NCNN_EXPORT DataReaderFromStdio : public DataReader
+{
+public:
+    explicit DataReaderFromStdio(FILE* fp);
+    virtual ~DataReaderFromStdio();
+
+#if NCNN_STRING
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+    virtual size_t read(void* buf, size_t size) const;
+
+private:
+    DataReaderFromStdio(const DataReaderFromStdio&);
+    DataReaderFromStdio& operator=(const DataReaderFromStdio&);
+
+private:
+    DataReaderFromStdioPrivate* const d;
+};
+#endif // NCNN_STDIO
+
+class DataReaderFromMemoryPrivate;
+class NCNN_EXPORT DataReaderFromMemory : public DataReader
+{
+public:
+    explicit DataReaderFromMemory(const unsigned char*& mem);
+    virtual ~DataReaderFromMemory();
+
+#if NCNN_STRING
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+    virtual size_t read(void* buf, size_t size) const;
+    virtual size_t reference(size_t size, const void** buf) const;
+
+private:
+    DataReaderFromMemory(const DataReaderFromMemory&);
+    DataReaderFromMemory& operator=(const DataReaderFromMemory&);
+
+private:
+    DataReaderFromMemoryPrivate* const d;
+};
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+class DataReaderFromAndroidAssetPrivate;
+class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader
+{
+public:
+    explicit DataReaderFromAndroidAsset(AAsset* asset);
+    virtual ~DataReaderFromAndroidAsset();
+
+#if NCNN_STRING
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+    virtual size_t read(void* buf, size_t size) const;
+
+private:
+    DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&);
+    DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&);
+
+private:
+    DataReaderFromAndroidAssetPrivate* const d;
+};
+#endif // __ANDROID_API__ >= 9
+#endif // NCNN_PLATFORM_API
+
+} // namespace ncnn
+
+#endif // NCNN_DATAREADER_H
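DataReader decouples Net loading from the byte source; the three concrete readers above cover stdio files, raw memory, and Android assets. A sketch of the stdio variant feeding a param/model pair; the paths are placeholders, and Net comes from net.h, which is part of the same header set but not this hunk:

#include "datareader.h"
#include "net.h" // elsewhere in this diff

ncnn::Net net;

FILE* pp = fopen("yolov8n.param", "rb"); // placeholder path
if (pp)
{
    ncnn::DataReaderFromStdio dr(pp);
    net.load_param(dr);                  // scan() parses the plain text param
    fclose(pp);
}

FILE* bp = fopen("yolov8n.bin", "rb");   // placeholder path
if (bp)
{
    ncnn::DataReaderFromStdio dr(bp);
    net.load_model(dr);                  // read() streams the weight blobs
    fclose(bp);
}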
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/gpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/gpu.h
new file mode 100644
index 0000000..1cda182
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/gpu.h
@@ -0,0 +1,360 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_GPU_H
+#define NCNN_GPU_H
+
+#include "platform.h"
+
+#if NCNN_VULKAN
+
+#include "mat.h"
+
+#include <vulkan/vulkan.h>
+
+#include "vulkan_header_fix.h"
+
+namespace ncnn {
+
+// instance
+NCNN_EXPORT int create_gpu_instance();
+NCNN_EXPORT void destroy_gpu_instance();
+
+// instance extension capability
+extern int support_VK_KHR_external_memory_capabilities;
+extern int support_VK_KHR_get_physical_device_properties2;
+extern int support_VK_KHR_get_surface_capabilities2;
+extern int support_VK_KHR_surface;
+extern int support_VK_EXT_debug_utils;
+#if __ANDROID_API__ >= 26
+extern int support_VK_KHR_android_surface;
+#endif // __ANDROID_API__ >= 26
+
+// VK_KHR_external_memory_capabilities
+extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR;
+
+// VK_KHR_get_physical_device_properties2
+extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
+extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
+extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR;
+extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR;
+extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR;
+extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
+extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR;
+
+// VK_KHR_get_surface_capabilities2
+extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR;
+extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR;
+
+// VK_KHR_surface
+extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
+extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
+extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
+extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
+extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
+
+#if __ANDROID_API__ >= 26
+// VK_KHR_android_surface
+extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR;
+#endif // __ANDROID_API__ >= 26
+
+// VK_NV_cooperative_matrix
+extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV;
+
+// get info
+NCNN_EXPORT int get_gpu_count();
+NCNN_EXPORT int get_default_gpu_index();
+
+class GpuInfoPrivate;
+class NCNN_EXPORT GpuInfo
+{
+public:
+    explicit GpuInfo();
+    virtual ~GpuInfo();
+
+    // vulkan physical device
+    VkPhysicalDevice physical_device() const;
+
+    // memory properties
+    const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const;
+
+    // info
+    uint32_t api_version() const;
+    uint32_t driver_version() const;
+    uint32_t vendor_id() const;
+    uint32_t device_id() const;
+    const char* device_name() const;
+    uint8_t* pipeline_cache_uuid() const;
+
+    // 0 = discrete gpu
+    // 1 = integrated gpu
+    // 2 =
virtual gpu + // 3 = cpu + int type() const; + + // hardware limit + uint32_t max_shared_memory_size() const; + uint32_t max_workgroup_count_x() const; + uint32_t max_workgroup_count_y() const; + uint32_t max_workgroup_count_z() const; + uint32_t max_workgroup_invocations() const; + uint32_t max_workgroup_size_x() const; + uint32_t max_workgroup_size_y() const; + uint32_t max_workgroup_size_z() const; + size_t memory_map_alignment() const; + size_t buffer_offset_alignment() const; + size_t non_coherent_atom_size() const; + size_t buffer_image_granularity() const; + uint32_t max_image_dimension_1d() const; + uint32_t max_image_dimension_2d() const; + uint32_t max_image_dimension_3d() const; + float timestamp_period() const; + + // runtime + uint32_t compute_queue_family_index() const; + uint32_t graphics_queue_family_index() const; + uint32_t transfer_queue_family_index() const; + + uint32_t compute_queue_count() const; + uint32_t graphics_queue_count() const; + uint32_t transfer_queue_count() const; + + // property + bool unified_compute_transfer_queue() const; + + // subgroup + uint32_t subgroup_size() const; + bool support_subgroup_basic() const; + bool support_subgroup_vote() const; + bool support_subgroup_ballot() const; + bool support_subgroup_shuffle() const; + + // bug is not feature + bool bug_storage_buffer_no_l1() const; + bool bug_corrupted_online_pipeline_cache() const; + bool bug_buffer_image_load_zero() const; + + // but sometimes bug is a feature + bool bug_implicit_fp16_arithmetic() const; + + // fp16 and int8 feature + bool support_fp16_packed() const; + bool support_fp16_storage() const; + bool support_fp16_arithmetic() const; + bool support_int8_packed() const; + bool support_int8_storage() const; + bool support_int8_arithmetic() const; + + // ycbcr conversion feature + bool support_ycbcr_conversion() const; + + // cooperative matrix feature + bool support_cooperative_matrix() const; + bool support_cooperative_matrix_16_8_8() const; + + // extension capability + int support_VK_KHR_8bit_storage() const; + int support_VK_KHR_16bit_storage() const; + int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_create_renderpass2() const; + int support_VK_KHR_dedicated_allocation() const; + int support_VK_KHR_descriptor_update_template() const; + int support_VK_KHR_external_memory() const; + int support_VK_KHR_get_memory_requirements2() const; + int support_VK_KHR_maintenance1() const; + int support_VK_KHR_maintenance2() const; + int support_VK_KHR_maintenance3() const; + int support_VK_KHR_multiview() const; + int support_VK_KHR_portability_subset() const; + int support_VK_KHR_push_descriptor() const; + int support_VK_KHR_sampler_ycbcr_conversion() const; + int support_VK_KHR_shader_float16_int8() const; + int support_VK_KHR_shader_float_controls() const; + int support_VK_KHR_storage_buffer_storage_class() const; + int support_VK_KHR_swapchain() const; + int support_VK_EXT_descriptor_indexing() const; + int support_VK_EXT_memory_budget() const; + int support_VK_EXT_queue_family_foreign() const; +#if __ANDROID_API__ >= 26 + int support_VK_ANDROID_external_memory_android_hardware_buffer() const; +#endif // __ANDROID_API__ >= 26 + int support_VK_NV_cooperative_matrix() const; + +private: + GpuInfo(const GpuInfo&); + GpuInfo& operator=(const GpuInfo&); + +private: + friend int create_gpu_instance(); + GpuInfoPrivate* const d; +}; + +NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); + +class VkAllocator; +class VkCompute; +class Option; +class 
PipelineCache;
+class VulkanDevicePrivate;
+class NCNN_EXPORT VulkanDevice
+{
+public:
+    VulkanDevice(int device_index = get_default_gpu_index());
+    ~VulkanDevice();
+
+    const GpuInfo& info;
+
+    VkDevice vkdevice() const;
+
+    VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const;
+
+    // with fixed workgroup size
+    VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const;
+
+    // helper for creating pipeline
+    int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const;
+    int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const;
+    int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const;
+    int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
+
+    uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const;
+    bool is_mappable(uint32_t memory_type_index) const;
+    bool is_coherent(uint32_t memory_type_index) const;
+
+    VkQueue acquire_queue(uint32_t queue_family_index) const;
+    void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const;
+
+    // allocator on this device
+    VkAllocator* acquire_blob_allocator() const;
+    void reclaim_blob_allocator(VkAllocator* allocator) const;
+
+    VkAllocator* acquire_staging_allocator() const;
+    void reclaim_staging_allocator(VkAllocator* allocator) const;
+
+    // immutable sampler for texelfetch
+    const VkSampler* immutable_texelfetch_sampler() const;
+
+    // dummy buffer image
+    VkMat get_dummy_buffer() const;
+    VkImageMat get_dummy_image() const;
+    VkImageMat get_dummy_image_readonly() const;
+
+    // pipeline cache on this device
+    const PipelineCache* get_pipeline_cache() const;
+
+    // test image allocation
+    bool shape_support_image_storage(const Mat& shape) const;
+
+    // current gpu heap memory budget in MB
+    uint32_t get_heap_budget() const;
+
+    // utility operator
+    void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
+    void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
+    void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
+    void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
+
+    // VK_KHR_bind_memory2
+    PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR;
+    PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR;
+
+    // VK_KHR_create_renderpass2
+    PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR;
+    PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR;
+    PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR;
+    PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
+
+    // VK_KHR_descriptor_update_template
+    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
+    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
+    PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
+
+    // VK_KHR_get_memory_requirements2
+    PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
+    PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
+    PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR;
+
+    // VK_KHR_maintenance1
+    PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR;
+
+    // VK_KHR_maintenance3
+    PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR;
+
+    // VK_KHR_push_descriptor
+    PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR;
+    PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR;
+
+    // VK_KHR_sampler_ycbcr_conversion
+    PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR;
+    PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR;
+
+    // VK_KHR_swapchain
+    PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
+    PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
+    PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
+    PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
+    PFN_vkQueuePresentKHR vkQueuePresentKHR;
+
+#if __ANDROID_API__ >= 26
+    // VK_ANDROID_external_memory_android_hardware_buffer
+    PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID;
+    PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID;
+#endif // __ANDROID_API__ >= 26
+
+protected:
+    // device extension
+    int init_device_extension();
+
+private:
+    VulkanDevice(const VulkanDevice&);
+    VulkanDevice& operator=(const VulkanDevice&);
+
+private:
+    VulkanDevicePrivate* const d;
+};
+
+NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index());
+
+// online spirv compilation
+NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv);
+NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv);
+NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv);
+
+// info from spirv
+class NCNN_EXPORT ShaderInfo
+{
+public:
+    int specialization_count;
+    int binding_count;
+    int push_constant_count;
+
+    // 0 = null
+    // 1 = storage buffer
+    // 2 = storage image
+    // 3 = combined image sampler
+    int binding_types[16]; // 16 is large enough I think ...
+
+    int reserved_0;
+    int reserved_1;
+    int reserved_2;
+    int reserved_3;
+};
+
+NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info);
+
+} // namespace ncnn
+
+#endif // NCNN_VULKAN
+
+#endif // NCNN_GPU_H
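The instance functions above impose a strict order: create_gpu_instance() before any device query, destroy_gpu_instance() only after all ncnn GPU objects are gone. A sketch that probes the default device before enabling Vulkan inference:

#include "gpu.h"

bool use_gpu = false;
if (ncnn::create_gpu_instance() == 0 && ncnn::get_gpu_count() > 0)
{
    const ncnn::GpuInfo& info = ncnn::get_gpu_info(); // default device
    // fp16 storage halves bandwidth; only rely on it where reported
    use_gpu = info.support_fp16_storage() || info.support_fp16_packed();
}
// ... run inference with opt.use_vulkan_compute = use_gpu ...
ncnn::destroy_gpu_instance(); // after every net/extractor is destroyed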
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer.h
new file mode 100644
index 0000000..d02f65b
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer.h
@@ -0,0 +1,214 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_LAYER_H
+#define NCNN_LAYER_H
+
+#include "mat.h"
+#include "modelbin.h"
+#include "option.h"
+#include "paramdict.h"
+#include "platform.h"
+
+#include <math.h>
+
+#if NCNN_VULKAN
+#include "command.h"
+#include "pipeline.h"
+
+#include <vulkan/vulkan.h>
+#endif // NCNN_VULKAN
+
+namespace ncnn {
+
+class NCNN_EXPORT Layer
+{
+public:
+    // empty
+    Layer();
+    // virtual destructor
+    virtual ~Layer();
+
+    // load layer specific parameter from parsed dict
+    // return 0 if success
+    virtual int load_param(const ParamDict& pd);
+
+    // load layer specific weight data from model binary
+    // return 0 if success
+    virtual int load_model(const ModelBin& mb);
+
+    // layer implementation specific setup
+    // return 0 if success
+    virtual int create_pipeline(const Option& opt);
+
+    // layer implementation specific clean
+    // return 0 if success
+    virtual int destroy_pipeline(const Option& opt);
+
+public:
+    // one input and one output blob
+    bool one_blob_only;
+
+    // support inplace inference
+    bool support_inplace;
+
+    // support vulkan compute
+    bool support_vulkan;
+
+    // accept input blob with packed storage
+    bool support_packing;
+
+    // accept bf16
+    bool support_bf16_storage;
+
+    // accept fp16
+    bool support_fp16_storage;
+
+    // accept int8
+    bool support_int8_storage;
+
+    // shader image storage
+    bool support_image_storage;
+
+    // shader tensor storage
+    bool support_tensor_storage;
+
+    bool support_reserved_00;
+
+    bool support_reserved_0;
+    bool support_reserved_1;
+    bool support_reserved_2;
+    bool support_reserved_3;
+    bool support_reserved_4;
+    bool support_reserved_5;
+    bool support_reserved_6;
+    bool support_reserved_7;
+    bool support_reserved_8;
+    bool support_reserved_9;
+
+    // feature disabled set
+    int featmask;
+
+public:
+    // implement inference
+    // return 0 if success
+    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+
+    // implement inplace inference
+    // return 0 if success
+    virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const;
+    virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
+
+#if NCNN_VULKAN
+public:
+    // upload weight blob from host to device
+    virtual int upload_model(VkTransfer& cmd, const Option& opt);
+
+public:
+    // implement inference
+    // return 0 if success
+    virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
+    virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const;
+
+    // implement inference
+    // return 0 if success
+    virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
+    virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const;
+
+    // implement inplace inference
+    // return 0 if success
+    virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const;
+    virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
+
+    // implement inplace inference
+    // return 0 if success
+    virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const;
+    virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
+
+public:
+    // assigned immediately after creating this layer
+    const VulkanDevice* vkdev;
+#endif // NCNN_VULKAN
+
+public:
+    // custom user data
+    void* userdata;
+    // layer type index
+    int typeindex;
+#if NCNN_STRING
+    // layer type name
+    std::string type;
+    // layer name
+    std::string name;
+#endif // NCNN_STRING
+    // blob index which this layer needs as input
+    std::vector<int> bottoms;
+    // blob index which this layer produces as output
+    std::vector<int> tops;
+    // shape hint
+    std::vector<Mat> bottom_shapes;
+    std::vector<Mat> top_shapes;
+};
+
+// layer factory function
+typedef Layer* (*layer_creator_func)(void*);
+typedef void (*layer_destroyer_func)(Layer*, void*);
+
+struct layer_registry_entry
+{
+#if NCNN_STRING
+    // layer type name
+    const char* name;
+#endif // NCNN_STRING
+    // layer factory entry
+    layer_creator_func creator;
+};
+
+struct custom_layer_registry_entry
+{
+#if NCNN_STRING
+    // layer type name
+    const char* name;
+#endif // NCNN_STRING
+    // layer factory entry
+    layer_creator_func creator;
+    layer_destroyer_func destroyer;
+    void* userdata;
+};
+
+#if NCNN_STRING
+// get layer type from type name
+NCNN_EXPORT int layer_to_index(const char* type);
+// create layer from type name
+NCNN_EXPORT Layer* create_layer(const char* type);
+#endif // NCNN_STRING
+// create layer from layer type
+NCNN_EXPORT Layer* create_layer(int index);
+
+#define DEFINE_LAYER_CREATOR(name)                          \
+    ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \
+    {                                                       \
+        return new name;                                    \
+    }
+
+#define DEFINE_LAYER_DESTROYER(name)                                      \
+    void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \
+    {                                                                     \
+        delete layer;                                                     \
+    }
+
+} // namespace ncnn
+
+#endif // NCNN_LAYER_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_shader_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_shader_type.h
new file mode 100644
index 0000000..c143e7d
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_shader_type.h
@@ -0,0 +1,29 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
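The factory typedefs and DEFINE_LAYER_CREATOR macro at the end of layer.h above are the supported way to plug a custom layer into Net::register_custom_layer (declared in net.h, not in this hunk). A minimal sketch; the Noop layer and its registration name are illustrative, not part of ncnn:

#include "layer.h"
#include "net.h" // elsewhere in this diff

class Noop : public ncnn::Layer
{
public:
    Noop()
    {
        one_blob_only = true;   // single input, single output
        support_inplace = true; // top blob may alias bottom blob
    }

    virtual int forward_inplace(ncnn::Mat& /*bottom_top_blob*/, const ncnn::Option& /*opt*/) const
    {
        return 0; // pass data through unchanged
    }
};

DEFINE_LAYER_CREATOR(Noop) // generates Noop_layer_creator(void*)

// before load_param: net.register_custom_layer("Noop", Noop_layer_creator);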
+ +#ifndef NCNN_LAYER_SHADER_TYPE_H +#define NCNN_LAYER_SHADER_TYPE_H + +namespace ncnn { + +namespace LayerShaderType { +enum LayerShaderType +{ +#include "layer_shader_type_enum.h" +}; +} // namespace LayerShaderType + +} // namespace ncnn + +#endif // NCNN_LAYER_SHADER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h new file mode 100644 index 0000000..bad5605 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_shader_type_enum.h @@ -0,0 +1,369 @@ +// Layer Shader Enum header +// +// This file is auto-generated by cmake, don't edit it. + +absval = 0, +absval_pack4 = 1, +absval_pack8 = 2, +batchnorm = 3, +batchnorm_pack4 = 4, +batchnorm_pack8 = 5, +concat = 6, +concat_pack4 = 7, +concat_pack4to1 = 8, +concat_pack8 = 9, +concat_pack8to1 = 10, +concat_pack8to4 = 11, +convolution = 12, +convolution_1x1s1d1 = 13, +convolution_3x3s1d1_winograd23_transform_input = 14, +convolution_3x3s1d1_winograd23_transform_output = 15, +convolution_3x3s1d1_winograd43_transform_input = 16, +convolution_3x3s1d1_winograd43_transform_output = 17, +convolution_3x3s1d1_winograd_gemm = 18, +convolution_gemm = 19, +convolution_pack1to4 = 20, +convolution_pack1to4_1x1s1d1 = 21, +convolution_pack1to4_3x3s1d1_winograd_gemm = 22, +convolution_pack1to4_gemm = 23, +convolution_pack1to8 = 24, +convolution_pack1to8_1x1s1d1 = 25, +convolution_pack1to8_3x3s1d1_winograd_gemm = 26, +convolution_pack1to8_gemm = 27, +convolution_pack4 = 28, +convolution_pack4_1x1s1d1 = 29, +convolution_pack4_1x1s1d1_cm_16_8_8 = 30, +convolution_pack4_3x3s1d1_winograd23_transform_input = 31, +convolution_pack4_3x3s1d1_winograd23_transform_output = 32, +convolution_pack4_3x3s1d1_winograd43_transform_input = 33, +convolution_pack4_3x3s1d1_winograd43_transform_output = 34, +convolution_pack4_3x3s1d1_winograd_gemm = 35, +convolution_pack4_3x3s1d1_winograd_gemm_cm_16_8_8 = 36, +convolution_pack4_gemm = 37, +convolution_pack4_gemm_cm_16_8_8 = 38, +convolution_pack4to1 = 39, +convolution_pack4to1_1x1s1d1 = 40, +convolution_pack4to1_3x3s1d1_winograd_gemm = 41, +convolution_pack4to1_gemm = 42, +convolution_pack4to8 = 43, +convolution_pack4to8_1x1s1d1 = 44, +convolution_pack4to8_3x3s1d1_winograd_gemm = 45, +convolution_pack4to8_gemm = 46, +convolution_pack8 = 47, +convolution_pack8_1x1s1d1 = 48, +convolution_pack8_3x3s1d1_winograd23_transform_input = 49, +convolution_pack8_3x3s1d1_winograd23_transform_output = 50, +convolution_pack8_3x3s1d1_winograd43_transform_input = 51, +convolution_pack8_3x3s1d1_winograd43_transform_output = 52, +convolution_pack8_3x3s1d1_winograd_gemm = 53, +convolution_pack8_gemm = 54, +convolution_pack8to1 = 55, +convolution_pack8to1_1x1s1d1 = 56, +convolution_pack8to1_3x3s1d1_winograd_gemm = 57, +convolution_pack8to1_gemm = 58, +convolution_pack8to4 = 59, +convolution_pack8to4_1x1s1d1 = 60, +convolution_pack8to4_3x3s1d1_winograd_gemm = 61, +convolution_pack8to4_gemm = 62, +crop = 63, +crop_pack1to4 = 64, +crop_pack1to8 = 65, +crop_pack4 = 66, +crop_pack4to1 = 67, +crop_pack4to8 = 68, +crop_pack8 = 69, +crop_pack8to1 = 70, +crop_pack8to4 = 71, +deconvolution = 72, +deconvolution_col2im = 73, +deconvolution_gemm = 74, +deconvolution_pack1to4 = 75, +deconvolution_pack1to4_gemm = 76, +deconvolution_pack1to8 = 77, +deconvolution_pack1to8_gemm = 78, +deconvolution_pack4 = 79, 
+deconvolution_pack4_col2im = 80, +deconvolution_pack4_gemm = 81, +deconvolution_pack4_gemm_cm_16_8_8 = 82, +deconvolution_pack4to1 = 83, +deconvolution_pack4to1_gemm = 84, +deconvolution_pack4to8 = 85, +deconvolution_pack4to8_gemm = 86, +deconvolution_pack8 = 87, +deconvolution_pack8_col2im = 88, +deconvolution_pack8_gemm = 89, +deconvolution_pack8to1 = 90, +deconvolution_pack8to1_gemm = 91, +deconvolution_pack8to4 = 92, +deconvolution_pack8to4_gemm = 93, +dropout = 94, +dropout_pack4 = 95, +dropout_pack8 = 96, +eltwise = 97, +eltwise_pack4 = 98, +eltwise_pack8 = 99, +elu = 100, +elu_pack4 = 101, +elu_pack8 = 102, +flatten = 103, +flatten_pack1to4 = 104, +flatten_pack1to8 = 105, +flatten_pack4 = 106, +flatten_pack4to8 = 107, +flatten_pack8 = 108, +innerproduct = 109, +innerproduct_gemm = 110, +innerproduct_gemm_wp1to4 = 111, +innerproduct_gemm_wp1to8 = 112, +innerproduct_gemm_wp4 = 113, +innerproduct_gemm_wp4to1 = 114, +innerproduct_gemm_wp4to8 = 115, +innerproduct_gemm_wp8 = 116, +innerproduct_gemm_wp8to1 = 117, +innerproduct_gemm_wp8to4 = 118, +innerproduct_pack1to4 = 119, +innerproduct_pack1to8 = 120, +innerproduct_pack4 = 121, +innerproduct_pack4to1 = 122, +innerproduct_pack4to8 = 123, +innerproduct_pack8 = 124, +innerproduct_pack8to1 = 125, +innerproduct_pack8to4 = 126, +innerproduct_reduce_sum8 = 127, +innerproduct_reduce_sum8_pack4 = 128, +innerproduct_reduce_sum8_pack8 = 129, +innerproduct_sum8 = 130, +innerproduct_sum8_pack1to4 = 131, +innerproduct_sum8_pack1to8 = 132, +innerproduct_sum8_pack4 = 133, +innerproduct_sum8_pack4to1 = 134, +innerproduct_sum8_pack4to8 = 135, +innerproduct_sum8_pack8 = 136, +innerproduct_sum8_pack8to1 = 137, +innerproduct_sum8_pack8to4 = 138, +lrn_norm = 139, +lrn_norm_across_channel_pack4 = 140, +lrn_norm_across_channel_pack8 = 141, +lrn_norm_within_channel_pack4 = 142, +lrn_norm_within_channel_pack8 = 143, +lrn_square_pad = 144, +lrn_square_pad_across_channel_pack4 = 145, +lrn_square_pad_across_channel_pack8 = 146, +lrn_square_pad_within_channel_pack4 = 147, +lrn_square_pad_within_channel_pack8 = 148, +pooling = 149, +pooling_adaptive = 150, +pooling_adaptive_pack4 = 151, +pooling_adaptive_pack8 = 152, +pooling_global = 153, +pooling_global_pack4 = 154, +pooling_global_pack8 = 155, +pooling_pack4 = 156, +pooling_pack8 = 157, +prelu = 158, +prelu_pack4 = 159, +prelu_pack8 = 160, +relu = 161, +relu_pack4 = 162, +relu_pack8 = 163, +reshape = 164, +reshape_pack1to4 = 165, +reshape_pack1to8 = 166, +reshape_pack4 = 167, +reshape_pack4to1 = 168, +reshape_pack4to8 = 169, +reshape_pack8 = 170, +reshape_pack8to1 = 171, +reshape_pack8to4 = 172, +scale = 173, +scale_pack4 = 174, +scale_pack8 = 175, +sigmoid = 176, +sigmoid_pack4 = 177, +sigmoid_pack8 = 178, +slice = 179, +slice_pack1to4 = 180, +slice_pack1to8 = 181, +slice_pack4 = 182, +slice_pack4to8 = 183, +slice_pack8 = 184, +softmax_div_sum = 185, +softmax_div_sum_pack4 = 186, +softmax_div_sum_pack8 = 187, +softmax_exp_sub_max = 188, +softmax_exp_sub_max_pack4 = 189, +softmax_exp_sub_max_pack8 = 190, +softmax_reduce_max = 191, +softmax_reduce_max_pack4 = 192, +softmax_reduce_max_pack8 = 193, +softmax_reduce_sum = 194, +softmax_reduce_sum_pack4 = 195, +softmax_reduce_sum_pack8 = 196, +tanh = 197, +tanh_pack4 = 198, +tanh_pack8 = 199, +binaryop = 200, +binaryop_broadcast_inner = 201, +binaryop_broadcast_inner_pack4 = 202, +binaryop_broadcast_inner_pack8 = 203, +binaryop_broadcast_outer = 204, +binaryop_broadcast_outer_pack4 = 205, +binaryop_broadcast_outer_pack8 = 206, +binaryop_pack4 = 207, +binaryop_pack8 = 
208, +unaryop = 209, +unaryop_pack4 = 210, +unaryop_pack8 = 211, +convolutiondepthwise = 212, +convolutiondepthwise_group = 213, +convolutiondepthwise_group_pack1to4 = 214, +convolutiondepthwise_group_pack1to8 = 215, +convolutiondepthwise_group_pack4 = 216, +convolutiondepthwise_group_pack4to1 = 217, +convolutiondepthwise_group_pack4to8 = 218, +convolutiondepthwise_group_pack8 = 219, +convolutiondepthwise_group_pack8to1 = 220, +convolutiondepthwise_group_pack8to4 = 221, +convolutiondepthwise_pack4 = 222, +convolutiondepthwise_pack8 = 223, +padding = 224, +padding_3d = 225, +padding_3d_pack4 = 226, +padding_3d_pack8 = 227, +padding_pack1to4 = 228, +padding_pack1to8 = 229, +padding_pack4 = 230, +padding_pack4to1 = 231, +padding_pack4to8 = 232, +padding_pack8 = 233, +padding_pack8to1 = 234, +padding_pack8to4 = 235, +normalize_coeffs = 236, +normalize_coeffs_pack4 = 237, +normalize_coeffs_pack8 = 238, +normalize_norm = 239, +normalize_norm_pack4 = 240, +normalize_norm_pack8 = 241, +normalize_reduce_sum4_fp16_to_fp32 = 242, +normalize_reduce_sum4_fp16_to_fp32_pack4 = 243, +normalize_reduce_sum4_fp16_to_fp32_pack8 = 244, +normalize_reduce_sum4_fp32 = 245, +normalize_reduce_sum4_fp32_pack4 = 246, +normalize_reduce_sum4_fp32_pack8 = 247, +permute = 248, +permute_pack1to4 = 249, +permute_pack1to8 = 250, +permute_pack4 = 251, +permute_pack4to1 = 252, +permute_pack4to8 = 253, +permute_pack8 = 254, +permute_pack8to1 = 255, +permute_pack8to4 = 256, +priorbox = 257, +priorbox_mxnet = 258, +interp = 259, +interp_bicubic = 260, +interp_bicubic_coeffs = 261, +interp_bicubic_pack4 = 262, +interp_bicubic_pack8 = 263, +interp_pack4 = 264, +interp_pack8 = 265, +deconvolutiondepthwise = 266, +deconvolutiondepthwise_group = 267, +deconvolutiondepthwise_group_pack1to4 = 268, +deconvolutiondepthwise_group_pack1to8 = 269, +deconvolutiondepthwise_group_pack4 = 270, +deconvolutiondepthwise_group_pack4to1 = 271, +deconvolutiondepthwise_group_pack4to8 = 272, +deconvolutiondepthwise_group_pack8 = 273, +deconvolutiondepthwise_group_pack8to1 = 274, +deconvolutiondepthwise_group_pack8to4 = 275, +deconvolutiondepthwise_pack4 = 276, +deconvolutiondepthwise_pack8 = 277, +shufflechannel = 278, +shufflechannel_pack4 = 279, +shufflechannel_pack8 = 280, +instancenorm_coeffs = 281, +instancenorm_coeffs_pack4 = 282, +instancenorm_coeffs_pack8 = 283, +instancenorm_norm = 284, +instancenorm_norm_pack4 = 285, +instancenorm_norm_pack8 = 286, +instancenorm_reduce_mean = 287, +instancenorm_reduce_mean_pack4 = 288, +instancenorm_reduce_mean_pack8 = 289, +instancenorm_reduce_sum4_fp16_to_fp32 = 290, +instancenorm_reduce_sum4_fp16_to_fp32_pack4 = 291, +instancenorm_reduce_sum4_fp16_to_fp32_pack8 = 292, +instancenorm_reduce_sum4_fp32 = 293, +instancenorm_reduce_sum4_fp32_pack4 = 294, +instancenorm_reduce_sum4_fp32_pack8 = 295, +instancenorm_sub_mean_square = 296, +instancenorm_sub_mean_square_pack4 = 297, +instancenorm_sub_mean_square_pack8 = 298, +clip = 299, +clip_pack4 = 300, +clip_pack8 = 301, +reorg = 302, +reorg_pack1to4 = 303, +reorg_pack1to8 = 304, +reorg_pack4 = 305, +reorg_pack4to8 = 306, +reorg_pack8 = 307, +packing = 308, +packing_fp16_to_fp32 = 309, +packing_fp32_to_fp16 = 310, +packing_pack1to4 = 311, +packing_pack1to4_fp16_to_fp32 = 312, +packing_pack1to4_fp32_to_fp16 = 313, +packing_pack1to8 = 314, +packing_pack1to8_fp16_to_fp32 = 315, +packing_pack1to8_fp32_to_fp16 = 316, +packing_pack4 = 317, +packing_pack4_fp16_to_fp32 = 318, +packing_pack4_fp32_to_fp16 = 319, +packing_pack4to1 = 320, +packing_pack4to1_fp16_to_fp32 = 321, 
+packing_pack4to1_fp32_to_fp16 = 322, +packing_pack4to8 = 323, +packing_pack4to8_fp16_to_fp32 = 324, +packing_pack4to8_fp32_to_fp16 = 325, +packing_pack8 = 326, +packing_pack8_fp16_to_fp32 = 327, +packing_pack8_fp32_to_fp16 = 328, +packing_pack8to1 = 329, +packing_pack8to1_fp16_to_fp32 = 330, +packing_pack8to1_fp32_to_fp16 = 331, +packing_pack8to4 = 332, +packing_pack8to4_fp16_to_fp32 = 333, +packing_pack8to4_fp32_to_fp16 = 334, +cast_fp16_to_fp32 = 335, +cast_fp16_to_fp32_pack4 = 336, +cast_fp16_to_fp32_pack8 = 337, +cast_fp32_to_fp16 = 338, +cast_fp32_to_fp16_pack4 = 339, +cast_fp32_to_fp16_pack8 = 340, +hardsigmoid = 341, +hardsigmoid_pack4 = 342, +hardsigmoid_pack8 = 343, +hardswish = 344, +hardswish_pack4 = 345, +hardswish_pack8 = 346, +pixelshuffle = 347, +pixelshuffle_pack4 = 348, +pixelshuffle_pack4to1 = 349, +pixelshuffle_pack8 = 350, +pixelshuffle_pack8to1 = 351, +pixelshuffle_pack8to4 = 352, +deepcopy = 353, +deepcopy_pack4 = 354, +deepcopy_pack8 = 355, +mish = 356, +mish_pack4 = 357, +mish_pack8 = 358, +swish = 359, +swish_pack4 = 360, +swish_pack8 = 361, +convert_ycbcr = 362, +vulkan_activation = 363, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_type.h new file mode 100644 index 0000000..511c714 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_type.h @@ -0,0 +1,30 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_TYPE_H +#define NCNN_LAYER_TYPE_H + +namespace ncnn { + +namespace LayerType { +enum LayerType +{ +#include "layer_type_enum.h" + CustomBit = (1 << 8), +}; +} // namespace LayerType + +} // namespace ncnn + +#endif // NCNN_LAYER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_type_enum.h new file mode 100644 index 0000000..9c1be52 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/layer_type_enum.h @@ -0,0 +1,105 @@ +// Layer Type Enum header +// +// This file is auto-generated by cmake, don't edit it. 
+ +AbsVal = 0, +ArgMax = 1, +BatchNorm = 2, +Bias = 3, +BNLL = 4, +Concat = 5, +Convolution = 6, +Crop = 7, +Deconvolution = 8, +Dropout = 9, +Eltwise = 10, +ELU = 11, +Embed = 12, +Exp = 13, +Flatten = 14, +InnerProduct = 15, +Input = 16, +Log = 17, +LRN = 18, +MemoryData = 19, +MVN = 20, +Pooling = 21, +Power = 22, +PReLU = 23, +Proposal = 24, +Reduction = 25, +ReLU = 26, +Reshape = 27, +ROIPooling = 28, +Scale = 29, +Sigmoid = 30, +Slice = 31, +Softmax = 32, +Split = 33, +SPP = 34, +TanH = 35, +Threshold = 36, +Tile = 37, +RNN = 38, +LSTM = 39, +BinaryOp = 40, +UnaryOp = 41, +ConvolutionDepthWise = 42, +Padding = 43, +Squeeze = 44, +ExpandDims = 45, +Normalize = 46, +Permute = 47, +PriorBox = 48, +DetectionOutput = 49, +Interp = 50, +DeconvolutionDepthWise = 51, +ShuffleChannel = 52, +InstanceNorm = 53, +Clip = 54, +Reorg = 55, +YoloDetectionOutput = 56, +Quantize = 57, +Dequantize = 58, +Yolov3DetectionOutput = 59, +PSROIPooling = 60, +ROIAlign = 61, +Packing = 62, +Requantize = 63, +Cast = 64, +HardSigmoid = 65, +SELU = 66, +HardSwish = 67, +Noop = 68, +PixelShuffle = 69, +DeepCopy = 70, +Mish = 71, +StatisticsPooling = 72, +Swish = 73, +Gemm = 74, +GroupNorm = 75, +LayerNorm = 76, +Softplus = 77, +GRU = 78, +MultiHeadAttention = 79, +GELU = 80, +Convolution1D = 81, +Pooling1D = 82, +ConvolutionDepthWise1D = 83, +Convolution3D = 84, +ConvolutionDepthWise3D = 85, +Pooling3D = 86, +MatMul = 87, +Deconvolution1D = 88, +DeconvolutionDepthWise1D = 89, +Deconvolution3D = 90, +DeconvolutionDepthWise3D = 91, +Einsum = 92, +DeformableConv2D = 93, +GLU = 94, +Fold = 95, +Unfold = 96, +GridSample = 97, +CumulativeSum = 98, +CopyTo = 99, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/mat.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/mat.h new file mode 100644 index 0000000..c6f59ef --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/mat.h @@ -0,0 +1,1843 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef NCNN_MAT_H +#define NCNN_MAT_H + +#include <stdlib.h> +#include <string.h> +#if __ARM_NEON +#include <arm_neon.h> +#endif +#if __SSE2__ +#include <emmintrin.h> +#if __AVX__ +#include <immintrin.h> +#endif +#endif +#if __mips_msa +#include <msa.h> +#endif +#if __loongarch_sx +#include <lsxintrin.h> +#endif +#if __riscv_vector +#include <riscv_vector.h> +#include "cpu.h" // cpu_riscv_vlenb() +#endif + +#include "allocator.h" +#include "option.h" +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PIXEL +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/bitmap.h> +#include <jni.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + +namespace ncnn { + +#if NCNN_VULKAN +class VkMat; +class VkImageMat; +#endif // NCNN_VULKAN + +// the three dimension matrix +class NCNN_EXPORT Mat +{ +public: + // empty + Mat(); + // vec + Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // image + Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // dim + Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // cube + Mat(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // packed vec + Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed image + Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed dim + Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed cube + Mat(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // copy + Mat(const Mat& m); + // external vec + Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external image + Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external dim + Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external packed vec + Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed image + Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed dim + Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // release + ~Mat(); + // assign + Mat& operator=(const Mat& m); + // set all + void fill(float v); + void fill(int v); +#if __ARM_NEON + void fill(float32x4_t _v); + void fill(uint16x4_t _v); + void fill(int32x4_t _v); + void fill(int32x4_t _v0, int32x4_t _v1); +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + void fill(float16x4_t _v); + void fill(float16x8_t _v); +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ + void fill(__m512 _v); +#endif // __AVX512F__ + void fill(__m256 _v, int i = 0); +#endif // __AVX__ + void fill(__m128 _v); + void fill(__m128i _v); +#endif // __SSE2__ +#if __mips_msa + void fill(v4f32 _v); +#endif // __mips_msa +#if __loongarch_sx + void fill(__m128 _v); +#endif //__loongarch_sx +#if __riscv_vector + void fill(vfloat32m1_t _v); + void fill(vuint16m1_t _v); + void fill(vint8m1_t _v); +#if __riscv_zfh + void fill(vfloat16m1_t _v); +#endif // __riscv_zfh +#endif // __riscv_vector + template<typename T> + void fill(T v); + // deep copy + Mat clone(Allocator* allocator = 0) const; + // deep copy from other mat, inplace + void
clone_from(const ncnn::Mat& mat, Allocator* allocator = 0); + // reshape vec + Mat reshape(int w, Allocator* allocator = 0) const; + // reshape image + Mat reshape(int w, int h, Allocator* allocator = 0) const; + // reshape dim + Mat reshape(int w, int h, int c, Allocator* allocator = 0) const; + // reshape cube + Mat reshape(int w, int h, int d, int c, Allocator* allocator = 0) const; + // allocate vec + void create(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate image + void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate dim + void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate like + void create_like(const Mat& m, Allocator* allocator = 0); +#if NCNN_VULKAN + // allocate like + void create_like(const VkMat& m, Allocator* allocator = 0); + // allocate like + void create_like(const VkImageMat& im, Allocator* allocator = 0); +#endif // NCNN_VULKAN + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // data reference + Mat channel(int c); + const Mat channel(int c) const; + Mat depth(int z); + const Mat depth(int z) const; + float* row(int y); + const float* row(int y) const; + template<typename T> + T* row(int y); + template<typename T> + const T* row(int y) const; + + // range reference + Mat channel_range(int c, int channels); + const Mat channel_range(int c, int channels) const; + Mat depth_range(int z, int depths); + const Mat depth_range(int z, int depths) const; + Mat row_range(int y, int rows); + const Mat row_range(int y, int rows) const; + Mat range(int x, int n); + const Mat range(int x, int n) const; + + // access raw data + template<typename T> + operator T*(); + template<typename T> + operator const T*() const; + + // convenient access float vec element + float& operator[](size_t i); + const float& operator[](size_t i) const; + +#if NCNN_PIXEL + enum PixelType + { + PIXEL_CONVERT_SHIFT = 16, + PIXEL_FORMAT_MASK = 0x0000ffff, + PIXEL_CONVERT_MASK = 0xffff0000, + + PIXEL_RGB = 1, + PIXEL_BGR = 2, + PIXEL_GRAY = 3, + PIXEL_RGBA = 4, + PIXEL_BGRA = 5, + + PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGRA =
PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + }; + // convenient construct from pixel data + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0); + // convenient construct from pixel data with stride(bytes-per-row) parameter + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi with stride(bytes-per-row) parameter + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + + // convenient export to pixel data + void to_pixels(unsigned char* pixels, int type) const; + // convenient export to pixel data with stride(bytes-per-row) parameter + void to_pixels(unsigned char* pixels, int type, int stride) const; + // convenient export to pixel data and resize to specific size + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const; + // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 + // convenient construct from android Bitmap + static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0); + // convenient construct from android Bitmap and resize to specific size + static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from android Bitmap 
roi + static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from android Bitmap roi and resize to specific size + static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient export to android Bitmap and resize to the android Bitmap size + void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + + // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip + void substract_mean_normalize(const float* mean_vals, const float* norm_vals); + + // convenient construct from half precision floating point data + static Mat from_float16(const unsigned short* data, int size); + + // pointer to the data + void* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + Allocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +#if NCNN_VULKAN + +// the three dimension matrix, vulkan version +class NCNN_EXPORT VkMat +{ +public: + // empty + VkMat(); + // vec + VkMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkMat(const VkMat& m); + // external vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkMat(); + // assign + VkMat& operator=(const VkMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); 
+ // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkBuffer buffer() const; + size_t buffer_offset() const; + size_t buffer_capacity() const; + + // device buffer + VkBufferMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +class NCNN_EXPORT VkImageMat +{ +public: + // empty + VkImageMat(); + // vec + VkImageMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkImageMat(const VkImageMat& m); + // external vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* 
allocator); + // external packed dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkImageMat(); + // assign + VkImageMat& operator=(const VkImageMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); + // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkImage image() const; + VkImageView imageview() const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + // convenient construct from android hardware buffer + static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + // device image + VkImageMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; +}; + +// type for vulkan specialization constant and push constant +union vk_specialization_type +{ + int i; + float f; + uint32_t u32; +}; +union vk_constant_type +{ + int i; + float f; +}; +#endif // NCNN_VULKAN + +// misc function +#if NCNN_PIXEL +// convert yuv420sp(nv21) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv12) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv21) to rgb with half resize, the faster approximate version +NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// image pixel bilinear resize +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, 
unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +// image pixel bilinear resize with stride(bytes-per-row) parameter +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +// image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +#endif // NCNN_PIXEL +#if NCNN_PIXEL_ROTATE +// type is the from type, 6 means rotating from 6 to 1 +// +// 1 2 3 4 5 6 7 8 +// +// 888888 888888 88 88 8888888888 88 88 8888888888 +// 88 88 88 88 88 88 88 88 88 88 88 88 +// 8888 8888 8888 8888 88 8888888888 8888888888 88 +// 88 88 88 88 +// 88 88 888888 888888 +// +// ref http://sylvana.net/jpegcrop/exif_orientation.html +// image pixel kanna rotate +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +// image pixel kanna rotate with stride(bytes-per-row) parameter +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +// image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +#endif // NCNN_PIXEL_ROTATE +#if NCNN_PIXEL_AFFINE +// resolve affine transform matrix from rotation angle, scale factor and x y offset +NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm); +// resolve affine transform matrix from two set of points, num_point must be >= 2 +NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm); +// resolve the inversion affine transform matrix +NCNN_EXPORT void invert_affine_transform(const 
float* tm, float* tm_inv); +// image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +#endif // NCNN_PIXEL_AFFINE +#if NCNN_PIXEL_DRAWING +// draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int 
stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw line, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// resolve text bounding box size 
+NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h); +// draw ascii printables and newline, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +#endif // NCNN_PIXEL_DRAWING + +// type conversion +// convert float to half precision floating point +NCNN_EXPORT unsigned short float32_to_float16(float value); +// convert half precision floating point to float +NCNN_EXPORT float float16_to_float32(unsigned short value); +// convert float to brain half +NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.f = value; + return tmp.u >> 16; +} +// convert brain half to float +NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.u = value << 16; + return tmp.f; +} + +// mat process +enum BorderType +{ + BORDER_CONSTANT = 0, + BORDER_REPLICATE = 1, + BORDER_REFLECT = 2, + BORDER_TRANSPARENT = -233, +}; +NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt = Option()); +NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void 
convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option()); +NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option()); +NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option()); +NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option()); + +NCNN_FORCEINLINE Mat::Mat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(const Mat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), 
h(m.h), d(m.d), c(m.c), cstep(m.cstep) +{ + addref(); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::~Mat() +{ + release(); +} + +NCNN_FORCEINLINE void Mat::fill(float _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + + int i = 0; +#if __ARM_NEON + float32x4_t _c = vdupq_n_f32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_f32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +NCNN_FORCEINLINE void Mat::fill(int _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + + int i = 0; +#if __ARM_NEON + int32x4_t _c = vdupq_n_s32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_s32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +#if __ARM_NEON +NCNN_FORCEINLINE void Mat::fill(float32x4_t _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vst1q_f32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vst1_u16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + 
vst1q_s32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + vst1q_s32(ptr, _v0); + vst1q_s32(ptr + 4, _v1); + ptr += 8; + } +} +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +NCNN_FORCEINLINE void Mat::fill(float16x4_t _v) +{ + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vst1_f16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(float16x8_t _v) +{ + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vst1q_f16(ptr, _v); + ptr += 8; + } +} +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON + +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ +NCNN_FORCEINLINE void Mat::fill(__m512 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm512_storeu_ps(ptr, _v); + ptr += 16; + } +} +#endif // __AVX512F__ +NCNN_FORCEINLINE void Mat::fill(__m256 _v, int _i) +{ + // old gcc cannot overload __m128 and __m256 type + // add a dummy int parameter for different mangled function symbol + (void)_i; + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm256_storeu_ps(ptr, _v); + ptr += 8; + } +} +#endif // __AVX__ +NCNN_FORCEINLINE void Mat::fill(__m128 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm_storeu_ps(ptr, _v); + ptr += 4; + } +} +NCNN_FORCEINLINE void Mat::fill(__m128i _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + _mm_store_si128((__m128i*)ptr, _v); + ptr += 8; + } +} +#endif // __SSE2__ + +#if __mips_msa +NCNN_FORCEINLINE void Mat::fill(v4f32 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + __msa_st_w((v4i32)_v, ptr, 0); + ptr += 4; + } +} +#endif // __mips_msa + +#if __loongarch_sx +NCNN_FORCEINLINE void Mat::fill(__m128 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + __lsx_vst(_v, ptr, 0); + ptr += 4; + } +} +#endif // __loongarch_sx +#if __riscv_vector +NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 4; + const size_t vl = vsetvl_e32m1(packn); + + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vse32_v_f32m1(ptr, _v, vl); + ptr += packn; + } +} + +NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 2; + const size_t vl = vsetvl_e16m1(packn); + + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vse16_v_u16m1(ptr, _v, vl); + ptr += packn; + } +} + +NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 1; + const size_t vl = vsetvl_e8m1(packn); + + int size = (int)total(); + signed char* ptr = (signed char*)data; + for (int i = 0; i < size; i++) + { + vse8_v_i8m1(ptr, _v, vl); + ptr += packn; + } +} +#if __riscv_zfh +NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 2; + const size_t vl = vsetvl_e16m1(packn); + + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vse16_v_f16m1(ptr, _v, vl); + ptr += packn; + } +} +#endif // __riscv_zfh +#endif // __riscv_vector + +template +NCNN_FORCEINLINE void Mat::fill(T _v) +{ + 
int size = (int)total(); + T* ptr = (T*)data; + for (int i = 0; i < size; i++) + { + ptr[i] = _v; + } +} + +NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE void Mat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void Mat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator) + allocator->fastFree(data); + else + fastFree(data); + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool Mat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t Mat::total() const +{ + return cstep * c; +} + +NCNN_FORCEINLINE int Mat::elembits() const +{ + return elempack ? static_cast(elemsize * 8) / elempack : 0; +} + +NCNN_FORCEINLINE Mat Mat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE Mat Mat::channel(int _c) +{ + Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims - 1; + if (dims == 4) + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::channel(int _c) const +{ + Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims - 1; + if (dims == 4) + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE Mat Mat::depth(int z) +{ + return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::depth(int z) const +{ + return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE float* Mat::row(int y) +{ + return (float*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +NCNN_FORCEINLINE const float* Mat::row(int y) const +{ + return (const float*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +template +NCNN_FORCEINLINE T* Mat::row(int y) +{ + return (T*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +template +NCNN_FORCEINLINE const T* Mat::row(int y) const +{ + return (const T*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels) +{ + Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const +{ + Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims; + return m; +} + +NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths) +{ + Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const +{ + Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); + 
m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows) +{ + return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const +{ + return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE Mat Mat::range(int x, int n) +{ + return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const +{ + return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator); +} + +template<typename T> +NCNN_FORCEINLINE Mat::operator T*() +{ + return (T*)data; +} + +template<typename T> +NCNN_FORCEINLINE Mat::operator const T*() const +{ + return (const T*)data; +} + +NCNN_FORCEINLINE float& Mat::operator[](size_t i) +{ + return ((float*)data)[i]; +} + +NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const +{ + return ((const float*)data)[i]; +} + +#if NCNN_VULKAN + +NCNN_FORCEINLINE VkMat::VkMat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c) +{ + addref(); + + cstep 
= m.cstep; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::~VkMat() +{ + release(); +} + +NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE Mat VkMat::mapped() const +{ + if (!allocator->mappable) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkMat::mapped_ptr() const +{ + if (!allocator->mappable) + return 0; + + return (unsigned char*)data->mapped_ptr + data->offset; +} + +NCNN_FORCEINLINE void VkMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkMat::release() +{ + if (refcount && 
NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool VkMat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t VkMat::total() const +{ + return cstep * c; +} + +NCNN_FORCEINLINE int VkMat::elembits() const +{ + return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0; +} + +NCNN_FORCEINLINE Mat VkMat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE VkBuffer VkMat::buffer() const +{ + return data->buffer; +} + +NCNN_FORCEINLINE size_t VkMat::buffer_offset() const +{ + return data->offset; +} + +NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const +{ + return data->capacity; +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), 
d(m.d), c(m.c) +{ + addref(); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::~VkImageMat() +{ + release(); +} + +NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + return *this; +} + +NCNN_FORCEINLINE Mat VkImageMat::mapped() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return 0; + + return (unsigned char*)data->mapped_ptr + data->bind_offset; +} + +NCNN_FORCEINLINE void VkImageMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkImageMat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + 
elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool VkImageMat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t VkImageMat::total() const +{ + return w * h * d * c; +} + +NCNN_FORCEINLINE int VkImageMat::elembits() const +{ + return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0; +} + +NCNN_FORCEINLINE Mat VkImageMat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE VkImage VkImageMat::image() const +{ + return data->image; +} + +NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const +{ + return data->imageview; +} + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_MAT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/modelbin.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/modelbin.h new file mode 100644 index 0000000..15d2b9c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/modelbin.h @@ -0,0 +1,80 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
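+ +// (editor's note, not part of upstream ncnn) the type argument of load() below +// selects the weight element type; 0 lets the loader infer it from the weight +// blob itself. A minimal sketch of use inside a custom layer, assuming mb is +// the ModelBin passed to load_model: +// Mat weight = mb.load(num_output * num_input, 0); // hypothetical sizes +// if (weight.empty()) return -100;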
+ +#ifndef NCNN_MODELBIN_H +#define NCNN_MODELBIN_H + +#include "mat.h" + +namespace ncnn { + +class DataReader; +class NCNN_EXPORT ModelBin +{ +public: + ModelBin(); + virtual ~ModelBin(); + // element type + // 0 = auto + // 1 = float32 + // 2 = float16 + // 3 = int8 + // load vec + virtual Mat load(int w, int type) const = 0; + // load image + virtual Mat load(int w, int h, int type) const; + // load dim + virtual Mat load(int w, int h, int c, int type) const; + // load cube + virtual Mat load(int w, int h, int d, int c, int type) const; +}; + +class ModelBinFromDataReaderPrivate; +class NCNN_EXPORT ModelBinFromDataReader : public ModelBin +{ +public: + explicit ModelBinFromDataReader(const DataReader& dr); + virtual ~ModelBinFromDataReader(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromDataReader(const ModelBinFromDataReader&); + ModelBinFromDataReader& operator=(const ModelBinFromDataReader&); + +private: + ModelBinFromDataReaderPrivate* const d; +}; + +class ModelBinFromMatArrayPrivate; +class NCNN_EXPORT ModelBinFromMatArray : public ModelBin +{ +public: + // construct from weight blob array + explicit ModelBinFromMatArray(const Mat* weights); + virtual ~ModelBinFromMatArray(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromMatArray(const ModelBinFromMatArray&); + ModelBinFromMatArray& operator=(const ModelBinFromMatArray&); + +private: + ModelBinFromMatArrayPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_MODELBIN_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/ncnn_export.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/ncnn_export.h new file mode 100644 index 0000000..7343310 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/ncnn_export.h @@ -0,0 +1,42 @@ + +#ifndef NCNN_EXPORT_H +#define NCNN_EXPORT_H + +#ifdef NCNN_STATIC_DEFINE +# define NCNN_EXPORT +# define NCNN_NO_EXPORT +#else +# ifndef NCNN_EXPORT +# ifdef ncnn_EXPORTS + /* We are building this library */ +# define NCNN_EXPORT +# else + /* We are using this library */ +# define NCNN_EXPORT +# endif +# endif + +# ifndef NCNN_NO_EXPORT +# define NCNN_NO_EXPORT +# endif +#endif + +#ifndef NCNN_DEPRECATED +# define NCNN_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef NCNN_DEPRECATED_EXPORT +# define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED +#endif + +#ifndef NCNN_DEPRECATED_NO_EXPORT +# define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef NCNN_NO_DEPRECATED +# define NCNN_NO_DEPRECATED +# endif +#endif + +#endif /* NCNN_EXPORT_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/net.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/net.h new file mode 100644 index 0000000..9407042 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/net.h @@ -0,0 +1,272 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_NET_H +#define NCNN_NET_H + +#include "blob.h" +#include "layer.h" +#include "mat.h" +#include "option.h" +#include "platform.h" + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/asset_manager.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +#if NCNN_VULKAN +class VkCompute; +#endif // NCNN_VULKAN +class DataReader; +class Extractor; +class NetPrivate; +class NCNN_EXPORT Net +{ +public: + // empty init + Net(); + // clear and destroy + virtual ~Net(); + +public: + // option can be changed before loading + Option opt; + +#if NCNN_VULKAN + // set gpu device by index + void set_vulkan_device(int device_index); + + // set gpu device by device handle, no owner transfer + void set_vulkan_device(const VulkanDevice* vkdev); + + const VulkanDevice* vulkan_device() const; +#endif // NCNN_VULKAN + +#if NCNN_STRING + // register custom layer by layer type name + // return 0 if success + int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); + virtual int custom_layer_to_index(const char* type); +#endif // NCNN_STRING + // register custom layer by layer type + // return 0 if success + int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); + +#if NCNN_STRING + int load_param(const DataReader& dr); +#endif // NCNN_STRING + + int load_param_bin(const DataReader& dr); + + int load_model(const DataReader& dr); + +#if NCNN_STDIO +#if NCNN_STRING + // load network structure from plain param file + // return 0 if success + int load_param(FILE* fp); + int load_param(const char* protopath); + int load_param_mem(const char* mem); +#endif // NCNN_STRING + // load network structure from binary param file + // return 0 if success + int load_param_bin(FILE* fp); + int load_param_bin(const char* protopath); + + // load network weight data from model file + // return 0 if success + int load_model(FILE* fp); + int load_model(const char* modelpath); +#endif // NCNN_STDIO + + // load network structure from external memory + // memory pointer must be 32-bit aligned + // return bytes consumed + int load_param(const unsigned char* mem); + + // reference network weight data from external memory + // weight data is not copied but referenced + // so external memory should be retained when used + // memory pointer must be 32-bit aligned + // return bytes consumed + int load_model(const unsigned char* mem); + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#if NCNN_STRING + // convenient load network structure from android asset plain param file + int load_param(AAsset* asset); + int load_param(AAssetManager* mgr, const char* assetpath); +#endif // NCNN_STRING + // convenient load network structure from android asset binary param file + int load_param_bin(AAsset* asset); + int load_param_bin(AAssetManager* mgr, const char* assetpath); + + // convenient load network weight data from android asset model file + int load_model(AAsset* asset); + int load_model(AAssetManager* mgr, const char* assetpath); +#endif // __ANDROID_API__ >= 9 
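+// (editor's sketch) typical use from this android demo, loading the files +// shipped under app/src/main/assets: +// ncnn::Net yolov8; +// yolov8.load_param(mgr, "yolov8n.param"); +// yolov8.load_model(mgr, "yolov8n.bin"); +// where mgr is the AAssetManager handed over from the Java side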
+#endif // NCNN_PLATFORM_API + + // unload network structure and weight data + void clear(); + + // construct an Extractor from network + Extractor create_extractor() const; + + // get input/output indexes/names + const std::vector<int>& input_indexes() const; + const std::vector<int>& output_indexes() const; +#if NCNN_STRING + const std::vector<const char*>& input_names() const; + const std::vector<const char*>& output_names() const; +#endif + + const std::vector<Blob>& blobs() const; + const std::vector<Layer*>& layers() const; + + std::vector<Blob>& mutable_blobs(); + std::vector<Layer*>& mutable_layers(); + +protected: + friend class Extractor; +#if NCNN_STRING + int find_blob_index_by_name(const char* name) const; + int find_layer_index_by_name(const char* name) const; + virtual Layer* create_custom_layer(const char* type); +#endif // NCNN_STRING + virtual Layer* create_custom_layer(int index); + +private: + Net(const Net&); + Net& operator=(const Net&); + +private: + NetPrivate* const d; +}; + +class ExtractorPrivate; +class NCNN_EXPORT Extractor +{ +public: + virtual ~Extractor(); + + // copy + Extractor(const Extractor&); + + // assign + Extractor& operator=(const Extractor&); + + // clear blob mats and allocators + void clear(); + + // enable light mode + // intermediate blob will be recycled when enabled + // enabled by default + void set_light_mode(bool enable); + + // set thread count for this extractor + // this will overwrite the global setting + // default count is system dependent + void set_num_threads(int num_threads); + + // set blob memory allocator + void set_blob_allocator(Allocator* allocator); + + // set workspace memory allocator + void set_workspace_allocator(Allocator* allocator); + +#if NCNN_VULKAN + void set_vulkan_compute(bool enable); + + void set_blob_vkallocator(VkAllocator* allocator); + + void set_workspace_vkallocator(VkAllocator* allocator); + + void set_staging_vkallocator(VkAllocator* allocator); +#endif // NCNN_VULKAN + +#if NCNN_STRING + // set input by blob name + // return 0 if success + int input(const char* blob_name, const Mat& in); + + // get result by blob name + // return 0 if success + // type = 0, default + // type = 1, do not convert fp16/bf16 or/and packing + int extract(const char* blob_name, Mat& feat, int type = 0); +#endif // NCNN_STRING + + // set input by blob index + // return 0 if success + int input(int blob_index, const Mat& in); + + // get result by blob index + // return 0 if success + // type = 0, default + // type = 1, do not convert fp16/bf16 or/and packing + int extract(int blob_index, Mat& feat, int type = 0); + +#if NCNN_VULKAN +#if NCNN_STRING + // set input by blob name + // return 0 if success + int input(const char* blob_name, const VkMat& in); + + // get result by blob name + // return 0 if success + int extract(const char* blob_name, VkMat& feat, VkCompute& cmd); + + // set input by blob name + // return 0 if success + int input(const char* blob_name, const VkImageMat& in); + + // get result by blob name + // return 0 if success + int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd); +#endif // NCNN_STRING + + // set input by blob index + // return 0 if success + int input(int blob_index, const VkMat& in); + + // get result by blob index + // return 0 if success + int extract(int blob_index, VkMat& feat, VkCompute& cmd); + + // set input by blob index + // return 0 if success + int input(int blob_index, const VkImageMat& in); + + // get result by blob index + // return 0 if success + int extract(int blob_index, VkImageMat& feat, VkCompute& cmd); +#endif // 
NCNN_VULKAN + +protected: + friend Extractor Net::create_extractor() const; + Extractor(const Net* net, size_t blob_count); + +private: + ExtractorPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_NET_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/option.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/option.h new file mode 100644 index 0000000..3fda808 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/option.h @@ -0,0 +1,153 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_OPTION_H +#define NCNN_OPTION_H + +#include "platform.h" + +namespace ncnn { + +#if NCNN_VULKAN +class VkAllocator; +class PipelineCache; +#endif // NCNN_VULKAN + +class Allocator; +class NCNN_EXPORT Option +{ +public: + // default option + Option(); + +public: + // light mode + // intermediate blob will be recycled when enabled + // enabled by default + bool lightmode; + + // thread count + // default value is the one returned by get_cpu_count() + int num_threads; + + // blob memory allocator + Allocator* blob_allocator; + + // workspace memory allocator + Allocator* workspace_allocator; + +#if NCNN_VULKAN + // blob memory allocator + VkAllocator* blob_vkallocator; + + // workspace memory allocator + VkAllocator* workspace_vkallocator; + + // staging memory allocator + VkAllocator* staging_vkallocator; + + // pipeline cache + PipelineCache* pipeline_cache; +#endif // NCNN_VULKAN + + // the time openmp threads busy-wait for more work before going to sleep + // default value is 20ms to keep the cores enabled + // without too much extra power consumption afterwards + int openmp_blocktime; + + // enable winograd convolution optimization + // improve convolution 3x3 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_winograd_convolution; + + // enable sgemm convolution optimization + // improve convolution 1x1 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_sgemm_convolution; + + // enable quantized int8 inference + // use low-precision int8 path for quantized model + // changes should be applied before loading network structure and weight + // enabled by default + bool use_int8_inference; + + // enable vulkan compute + bool use_vulkan_compute; + + // enable bf16 data type for storage + // improve most operator performance on all arm devices, may consume more memory + bool use_bf16_storage; + + // enable options for gpu inference + bool use_fp16_packed; + bool use_fp16_storage; + bool use_fp16_arithmetic; + bool use_int8_packed; + bool use_int8_storage; + bool use_int8_arithmetic; 
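+ +// (editor's sketch) these are plain fields, set before load_param/load_model: +// ncnn::Net net; +// net.opt.num_threads = 4; +// net.opt.use_vulkan_compute = true; +// net.opt.use_fp16_storage = true;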
+ + // enable simd-friendly packed memory layout + // improve all operator performance on all arm devices, will consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_packing_layout; + + bool use_shader_pack8; + + // subgroup option + bool use_subgroup_basic; + bool use_subgroup_vote; + bool use_subgroup_ballot; + bool use_subgroup_shuffle; + + // turn on for adreno + bool use_image_storage; + bool use_tensor_storage; + + bool use_reserved_0; + + // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero) + // default value is 3 + // 0 = DAZ OFF, FTZ OFF + // 1 = DAZ ON , FTZ OFF + // 2 = DAZ OFF, FTZ ON + // 3 = DAZ ON, FTZ ON + int flush_denormals; + + bool use_local_pool_allocator; + + // enable local memory optimization for gpu inference + bool use_shader_local_memory; + + // enable cooperative matrix optimization for gpu inference + bool use_cooperative_matrix; + + // more fine-grained control of winograd convolution + bool use_winograd23_convolution; + bool use_winograd43_convolution; + bool use_winograd63_convolution; + + bool use_reserved_6; + bool use_reserved_7; + bool use_reserved_8; + bool use_reserved_9; + bool use_reserved_10; + bool use_reserved_11; +}; + +} // namespace ncnn + +#endif // NCNN_OPTION_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/paramdict.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/paramdict.h new file mode 100644 index 0000000..c2ef160 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/paramdict.h @@ -0,0 +1,73 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
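+ +// (editor's sketch) a custom layer reads its parameters by numeric id with a +// default value, mirroring the key=value pairs of the .param format: +// int num_output = pd.get(0, 0); +// float eps = pd.get(1, 0.0001f); +// the overload picked by the default's type decides int/float/Mat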
+ +#ifndef NCNN_PARAMDICT_H +#define NCNN_PARAMDICT_H + +#include "mat.h" + +// at most 32 parameters +#define NCNN_MAX_PARAM_COUNT 32 + +namespace ncnn { + +class DataReader; +class Net; +class ParamDictPrivate; +class NCNN_EXPORT ParamDict +{ +public: + // empty + ParamDict(); + + virtual ~ParamDict(); + + // copy + ParamDict(const ParamDict&); + + // assign + ParamDict& operator=(const ParamDict&); + + // get type + int type(int id) const; + + // get int + int get(int id, int def) const; + // get float + float get(int id, float def) const; + // get array + Mat get(int id, const Mat& def) const; + + // set int + void set(int id, int i); + // set float + void set(int id, float f); + // set array + void set(int id, const Mat& v); + +protected: + friend class Net; + + void clear(); + + int load_param(const DataReader& dr); + int load_param_bin(const DataReader& dr); + +private: + ParamDictPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_PARAMDICT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/pipeline.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/pipeline.h new file mode 100644 index 0000000..c284a14 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/pipeline.h @@ -0,0 +1,113 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
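+ +// (editor's sketch, internal API) a compute pipeline is built from SPIR-V plus +// specialization constants, roughly: +// ncnn::Pipeline pipeline(vkdev); +// pipeline.set_optimal_local_size_xyz(8, 8, 1); +// std::vector<ncnn::vk_specialization_type> specializations(1); +// specializations[0].i = 1; +// pipeline.create(spv_data, spv_data_size, specializations);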
+ +#ifndef NCNN_PIPELINE_H +#define NCNN_PIPELINE_H + +#include "mat.h" +#include "platform.h" +#if NCNN_VULKAN +#include "gpu.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +#if NCNN_VULKAN +class Option; +class PipelinePrivate; +class NCNN_EXPORT Pipeline +{ +public: + explicit Pipeline(const VulkanDevice* vkdev); + virtual ~Pipeline(); + +public: + void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); + void set_optimal_local_size_xyz(const Mat& local_size_xyz); + void set_local_size_xyz(int w, int h, int c); + + int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations); + + int create(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations); + +public: + VkShaderModule shader_module() const; + VkDescriptorSetLayout descriptorset_layout() const; + VkPipelineLayout pipeline_layout() const; + VkPipeline pipeline() const; + VkDescriptorUpdateTemplateKHR descriptor_update_template() const; + + const ShaderInfo& shader_info() const; + + uint32_t local_size_x() const; + uint32_t local_size_y() const; + uint32_t local_size_z() const; + +protected: + void set_shader_module(VkShaderModule shader_module); + void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout); + void set_pipeline_layout(VkPipelineLayout pipeline_layout); + void set_pipeline(VkPipeline pipeline); + void set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template); + + void set_shader_info(const ShaderInfo& shader_info); + +public: + const VulkanDevice* vkdev; + +private: + Pipeline(const Pipeline&); + Pipeline& operator=(const Pipeline&); + +private: + PipelinePrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class VkCompute; +class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline +{ +public: + explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev); + virtual ~ImportAndroidHardwareBufferPipeline(); + + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt); + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); + void destroy(); + + friend class VkCompute; + +protected: + int create_shader_module(const Option& opt); + int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator); + int create_descriptorset_layout(); + +public: + int type_to; + int rotate_from; + bool need_resize; + + VkSampler sampler; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/pipelinecache.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/pipelinecache.h new file mode 100644 index 0000000..bb6b8fb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/pipelinecache.h @@ -0,0 +1,85 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_PIPELINECACHE_H +#define NCNN_PIPELINECACHE_H + +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#include "mat.h" +#include "gpu.h" + +namespace ncnn { + +#if NCNN_VULKAN + +class VulkanDevice; +class PipelineCachePrivate; +class NCNN_EXPORT PipelineCache +{ +public: + explicit PipelineCache(const VulkanDevice* _vkdev); + + virtual ~PipelineCache(); + + void clear(); + + int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + + int get_pipeline(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + +protected: + int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* _shader_module, ShaderInfo& si) const; + + int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector<vk_specialization_type>& specializations, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + +protected: + const VulkanDevice* vkdev; + +private: + PipelineCache(const PipelineCache&); + PipelineCache& operator=(const PipelineCache&); + +private: + PipelineCachePrivate* const d; +}; + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINECACHE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/platform.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/platform.h new file mode 100644 index 0000000..aebc873 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/platform.h @@ -0,0 +1,286 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
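+ +// (editor's sketch) the RAII guard defined later in this header pairs +// lock_/unlock automatically around a critical section: +// static ncnn::Mutex g_lock; +// void touch_shared_state() +// { +// ncnn::MutexLockGuard guard(g_lock); +// /* critical section */ +// }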
+ +#ifndef NCNN_PLATFORM_H +#define NCNN_PLATFORM_H + +#define NCNN_STDIO 1 +#define NCNN_STRING 1 +#define NCNN_SIMPLEOCV 0 +#define NCNN_SIMPLEOMP 0 +#define NCNN_SIMPLESTL 0 +#define NCNN_THREADS 1 +#define NCNN_BENCHMARK 0 +#define NCNN_C_API 1 +#define NCNN_PLATFORM_API 1 +#define NCNN_PIXEL 1 +#define NCNN_PIXEL_ROTATE 1 +#define NCNN_PIXEL_AFFINE 1 +#define NCNN_PIXEL_DRAWING 1 +#define NCNN_VULKAN 1 +#define NCNN_SYSTEM_GLSLANG 0 +#define NCNN_RUNTIME_CPU 1 +#define NCNN_GNU_INLINE_ASM 1 +#define NCNN_AVX 0 +#define NCNN_XOP 0 +#define NCNN_FMA 0 +#define NCNN_F16C 0 +#define NCNN_AVX2 0 +#define NCNN_AVXVNNI 0 +#define NCNN_AVX512 0 +#define NCNN_AVX512VNNI 0 +#define NCNN_AVX512BF16 0 +#define NCNN_AVX512FP16 0 +#define NCNN_VFPV4 1 +#if __aarch64__ +#define NCNN_ARM82 0 +#define NCNN_ARM82DOT 0 +#define NCNN_ARM82FP16FML 0 +#define NCNN_ARM84BF16 0 +#define NCNN_ARM84I8MM 0 +#define NCNN_ARM86SVE 0 +#define NCNN_ARM86SVE2 0 +#define NCNN_ARM86SVEBF16 0 +#define NCNN_ARM86SVEI8MM 0 +#define NCNN_ARM86SVEF32MM 0 +#endif // __aarch64__ +#define NCNN_MSA 0 +#define NCNN_LSX 0 +#define NCNN_MMI 0 +#define NCNN_RVV 0 +#define NCNN_INT8 1 +#define NCNN_BF16 1 +#define NCNN_FORCE_INLINE 1 + +#define NCNN_VERSION_STRING "1.0.20230223" + +#include "ncnn_export.h" + +#ifdef __cplusplus + +#if NCNN_THREADS +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <process.h> +#else +#include <pthread.h> +#endif +#endif // NCNN_THREADS + +#if __ANDROID_API__ >= 26 +#define VK_USE_PLATFORM_ANDROID_KHR +#endif // __ANDROID_API__ >= 26 + +namespace ncnn { + +#if NCNN_THREADS +#if (defined _WIN32 && !(defined __MINGW32__)) +class NCNN_EXPORT Mutex +{ +public: + Mutex() { InitializeSRWLock(&srwlock); } + ~Mutex() {} + void lock_() { AcquireSRWLockExclusive(&srwlock); } + void unlock() { ReleaseSRWLockExclusive(&srwlock); } +private: + friend class ConditionVariable; + // NOTE SRWLock is available from windows vista + SRWLOCK srwlock; +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() { InitializeConditionVariable(&condvar); } + ~ConditionVariable() {} + void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); } + void broadcast() { WakeAllConditionVariable(&condvar); } + void signal() { WakeConditionVariable(&condvar); } +private: + CONDITION_VARIABLE condvar; +}; + +static unsigned __stdcall start_wrapper(void* args); +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); } + ~Thread() {} + void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); } +private: + friend unsigned __stdcall start_wrapper(void* args) + { + Thread* t = (Thread*)args; + t->_start(t->_args); + return 0; + } + HANDLE handle; + void* (*_start)(void*); + void* _args; +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { key = TlsAlloc(); } + ~ThreadLocalStorage() { TlsFree(key); } + void set(void* value) { TlsSetValue(key, (LPVOID)value); } + void* get() { return (void*)TlsGetValue(key); } +private: + DWORD key; +}; +#else // (defined _WIN32 && !(defined __MINGW32__)) +class NCNN_EXPORT Mutex +{ +public: + Mutex() { pthread_mutex_init(&mutex, 0); } + ~Mutex() { pthread_mutex_destroy(&mutex); } + void lock_() { pthread_mutex_lock(&mutex); } + void unlock() { pthread_mutex_unlock(&mutex); } +private: + friend class ConditionVariable; + pthread_mutex_t mutex; +}; + +class 
NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() { pthread_cond_init(&cond, 0); } + ~ConditionVariable() { pthread_cond_destroy(&cond); } + void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); } + void broadcast() { pthread_cond_broadcast(&cond); } + void signal() { pthread_cond_signal(&cond); } +private: + pthread_cond_t cond; +}; + +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); } + ~Thread() {} + void join() { pthread_join(t, 0); } +private: + pthread_t t; +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { pthread_key_create(&key, 0); } + ~ThreadLocalStorage() { pthread_key_delete(key); } + void set(void* value) { pthread_setspecific(key, value); } + void* get() { return pthread_getspecific(key); } +private: + pthread_key_t key; +}; +#endif // (defined _WIN32 && !(defined __MINGW32__)) +#else // NCNN_THREADS +class NCNN_EXPORT Mutex +{ +public: + Mutex() {} + ~Mutex() {} + void lock_() {} + void unlock() {} +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() {} + ~ConditionVariable() {} + void wait(Mutex& /*mutex*/) {} + void broadcast() {} + void signal() {} +}; + +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {} + ~Thread() {} + void join() {} +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { data = 0; } + ~ThreadLocalStorage() {} + void set(void* value) { data = value; } + void* get() { return data; } +private: + void* data; +}; +#endif // NCNN_THREADS + +class NCNN_EXPORT MutexLockGuard +{ +public: + MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock_(); } + ~MutexLockGuard() { mutex.unlock(); } +private: + Mutex& mutex; +}; + +} // namespace ncnn + +#if NCNN_SIMPLESTL +#include "simplestl.h" +#else +#include <algorithm> +#include <list> +#include <string> +#include <vector> +#endif + +#endif // __cplusplus + +#if NCNN_STDIO +#if NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#include <android/log.h> +#define NCNN_LOGE(...) do { \ + fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \ + __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0) +#else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#include <stdio.h> +#define NCNN_LOGE(...) do { \ + fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0) +#endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#else +#define NCNN_LOGE(...) +#endif + + +#if NCNN_FORCE_INLINE +#ifdef _MSC_VER + #define NCNN_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) +#elif defined(__CLANG__) + #if __has_attribute(__always_inline__) + #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) + #else + #define NCNN_FORCEINLINE inline + #endif +#else + #define NCNN_FORCEINLINE inline +#endif +#else + #define NCNN_FORCEINLINE inline +#endif + +#endif // NCNN_PLATFORM_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simpleocv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simpleocv.h new file mode 100644 index 0000000..55ede15 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simpleocv.h @@ -0,0 +1,501 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLEOCV_H +#define NCNN_SIMPLEOCV_H + +#include "platform.h" + +#if NCNN_SIMPLEOCV + +#include <limits.h> +#include <string.h> +#include "allocator.h" +#include "mat.h" + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif + +#ifndef NCNN_XADD +using ncnn::NCNN_XADD; +#endif + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +enum +{ + CV_LOAD_IMAGE_UNCHANGED = -1, + CV_LOAD_IMAGE_GRAYSCALE = 0, + CV_LOAD_IMAGE_COLOR = 1, +}; + +enum +{ + CV_IMWRITE_JPEG_QUALITY = 1 +}; + +// minimal opencv style data structure implementation +namespace cv { + +template<typename _Tp> +static inline _Tp saturate_cast(int v) +{ + return _Tp(v); +} +template<> +inline uchar saturate_cast<uchar>(int v) +{ + return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); +} + +template<typename _Tp> +struct Scalar_ +{ + Scalar_() + { + v[0] = 0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0) + { + v[0] = _v0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = _v3; + } + + const _Tp operator[](const int i) const + { + return v[i]; + } + + _Tp operator[](const int i) + { + return v[i]; + } + + _Tp v[4]; +}; + +typedef Scalar_<uchar> Scalar; + +template<typename _Tp> +struct Point_ +{ + Point_() + : x(0), y(0) + { + } + Point_(_Tp _x, _Tp _y) + : x(_x), y(_y) + { + } + + template<typename _Tp2> + operator Point_<_Tp2>() const + { + return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); + } + + _Tp x; + _Tp y; +}; + +typedef Point_<int> Point; +typedef Point_<float> Point2f; + +template<typename _Tp> +struct Size_ +{ + Size_() + : width(0), height(0) + { + } + Size_(_Tp _w, _Tp _h) + : width(_w), height(_h) + { + } + + template<typename _Tp2> + operator Size_<_Tp2>() const + { + return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp width; + _Tp height; +}; + +typedef Size_<int> Size; +typedef Size_<float> Size2f; + +template<typename _Tp> +struct Rect_ +{ + Rect_() + : x(0), y(0), width(0), height(0) + { + } + Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) + : x(_x), y(_y), width(_w), height(_h) + { + } + Rect_(Point_<_Tp> _p, Size_<_Tp> _size) + : x(_p.x), y(_p.y), width(_size.width), height(_size.height) + { + } + + template<typename _Tp2> + operator Rect_<_Tp2>() const + { + return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp x; + _Tp y; + _Tp width; + _Tp height; + + // area + _Tp area() const + { + return width * height; + } +}; + +template<typename _Tp> +static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); + a.width = std::min(a.x + a.width, b.x + b.width) - x1; + a.height = std::min(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + if (a.width <= 0 || a.height <= 0) + a = Rect_<_Tp>(); + return a; 
+} + +template<typename _Tp> +static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y); + a.width = std::max(a.x + a.width, b.x + b.width) - x1; + a.height = std::max(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + return a; +} + +template<typename _Tp> +static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c &= b; +} + +template<typename _Tp> +static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c |= b; +} + +typedef Rect_<int> Rect; +typedef Rect_<float> Rect2f; + +#define CV_8UC1 1 +#define CV_8UC3 3 +#define CV_8UC4 4 +#define CV_32FC1 4 + +struct NCNN_EXPORT Mat +{ + Mat() + : data(0), refcount(0), rows(0), cols(0), c(0) + { + } + + Mat(int _rows, int _cols, int flags) + : data(0), refcount(0) + { + create(_rows, _cols, flags); + } + + // copy + Mat(const Mat& m) + : data(m.data), refcount(m.refcount) + { + if (refcount) + NCNN_XADD(refcount, 1); + + rows = m.rows; + cols = m.cols; + c = m.c; + } + + Mat(int _rows, int _cols, int flags, void* _data) + : data((unsigned char*)_data), refcount(0) + { + rows = _rows; + cols = _cols; + c = flags; + } + + ~Mat() + { + release(); + } + + // assign + Mat& operator=(const Mat& m) + { + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + + rows = m.rows; + cols = m.cols; + c = m.c; + + return *this; + } + + Mat& operator=(const Scalar& s) + { + if (total() > 0) + { + uchar* p = data; + for (int i = 0; i < cols * rows; i++) + { + for (int j = 0; j < c; j++) + { + *p++ = s[j]; + } + } + } + + return *this; + } + + void create(int _rows, int _cols, int flags) + { + release(); + + rows = _rows; + cols = _cols; + c = flags; + + if (total() > 0) + { + // refcount address must be aligned, so we expand totalsize here + size_t totalsize = (total() + 3) >> 2 << 2; + data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((uchar*)data) + totalsize); + *refcount = 1; + } + } + + void release() + { + if (refcount && NCNN_XADD(refcount, -1) == 1) + ncnn::fastFree(data); + + data = 0; + + rows = 0; + cols = 0; + c = 0; + + refcount = 0; + } + + Mat clone() const + { + if (empty()) + return Mat(); + + Mat m(rows, cols, c); + + if (total() > 0) + { + memcpy(m.data, data, total()); + } + + return m; + } + + bool empty() const + { + return data == 0 || total() == 0; + } + + int channels() const + { + return c; + } + + int type() const + { + return c; + } + + size_t total() const + { + return cols * rows * c; + } + + const uchar* ptr(int y) const + { + return data + y * cols * c; + } + + uchar* ptr(int y) + { + return data + y * cols * c; + } + + template<typename _Tp> + const _Tp* ptr(int y) const + { + return (const _Tp*)data + y * cols * c; + } + + template<typename _Tp> + _Tp* ptr(int y) + { + return (_Tp*)data + y * cols * c; + } + + // roi + Mat operator()(const Rect& roi) const + { + if (empty()) + return Mat(); + + Mat m(roi.height, roi.width, c); + + int sy = roi.y; + for (int y = 0; y < roi.height; y++) + { + const uchar* sptr = ptr(sy) + roi.x * c; + uchar* dptr = m.ptr(y); + memcpy(dptr, sptr, roi.width * c); + sy++; + } + + return m; + } + + uchar* data; + + // pointer to the reference counter; + // when points to user-allocated data, the pointer is NULL + int* refcount; + + int rows; + int cols; + + int c; +}; + +enum ImreadModes +{ + IMREAD_UNCHANGED = -1, + IMREAD_GRAYSCALE = 0, + IMREAD_COLOR = 1 +}; + 
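+// (editor's sketch) when ncnn is built with NCNN_SIMPLEOCV=1 (this prebuilt +// sets it to 0 in platform.h), the subset below stands in for OpenCV: +// cv::Mat bgr = cv::imread("test.jpg", cv::IMREAD_COLOR); +// cv::rectangle(bgr, cv::Rect(10, 10, 100, 80), cv::Scalar(0, 255, 0), 2); +// cv::imwrite("out.jpg", bgr); +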
+NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); + +enum ImwriteFlags +{ + IMWRITE_JPEG_QUALITY = 1 +}; + +NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector<int>& params = std::vector<int>()); + +NCNN_EXPORT void imshow(const std::string& name, const Mat& m); + +NCNN_EXPORT int waitKey(int delay = 0); + +#if NCNN_PIXEL +NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0); +#endif // NCNN_PIXEL + +#if NCNN_PIXEL_DRAWING + +enum +{ + FILLED = -1 +}; + +NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1); + +enum +{ + FONT_HERSHEY_SIMPLEX = 0 +}; + +NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1); + +NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine); + +#endif // NCNN_PIXEL_DRAWING + +} // namespace cv + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma pop_macro("min") +#pragma pop_macro("max") +#endif + +#endif // NCNN_SIMPLEOCV + +#endif // NCNN_SIMPLEOCV_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simpleomp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simpleomp.h new file mode 100644 index 0000000..13e2452 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simpleomp.h @@ -0,0 +1,53 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
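+ +// (editor's sketch) per the note below, only the llvm abi pattern is handled: +// #pragma omp parallel for num_threads(4) +// for (int i = 0; i < count; i++) +// do_work(i); // count and do_work are illustrative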
+
+#ifndef NCNN_SIMPLEOMP_H
+#define NCNN_SIMPLEOMP_H
+
+#include "platform.h"
+
+#if NCNN_SIMPLEOMP
+
+#include <stdint.h>
+
+// This minimal openmp runtime implementation only supports the llvm openmp abi
+// and only supports #pragma omp parallel for num_threads(X)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NCNN_EXPORT int omp_get_max_threads();
+
+NCNN_EXPORT void omp_set_num_threads(int num_threads);
+
+NCNN_EXPORT int omp_get_dynamic();
+
+NCNN_EXPORT void omp_set_dynamic(int dynamic);
+
+NCNN_EXPORT int omp_get_num_threads();
+
+NCNN_EXPORT int omp_get_thread_num();
+
+NCNN_EXPORT int kmp_get_blocktime();
+
+NCNN_EXPORT void kmp_set_blocktime(int blocktime);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // NCNN_SIMPLEOMP
+
+#endif // NCNN_SIMPLEOMP_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simplestl.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simplestl.h
new file mode 100644
index 0000000..00ff468
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/simplestl.h
@@ -0,0 +1,565 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_SIMPLESTL_H
+#define NCNN_SIMPLESTL_H
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if !NCNN_SIMPLESTL
+
+#include <string>
+
+#else
+
+// allocation functions
+NCNN_EXPORT void* operator new(size_t size);
+NCNN_EXPORT void* operator new[](size_t size);
+// placement allocation functions
+NCNN_EXPORT void* operator new(size_t size, void* ptr);
+NCNN_EXPORT void* operator new[](size_t size, void* ptr);
+// deallocation functions
+NCNN_EXPORT void operator delete(void* ptr);
+NCNN_EXPORT void operator delete[](void* ptr);
+// deallocation functions since c++14
+#if __cplusplus >= 201402L
+NCNN_EXPORT void operator delete(void* ptr, size_t sz);
+NCNN_EXPORT void operator delete[](void* ptr, size_t sz);
+#endif
+// placement deallocation functions
+NCNN_EXPORT void operator delete(void* ptr, void* voidptr2);
+NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2);
+
+#endif
+
+// minimal stl data structure implementation
+namespace std {
+
+template<typename T>
+const T& max(const T& a, const T& b)
+{
+    return (a < b) ? b : a;
+}
+
+template<typename T>
+const T& min(const T& a, const T& b)
+{
+    return (a > b) ? b : a;
+}
+
+template<typename T>
+void swap(T& a, T& b)
+{
+    T temp(a);
+    a = b;
+    b = temp;
+}
+
+template<typename T1, typename T2>
+struct pair
+{
+    pair()
+        : first(), second()
+    {
+    }
+    pair(const T1& t1, const T2& t2)
+        : first(t1), second(t2)
+    {
+    }
+
+    T1 first;
+    T2 second;
+};
+
+template<typename T1, typename T2>
+bool operator==(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return (x.first == y.first && x.second == y.second);
+}
+template<typename T1, typename T2>
+bool operator<(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
+}
+template<typename T1, typename T2>
+bool operator!=(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return !(x == y);
+}
+template<typename T1, typename T2>
+bool operator>(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return y < x;
+}
+template<typename T1, typename T2>
+bool operator<=(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return !(y < x);
+}
+template<typename T1, typename T2>
+bool operator>=(const pair<T1, T2>& x, const pair<T1, T2>& y)
+{
+    return !(x < y);
+}
+
+template<typename T1, typename T2>
+pair<T1, T2> make_pair(const T1& t1, const T2& t2)
+{
+    return pair<T1, T2>(t1, t2);
+}
+
+template<typename T>
+struct node
+{
+    node<T>* prev_;
+    node<T>* next_;
+    T data_;
+
+    node()
+        : prev_(0), next_(0), data_()
+    {
+    }
+    node(const T& t)
+        : prev_(0), next_(0), data_(t)
+    {
+    }
+};
+
+template<typename T>
+struct iter_list
+{
+    iter_list()
+        : curr_(0)
+    {
+    }
+    iter_list(node<T>* n)
+        : curr_(n)
+    {
+    }
+    iter_list(const iter_list& i)
+        : curr_(i.curr_)
+    {
+    }
+    ~iter_list()
+    {
+    }
+
+    iter_list& operator=(const iter_list& i)
+    {
+        curr_ = i.curr_;
+        return *this;
+    }
+
+    T& operator*()
+    {
+        return curr_->data_;
+    }
+    T* operator->()
+    {
+        return &(curr_->data_);
+    }
+
+    bool operator==(const iter_list& i)
+    {
+        return curr_ == i.curr_;
+    }
+    bool operator!=(const iter_list& i)
+    {
+        return curr_ != i.curr_;
+    }
+
+    iter_list& operator++()
+    {
+        curr_ = curr_->next_;
+        return *this;
+    }
+    iter_list& operator--()
+    {
+        curr_ = curr_->prev_;
+        return *this;
+    }
+
+    node<T>* curr_;
+};
+
+template<typename T>
+struct list
+{
+    typedef iter_list<T> iterator;
+
+    list()
+    {
+        head_ = new node<T>();
+        tail_ = head_;
+        count_ = 0;
+    }
+    ~list()
+    {
+        clear();
+        delete head_;
+    }
+    list(const list& l)
+    {
+        head_ = new node<T>();
+        tail_ = head_;
+        count_ = 0;
+
+        for (iter_list<T> i = l.begin(); i != l.end(); ++i)
+        {
+            push_back(*i);
+        }
+    }
+
+    list& operator=(const list& l)
+    {
+        if (this == &l)
+        {
+            return *this;
+        }
+        clear();
+
+        for (iter_list<T> i = l.begin(); i != l.end(); ++i)
+        {
+            push_back(*i);
+        }
+        return *this;
+    }
+
+    void clear()
+    {
+        while (count_ > 0)
+        {
+            pop_front();
+        }
+    }
+
+    void pop_front()
+    {
+        if (count_ > 0)
+        {
+            head_ = head_->next_;
+            delete head_->prev_;
+            head_->prev_ = 0;
+            --count_;
+        }
+    }
+
+    size_t size() const
+    {
+        return count_;
+    }
+    iter_list<T> begin() const
+    {
+        return iter_list<T>(head_);
+    }
+    iter_list<T> end() const
+    {
+        return iter_list<T>(tail_);
+    }
+    bool empty() const
+    {
+        return count_ == 0;
+    }
+
+    void push_back(const T& t)
+    {
+        if (count_ == 0)
+        {
+            head_ = new node<T>(t);
+            head_->prev_ = 0;
+            head_->next_ = tail_;
+            tail_->prev_ = head_;
+            count_ = 1;
+        }
+        else
+        {
+            node<T>* temp = new node<T>(t);
+            temp->prev_ = tail_->prev_;
+            temp->next_ = tail_;
+            tail_->prev_->next_ = temp;
+            tail_->prev_ = temp;
+            ++count_;
+        }
+    }
+
+    iter_list<T> erase(iter_list<T> pos)
+    {
+        if (pos != end())
+        {
+            node<T>* temp = pos.curr_;
+            if (temp == head_)
+            {
+                ++pos;
+                temp->next_->prev_ = 0;
+                head_ = temp->next_;
+            }
+            else
+            {
+                --pos;
+                temp->next_->prev_ = temp->prev_;
+                temp->prev_->next_ = temp->next_;
+                ++pos;
+            }
+            delete temp;
+            --count_;
+        }
+        return pos;
+    }
+
+protected:
+    node<T>* head_;
+    node<T>* tail_;
+    size_t count_;
+};
+
+template<typename T>
+struct greater
+{
+    bool operator()(const T& x, const T& y) const
+    {
+        return (x > y);
+    }
+};
+
+template<typename T>
+struct less
+{
+    bool operator()(const T& x, const T& y) const
+    {
+        return (x < y);
+    }
+};
+
+template<typename RandomAccessIter, typename Compare>
+void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp)
+{
+    // [TODO] heap sort should be used here, but we simply use bubble sort now
+    for (RandomAccessIter i = first; i < middle; ++i)
+    {
+        // bubble sort
+        for (RandomAccessIter j = last - 1; j > first; --j)
+        {
+            if (comp(*j, *(j - 1)))
+            {
+                swap(*j, *(j - 1));
+            }
+        }
+    }
+}
+
+template<typename T>
+struct vector
+{
+    vector()
+        : data_(0), size_(0), capacity_(0)
+    {
+    }
+    vector(const size_t new_size, const T& value = T())
+        : data_(0), size_(0), capacity_(0)
+    {
+        resize(new_size, value);
+    }
+    ~vector()
+    {
+        clear();
+    }
+    vector(const vector& v)
+        : data_(0), size_(0), capacity_(0)
+    {
+        resize(v.size());
+        for (size_t i = 0; i < size_; i++)
+        {
+            data_[i] = v.data_[i];
+        }
+    }
+
+    vector& operator=(const vector& v)
+    {
+        if (this == &v)
+        {
+            return *this;
+        }
+        resize(0);
+        resize(v.size());
+        for (size_t i = 0; i < size_; i++)
+        {
+            data_[i] = v.data_[i];
+        }
+        return *this;
+    }
+
+    void resize(const size_t new_size, const T& value = T())
+    {
+        try_alloc(new_size);
+        if (new_size > size_)
+        {
+            for (size_t i = size_; i < new_size; i++)
+            {
+                new (&data_[i]) T(value);
+            }
+        }
+        else if (new_size < size_)
+        {
+            for (size_t i = new_size; i < size_; i++)
+            {
+                data_[i].~T();
+            }
+        }
+        size_ = new_size;
+    }
+
+    void clear()
+    {
+        for (size_t i = 0; i < size_; i++)
+        {
+            data_[i].~T();
+        }
+        delete[](char*) data_;
+        data_ = 0;
+        size_ = 0;
+        capacity_ = 0;
+    }
+
+    T* data() const
+    {
+        return data_;
+    }
+    size_t size() const
+    {
+        return size_;
+    }
+    T& operator[](size_t i) const
+    {
+        return data_[i];
+    }
+    T* begin() const
+    {
+        return &data_[0];
+    }
+    T* end() const
+    {
+        return &data_[size_];
+    }
+    bool empty() const
+    {
+        return size_ == 0;
+    }
+
+    void push_back(const T& t)
+    {
+        try_alloc(size_ + 1);
+        new (&data_[size_]) T(t);
+        size_++;
+    }
+
+    void insert(T* pos, T* b, T* e)
+    {
+        vector<T>* v = 0;
+        if (b >= begin() && b < end())
+        {
+            //the same vector
+            v = new vector<T>(*this);
+            b = v->begin() + (b - begin());
+            e = v->begin() + (e - begin());
+        }
+        size_t diff = pos - begin();
+        try_alloc(size_ + (e - b));
+        pos = begin() + diff;
+        memmove(pos + (e - b), pos, (end() - pos) * sizeof(T));
+        size_t len = e - b;
+        size_ += len;
+        for (size_t i = 0; i < len; i++)
+        {
+            *pos = *b;
+            pos++;
+            b++;
+        }
+        delete v;
+    }
+
+    T* erase(T* pos)
+    {
+        pos->~T();
+        memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T));
+        size_--;
+        return pos;
+    }
+
+protected:
+    T* data_;
+    size_t size_;
+    size_t capacity_;
+    void try_alloc(size_t new_size)
+    {
+        if (new_size * 3 / 2 > capacity_ / 2)
+        {
+            capacity_ = new_size * 2;
+            T* new_data = (T*)new char[capacity_ * sizeof(T)];
+            memset(static_cast<void*>(new_data), 0, capacity_ * sizeof(T));
+            if (data_)
+            {
+                memmove(new_data, data_, sizeof(T) * size_);
+                delete[](char*) data_;
+            }
+            data_ = new_data;
+        }
+    }
+};
+
+struct NCNN_EXPORT string : public vector<char>
+{
+    string()
+    {
+    }
+    string(const char* str)
+    {
+        size_t len = strlen(str);
+        resize(len);
+        memcpy(data_, str, len);
+    }
+    const char* c_str() const
+    {
+        return (const char*)data_;
+    }
+    bool operator==(const string& str2) const
+    {
+        return strcmp(data_, str2.data_) == 0;
+    }
+    bool operator==(const char* str2) const
+    {
+        return strcmp(data_, str2) == 0;
+    }
+    bool operator!=(const char* str2) const
+    {
+        return strcmp(data_, str2) != 0;
+    }
+    string& operator+=(const string& str1)
+    {
+        insert(end(), str1.begin(), str1.end());
+        return *this;
+    }
+};
+
+inline string operator+(const string& str1, const string& str2)
+{
+    string str(str1);
+    str.insert(str.end(), str2.begin(), str2.end());
+    return str;
+}
+
+} // namespace std
+
+#endif // NCNN_SIMPLESTL_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h
new file mode 100644
index 0000000..103ac3e
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/include/ncnn/vulkan_header_fix.h
@@ -0,0 +1,259 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_VULKAN_HEADER_FIX_H
+#define NCNN_VULKAN_HEADER_FIX_H
+
+#include <vulkan/vulkan.h>
+
+// This header contains new structure and function declarations to fix the build with an old vulkan sdk
+
+#if VK_HEADER_VERSION < 70
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES (VkStructureType)1000094000
+typedef enum VkSubgroupFeatureFlagBits
+{
+    VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001,
+    VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002,
+    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT = 0x00000004,
+    VK_SUBGROUP_FEATURE_BALLOT_BIT = 0x00000008,
+    VK_SUBGROUP_FEATURE_SHUFFLE_BIT = 0x00000010,
+    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT = 0x00000020,
+    VK_SUBGROUP_FEATURE_CLUSTERED_BIT = 0x00000040,
+    VK_SUBGROUP_FEATURE_QUAD_BIT = 0x00000080,
+    VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV = 0x00000100,
+    VK_SUBGROUP_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSubgroupFeatureFlagBits;
+typedef VkFlags VkSubgroupFeatureFlags;
+typedef struct VkPhysicalDeviceSubgroupProperties
+{
+    VkStructureType sType;
+    void* pNext;
+    uint32_t subgroupSize;
+    VkShaderStageFlags supportedStages;
+    VkSubgroupFeatureFlags supportedOperations;
+    VkBool32 quadOperationsInAllStages;
+} VkPhysicalDeviceSubgroupProperties;
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES (VkStructureType)1000168000
+#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT (VkStructureType)1000168001
+typedef struct VkPhysicalDeviceMaintenance3Properties
+{
+    VkStructureType sType;
+    void* pNext;
+    uint32_t maxPerSetDescriptors;
+    VkDeviceSize maxMemoryAllocationSize;
+} VkPhysicalDeviceMaintenance3Properties;
+typedef struct VkDescriptorSetLayoutSupport
+{
+    VkStructureType sType;
+    void* pNext;
+    VkBool32 supported;
+} VkDescriptorSetLayoutSupport;
+typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR;
+typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR;
+typedef void(VKAPI_PTR* PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport);
+#endif // VK_HEADER_VERSION < 70
+
+#if 
VK_HEADER_VERSION < 80 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR (VkStructureType)1000177000 +typedef struct VkPhysicalDevice8BitStorageFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeaturesKHR; +#define VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR (VkStructureType)1000109000 +#define VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR (VkStructureType)1000109001 +#define VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR (VkStructureType)1000109002 +#define VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR (VkStructureType)1000109003 +#define VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR (VkStructureType)1000109004 +#define VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR (VkStructureType)1000109005 +#define VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR (VkStructureType)1000109006 +typedef struct VkAttachmentDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription2KHR; +typedef struct VkAttachmentReference2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t attachment; + VkImageLayout layout; + VkImageAspectFlags aspectMask; +} VkAttachmentReference2KHR; +typedef struct VkSubpassDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t viewMask; + uint32_t inputAttachmentCount; + const VkAttachmentReference2KHR* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference2KHR* pColorAttachments; + const VkAttachmentReference2KHR* pResolveAttachments; + const VkAttachmentReference2KHR* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription2KHR; +typedef struct VkSubpassDependency2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; +} VkSubpassDependency2KHR; +typedef struct VkRenderPassCreateInfo2KHR +{ + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription2KHR* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription2KHR* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency2KHR* pDependencies; + uint32_t correlatedViewMaskCount; + const uint32_t* pCorrelatedViewMasks; +} VkRenderPassCreateInfo2KHR; +typedef struct VkSubpassBeginInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassContents contents; +} VkSubpassBeginInfoKHR; + +typedef struct VkSubpassEndInfoKHR +{ + VkStructureType sType; + const void* pNext; +} VkSubpassEndInfoKHR; +typedef VkResult(VKAPI_PTR* PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2KHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void(VKAPI_PTR* PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const 
VkSubpassBeginInfoKHR* pSubpassBeginInfo); +typedef void(VKAPI_PTR* PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfoKHR* pSubpassBeginInfo, const VkSubpassEndInfoKHR* pSubpassEndInfo); +typedef void(VKAPI_PTR* PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR* pSubpassEndInfo); +#endif // VK_HEADER_VERSION < 80 + +#if VK_HEADER_VERSION < 95 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR (VkStructureType)1000082000 +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; +#endif // VK_HEADER_VERSION < 95 + +#if VK_HEADER_VERSION < 97 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT +{ + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; +#endif // VK_HEADER_VERSION < 97 + +#if VK_HEADER_VERSION < 101 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV (VkStructureType)1000249000 +#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249002 +typedef enum VkComponentTypeNV +{ + VK_COMPONENT_TYPE_FLOAT16_NV = 0, + VK_COMPONENT_TYPE_FLOAT32_NV = 1, + VK_COMPONENT_TYPE_FLOAT64_NV = 2, + VK_COMPONENT_TYPE_SINT8_NV = 3, + VK_COMPONENT_TYPE_SINT16_NV = 4, + VK_COMPONENT_TYPE_SINT32_NV = 5, + VK_COMPONENT_TYPE_SINT64_NV = 6, + VK_COMPONENT_TYPE_UINT8_NV = 7, + VK_COMPONENT_TYPE_UINT16_NV = 8, + VK_COMPONENT_TYPE_UINT32_NV = 9, + VK_COMPONENT_TYPE_UINT64_NV = 10, + VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV, + VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV, + VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1), + VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkComponentTypeNV; +typedef enum VkScopeNV +{ + VK_SCOPE_DEVICE_NV = 1, + VK_SCOPE_WORKGROUP_NV = 2, + VK_SCOPE_SUBGROUP_NV = 3, + VK_SCOPE_QUEUE_FAMILY_NV = 5, + VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV, + VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV, + VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1), + VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkScopeNV; +typedef struct VkCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeNV AType; + VkComponentTypeNV BType; + VkComponentTypeNV CType; + VkComponentTypeNV DType; + VkScopeNV scope; +} VkCooperativeMatrixPropertiesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV +{ + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} VkPhysicalDeviceCooperativeMatrixPropertiesNV; +typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); +#endif // VK_HEADER_VERSION < 101 + +#if VK_HEADER_VERSION < 
208 +typedef enum VkInstanceCreateFlagBits +{ + VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR = 0x00000001, + VK_INSTANCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkInstanceCreateFlagBits; +#endif // VK_HEADER_VERSION < 208 + +#endif // NCNN_VULKAN_HEADER_FIX_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/OGLCompilerTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/OGLCompilerTargets.cmake new file mode 100644 index 0000000..89bb144 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/OGLCompilerTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OGLCompilerTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OGLCompiler) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OGLCompiler ALIAS glslang::OGLCompiler) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/OSDependentTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/OSDependentTargets.cmake new file mode 100644 index 0000000..c345456 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/OSDependentTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OSDependentTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OSDependent) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OSDependent ALIAS glslang::OSDependent) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/SPIRVTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/SPIRVTargets.cmake new file mode 100644 index 0000000..1c614ab --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/SPIRVTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `SPIRVTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::SPIRV) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(SPIRV ALIAS glslang::SPIRV) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake new file mode 100644 index 0000000..83b16db --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config-version.cmake @@ -0,0 +1,70 @@ +# This is a basic version file for the Config-mode of find_package(). +# It is used by write_basic_package_version_file() as input file for configure_file() +# to create a version-file which can be installed along a config.cmake file. +# +# The created file sets PACKAGE_VERSION_EXACT if the current version string and +# the requested version string are exactly the same and it sets +# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, +# but only if the requested major version is the same as the current one. 
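+# Editor's note (worked example): with PACKAGE_VERSION fixed to "11.12.0"
+# below, find_package(glslang 11.10) is accepted (same major version, and the
+# request is not newer than 11.12.0), while find_package(glslang 12.0) makes
+# this file set PACKAGE_VERSION_COMPATIBLE to FALSE.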
+# The variable CVF_VERSION must be set before calling configure_file(). + + +set(PACKAGE_VERSION "11.12.0") + +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + + if("11.12.0" MATCHES "^([0-9]+)\\.") + set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}") + if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0) + string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}") + endif() + else() + set(CVF_VERSION_MAJOR "11.12.0") + endif() + + if(PACKAGE_FIND_VERSION_RANGE) + # both endpoints of the range must have the expected major version + math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1") + if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT))) + set(PACKAGE_VERSION_COMPATIBLE FALSE) + elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX))) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + else() + if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + + if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() + endif() +endif() + + +# if the installed project requested no architecture check, don't perform the check +if("FALSE") + return() +endif() + +# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: +if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "4" STREQUAL "") + return() +endif() + +# check that the installed version has the same 32/64bit-ness as the one which is currently searching: +if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "4") + math(EXPR installedBits "4 * 8") + set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") + set(PACKAGE_VERSION_UNSUITABLE TRUE) +endif() diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config.cmake new file mode 100644 index 0000000..5ebe599 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-config.cmake @@ -0,0 +1,27 @@ + +####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() ####### +####### Any changes to this file will be overwritten by the next CMake run #### +####### The input file was glslang-config.cmake.in ######## + +get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) + +macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() +endmacro() + +macro(check_required_components _NAME) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(NOT ${_NAME}_${comp}_FOUND) + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + endif() + endif() + endforeach() +endmacro() + 
+#################################################################################### + include("${PACKAGE_PREFIX_DIR}/lib/cmake/glslang/glslang-targets.cmake") + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets-release.cmake new file mode 100644 index 0000000..e4de522 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "glslang::OSDependent" for configuration "Release" +set_property(TARGET glslang::OSDependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OSDependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOSDependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OSDependent ) +list(APPEND _cmake_import_check_files_for_glslang::OSDependent "${_IMPORT_PREFIX}/lib/libOSDependent.a" ) + +# Import target "glslang::glslang" for configuration "Release" +set_property(TARGET glslang::glslang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::glslang PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libglslang.a" + ) + +list(APPEND _cmake_import_check_targets glslang::glslang ) +list(APPEND _cmake_import_check_files_for_glslang::glslang "${_IMPORT_PREFIX}/lib/libglslang.a" ) + +# Import target "glslang::MachineIndependent" for configuration "Release" +set_property(TARGET glslang::MachineIndependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::MachineIndependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::MachineIndependent ) +list(APPEND _cmake_import_check_files_for_glslang::MachineIndependent "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" ) + +# Import target "glslang::GenericCodeGen" for configuration "Release" +set_property(TARGET glslang::GenericCodeGen APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::GenericCodeGen PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" + ) + +list(APPEND _cmake_import_check_targets glslang::GenericCodeGen ) +list(APPEND _cmake_import_check_files_for_glslang::GenericCodeGen "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" ) + +# Import target "glslang::OGLCompiler" for configuration "Release" +set_property(TARGET glslang::OGLCompiler APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OGLCompiler PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OGLCompiler ) +list(APPEND _cmake_import_check_files_for_glslang::OGLCompiler "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" ) + +# Import target "glslang::SPIRV" for 
configuration "Release" +set_property(TARGET glslang::SPIRV APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::SPIRV PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libSPIRV.a" + ) + +list(APPEND _cmake_import_check_targets glslang::SPIRV ) +list(APPEND _cmake_import_check_files_for_glslang::SPIRV "${_IMPORT_PREFIX}/lib/libSPIRV.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake new file mode 100644 index 0000000..2173b87 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslang/glslang-targets.cmake @@ -0,0 +1,135 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS glslang::OSDependent glslang::glslang glslang::MachineIndependent glslang::GenericCodeGen glslang::OGLCompiler glslang::SPIRV) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target glslang::OSDependent +add_library(glslang::OSDependent STATIC IMPORTED) + +set_target_properties(glslang::OSDependent PROPERTIES + INTERFACE_LINK_LIBRARIES "-pthread" +) + +# Create imported target glslang::glslang +add_library(glslang::glslang STATIC IMPORTED) + +set_target_properties(glslang::glslang PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target glslang::MachineIndependent +add_library(glslang::MachineIndependent STATIC IMPORTED) + +set_target_properties(glslang::MachineIndependent PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target glslang::GenericCodeGen +add_library(glslang::GenericCodeGen STATIC IMPORTED) + +# Create imported target glslang::OGLCompiler +add_library(glslang::OGLCompiler STATIC IMPORTED) + +# Create imported target glslang::SPIRV +add_library(glslang::SPIRV STATIC IMPORTED) + +set_target_properties(glslang::SPIRV PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_LINK_LIBRARIES "\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/glslang-targets-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslangTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslangTargets.cmake new file mode 100644 index 0000000..6f4e3bc --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/glslangTargets.cmake @@ -0,0 +1,15 @@ + + message(WARNING "Using `glslangTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::glslang) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + if(OFF) + add_library(glslang ALIAS glslang::glslang) + else() + add_library(glslang ALIAS glslang::glslang) + add_library(MachineIndependent ALIAS glslang::MachineIndependent) + add_library(GenericCodeGen ALIAS glslang::GenericCodeGen) + endif() + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn-release.cmake new file mode 100644 index 0000000..37f24ba --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn-release.cmake @@ -0,0 +1,19 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "ncnn" for configuration "Release" +set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(ncnn PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a" + ) + +list(APPEND _cmake_import_check_targets ncnn ) +list(APPEND _cmake_import_check_files_for_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake new file mode 100644 index 0000000..f224c1b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnn.cmake @@ -0,0 +1,125 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
+set(_cmake_targets_defined "")
+set(_cmake_targets_not_defined "")
+set(_cmake_expected_targets "")
+foreach(_cmake_expected_target IN ITEMS ncnn)
+  list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
+  if(TARGET "${_cmake_expected_target}")
+    list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
+  else()
+    list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
+  endif()
+endforeach()
+unset(_cmake_expected_target)
+if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
+  unset(_cmake_targets_defined)
+  unset(_cmake_targets_not_defined)
+  unset(_cmake_expected_targets)
+  unset(CMAKE_IMPORT_FILE_VERSION)
+  cmake_policy(POP)
+  return()
+endif()
+if(NOT _cmake_targets_defined STREQUAL "")
+  string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
+  string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
+  message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
+endif()
+unset(_cmake_targets_defined)
+unset(_cmake_targets_not_defined)
+unset(_cmake_expected_targets)
+
+
+# Compute the installation prefix relative to this file.
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+if(_IMPORT_PREFIX STREQUAL "/")
+  set(_IMPORT_PREFIX "")
+endif()
+
+# Create imported target ncnn
+add_library(ncnn STATIC IMPORTED)
+
+set_target_properties(ncnn PROPERTIES
+  INTERFACE_COMPILE_OPTIONS "-fno-rtti;-fno-exceptions"
+  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn"
+  INTERFACE_LINK_LIBRARIES "-fopenmp;-static-openmp;-Wl,-wrap,__kmp_affinity_determine_capable;Threads::Threads;Vulkan::Vulkan;\$<LINK_ONLY:glslang::glslang>;\$<LINK_ONLY:glslang::SPIRV>;android;jnigraphics;log"
+  INTERFACE_POSITION_INDEPENDENT_CODE "ON"
+)
+
+if(CMAKE_VERSION VERSION_LESS 2.8.12)
+  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
+endif()
+
+# Load information for each installed configuration.
+file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/ncnn-*.cmake")
+foreach(_cmake_config_file IN LISTS _cmake_config_files)
+  include("${_cmake_config_file}")
+endforeach()
+unset(_cmake_config_file)
+unset(_cmake_config_files)
+
+# Cleanup temporary variables.
+set(_IMPORT_PREFIX)
+
+# Loop over all imported files and verify that they actually exist
+foreach(_cmake_target IN LISTS _cmake_import_check_targets)
+  foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
+    if(NOT EXISTS "${_cmake_file}")
+      message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
+   \"${_cmake_file}\"
+but this file does not exist. Possible reasons include:
+* The file was deleted, renamed, or moved to another location.
+* An install or uninstall procedure did not complete successfully.
+* The installation package was faulty and contained
+   \"${CMAKE_CURRENT_LIST_FILE}\"
+but not all the files it references.
+")
+    endif()
+  endforeach()
+  unset(_cmake_file)
+  unset("_cmake_import_check_files_for_${_cmake_target}")
+endforeach()
+unset(_cmake_target)
+unset(_cmake_import_check_targets)
+
+# Make sure the targets which have been exported in some other
+# export set exist.
+unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) +foreach(_target "glslang::glslang" "glslang::SPIRV" ) + if(NOT TARGET "${_target}" ) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets "${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets} ${_target}") + endif() +endforeach() + +if(DEFINED ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + if(CMAKE_FIND_PACKAGE_NAME) + set( ${CMAKE_FIND_PACKAGE_NAME}_FOUND FALSE) + set( ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + else() + message(FATAL_ERROR "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + endif() +endif() +unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnnConfig.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnnConfig.cmake new file mode 100644 index 0000000..c1e5613 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/cmake/ncnn/ncnnConfig.cmake @@ -0,0 +1,42 @@ +set(NCNN_OPENMP ON) +set(NCNN_THREADS ON) +set(NCNN_VULKAN ON) +set(NCNN_SHARED_LIB OFF) +set(NCNN_SYSTEM_GLSLANG OFF) + +if(NCNN_OPENMP) + find_package(OpenMP) +endif() + +if(NCNN_THREADS) + set(CMAKE_THREAD_PREFER_PTHREAD TRUE) + set(THREADS_PREFER_PTHREAD_FLAG TRUE) + find_package(Threads REQUIRED) +endif() + +if(NCNN_VULKAN) + find_package(Vulkan REQUIRED) + + if(NOT NCNN_SHARED_LIB) + if(NCNN_SYSTEM_GLSLANG) + find_package(glslang QUIET) + if(NOT glslang_FOUND) + set(GLSLANG_TARGET_DIR "") + include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake) + include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake) + if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + # hlsl support can be optional + include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + endif() + include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake) + include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake) + endif() + else() + set(glslang_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/glslang") + find_package(glslang QUIET) + endif() + + endif() +endif() + +include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libGenericCodeGen.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libGenericCodeGen.a new file mode 100644 index 0000000..0297551 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libGenericCodeGen.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libMachineIndependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libMachineIndependent.a new file mode 100644 index 0000000..0534cea Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libMachineIndependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libOGLCompiler.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libOGLCompiler.a new file mode 100644 index 0000000..00b4742 Binary files /dev/null and 
b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libOGLCompiler.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libOSDependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libOSDependent.a new file mode 100644 index 0000000..648036b Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libOSDependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libSPIRV.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libSPIRV.a new file mode 100644 index 0000000..c64a8f5 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libSPIRV.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libglslang.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libglslang.a new file mode 100644 index 0000000..ae47ca5 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libglslang.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libncnn.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libncnn.a new file mode 100644 index 0000000..98340ab Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/libncnn.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc new file mode 100644 index 0000000..e683e4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/armeabi-v7a/lib/pkgconfig/ncnn.pc @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. +librarydir=${prefix}/lib +includedir=${prefix}/include + +Name: ncnn +Description: high-performance neural network inference framework optimized for the mobile platform +Version: 1.0.20230223 +URL: https://github.com/Tencent/ncnn +Libs: -L"${librarydir}" -lncnn +Cflags: -I"${includedir}" + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/BaseTypes.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/BaseTypes.h new file mode 100644 index 0000000..156d98b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/BaseTypes.h @@ -0,0 +1,609 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _BASICTYPES_INCLUDED_ +#define _BASICTYPES_INCLUDED_ + +namespace glslang { + +// +// Basic type. Arrays, vectors, sampler details, etc., are orthogonal to this. +// +enum TBasicType { + EbtVoid, + EbtFloat, + EbtDouble, + EbtFloat16, + EbtInt8, + EbtUint8, + EbtInt16, + EbtUint16, + EbtInt, + EbtUint, + EbtInt64, + EbtUint64, + EbtBool, + EbtAtomicUint, + EbtSampler, + EbtStruct, + EbtBlock, + EbtAccStruct, + EbtReference, + EbtRayQuery, +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type + EbtSpirvType, +#endif + + // HLSL types that live only temporarily. + EbtString, + + EbtNumTypes +}; + +// +// Storage qualifiers. Should align with different kinds of storage or +// resource or GLSL storage qualifier. Expansion is deprecated. +// +// N.B.: You probably DON'T want to add anything here, but rather just add it +// to the built-in variables. See the comment above TBuiltInVariable. +// +// A new built-in variable will normally be an existing qualifier, like 'in', 'out', etc. +// DO NOT follow the design pattern of, say EvqInstanceId, etc. 
+//
+enum TStorageQualifier {
+    EvqTemporary,     // For temporaries (within a function), read/write
+    EvqGlobal,        // For globals read/write
+    EvqConst,         // User-defined constant values, will be semantically constant and constant folded
+    EvqVaryingIn,     // pipeline input, read only, also supercategory for all built-ins not included in this enum (see TBuiltInVariable)
+    EvqVaryingOut,    // pipeline output, read/write, also supercategory for all built-ins not included in this enum (see TBuiltInVariable)
+    EvqUniform,       // read only, shared with app
+    EvqBuffer,        // read/write, shared with app
+    EvqShared,        // compute shader's read/write 'shared' qualifier
+#ifndef GLSLANG_WEB
+    EvqSpirvStorageClass, // spirv_storage_class
+#endif
+
+    EvqPayload,
+    EvqPayloadIn,
+    EvqHitAttr,
+    EvqCallableData,
+    EvqCallableDataIn,
+
+    EvqtaskPayloadSharedEXT,
+
+    // parameters
+    EvqIn,            // also, for 'in' in the grammar before we know if it's a pipeline input or an 'in' parameter
+    EvqOut,           // also, for 'out' in the grammar before we know if it's a pipeline output or an 'out' parameter
+    EvqInOut,
+    EvqConstReadOnly, // input; also other read-only types having neither a constant value nor constant-value semantics
+
+    // built-ins read by vertex shader
+    EvqVertexId,
+    EvqInstanceId,
+
+    // built-ins written by vertex shader
+    EvqPosition,
+    EvqPointSize,
+    EvqClipVertex,
+
+    // built-ins read by fragment shader
+    EvqFace,
+    EvqFragCoord,
+    EvqPointCoord,
+
+    // built-ins written by fragment shader
+    EvqFragColor,
+    EvqFragDepth,
+    EvqFragStencil,
+
+    // end of list
+    EvqLast
+};
+
+//
+// Subcategories of the TStorageQualifier, simply to give a direct mapping
+// between built-in variable names and a numerical value (the enum).
+//
+// For backward compatibility, there is some redundancy between the
+// TStorageQualifier and these. Existing members should both be maintained accurately.
+// However, any new built-in variable (and any existing non-redundant one)
+// must follow the pattern that the specific built-in is here, and only its
+// general qualifier is in TStorageQualifier.
+//
+// Something like gl_Position, which is sometimes 'in' and sometimes 'out',
+// shows up as two different built-in variables in a single stage, but
+// only has a single enum in TBuiltInVariable, so both the
+// TStorageQualifier and the TBuiltInVariable are needed to distinguish
+// between them.
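+//
+// Editor's illustration (one reading of the comment above): a vertex shader's
+// gl_Position output pairs EvqPosition with EbvPosition, while a later stage
+// reading gl_Position back in pairs EvqVaryingIn with the same EbvPosition;
+// only the (storage qualifier, built-in) combination pins down which variable
+// is meant.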
+// +enum TBuiltInVariable { + EbvNone, + EbvNumWorkGroups, + EbvWorkGroupSize, + EbvWorkGroupId, + EbvLocalInvocationId, + EbvGlobalInvocationId, + EbvLocalInvocationIndex, + EbvNumSubgroups, + EbvSubgroupID, + EbvSubGroupSize, + EbvSubGroupInvocation, + EbvSubGroupEqMask, + EbvSubGroupGeMask, + EbvSubGroupGtMask, + EbvSubGroupLeMask, + EbvSubGroupLtMask, + EbvSubgroupSize2, + EbvSubgroupInvocation2, + EbvSubgroupEqMask2, + EbvSubgroupGeMask2, + EbvSubgroupGtMask2, + EbvSubgroupLeMask2, + EbvSubgroupLtMask2, + EbvVertexId, + EbvInstanceId, + EbvVertexIndex, + EbvInstanceIndex, + EbvBaseVertex, + EbvBaseInstance, + EbvDrawId, + EbvPosition, + EbvPointSize, + EbvClipVertex, + EbvClipDistance, + EbvCullDistance, + EbvNormal, + EbvVertex, + EbvMultiTexCoord0, + EbvMultiTexCoord1, + EbvMultiTexCoord2, + EbvMultiTexCoord3, + EbvMultiTexCoord4, + EbvMultiTexCoord5, + EbvMultiTexCoord6, + EbvMultiTexCoord7, + EbvFrontColor, + EbvBackColor, + EbvFrontSecondaryColor, + EbvBackSecondaryColor, + EbvTexCoord, + EbvFogFragCoord, + EbvInvocationId, + EbvPrimitiveId, + EbvLayer, + EbvViewportIndex, + EbvPatchVertices, + EbvTessLevelOuter, + EbvTessLevelInner, + EbvBoundingBox, + EbvTessCoord, + EbvColor, + EbvSecondaryColor, + EbvFace, + EbvFragCoord, + EbvPointCoord, + EbvFragColor, + EbvFragData, + EbvFragDepth, + EbvFragStencilRef, + EbvSampleId, + EbvSamplePosition, + EbvSampleMask, + EbvHelperInvocation, + + EbvBaryCoordNoPersp, + EbvBaryCoordNoPerspCentroid, + EbvBaryCoordNoPerspSample, + EbvBaryCoordSmooth, + EbvBaryCoordSmoothCentroid, + EbvBaryCoordSmoothSample, + EbvBaryCoordPullModel, + + EbvViewIndex, + EbvDeviceIndex, + + EbvShadingRateKHR, + EbvPrimitiveShadingRateKHR, + + EbvFragSizeEXT, + EbvFragInvocationCountEXT, + + EbvSecondaryFragDataEXT, + EbvSecondaryFragColorEXT, + + EbvViewportMaskNV, + EbvSecondaryPositionNV, + EbvSecondaryViewportMaskNV, + EbvPositionPerViewNV, + EbvViewportMaskPerViewNV, + EbvFragFullyCoveredNV, + EbvFragmentSizeNV, + EbvInvocationsPerPixelNV, + // ray tracing + EbvLaunchId, + EbvLaunchSize, + EbvInstanceCustomIndex, + EbvGeometryIndex, + EbvWorldRayOrigin, + EbvWorldRayDirection, + EbvObjectRayOrigin, + EbvObjectRayDirection, + EbvRayTmin, + EbvRayTmax, + EbvCullMask, + EbvHitT, + EbvHitKind, + EbvObjectToWorld, + EbvObjectToWorld3x4, + EbvWorldToObject, + EbvWorldToObject3x4, + EbvIncomingRayFlags, + EbvCurrentRayTimeNV, + // barycentrics + EbvBaryCoordNV, + EbvBaryCoordNoPerspNV, + EbvBaryCoordEXT, + EbvBaryCoordNoPerspEXT, + // mesh shaders + EbvTaskCountNV, + EbvPrimitiveCountNV, + EbvPrimitiveIndicesNV, + EbvClipDistancePerViewNV, + EbvCullDistancePerViewNV, + EbvLayerPerViewNV, + EbvMeshViewCountNV, + EbvMeshViewIndicesNV, + //GL_EXT_mesh_shader + EbvPrimitivePointIndicesEXT, + EbvPrimitiveLineIndicesEXT, + EbvPrimitiveTriangleIndicesEXT, + EbvCullPrimitiveEXT, + + // sm builtins + EbvWarpsPerSM, + EbvSMCount, + EbvWarpID, + EbvSMID, + + // HLSL built-ins that live only temporarily, until they get remapped + // to one of the above. + EbvFragDepthGreater, + EbvFragDepthLesser, + EbvGsOutputStream, + EbvOutputPatch, + EbvInputPatch, + + // structbuffer types + EbvAppendConsume, // no need to differentiate append and consume + EbvRWStructuredBuffer, + EbvStructuredBuffer, + EbvByteAddressBuffer, + EbvRWByteAddressBuffer, + + EbvLast +}; + +// In this enum, order matters; users can assume higher precision is a bigger value +// and EpqNone is 0. 
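+// Editor's illustration: because the ordering is guaranteed, precision
+// qualifiers may be compared numerically, e.g. (q1 > q2) means q1 is the
+// higher precision, and (q == EpqNone) tests for "no qualifier".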
+enum TPrecisionQualifier { + EpqNone = 0, + EpqLow, + EpqMedium, + EpqHigh +}; + +#ifdef GLSLANG_WEB +__inline const char* GetStorageQualifierString(TStorageQualifier q) { return ""; } +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) { return ""; } +#else +// These will show up in error messages +__inline const char* GetStorageQualifierString(TStorageQualifier q) +{ + switch (q) { + case EvqTemporary: return "temp"; break; + case EvqGlobal: return "global"; break; + case EvqConst: return "const"; break; + case EvqConstReadOnly: return "const (read only)"; break; +#ifndef GLSLANG_WEB + case EvqSpirvStorageClass: return "spirv_storage_class"; break; +#endif + case EvqVaryingIn: return "in"; break; + case EvqVaryingOut: return "out"; break; + case EvqUniform: return "uniform"; break; + case EvqBuffer: return "buffer"; break; + case EvqShared: return "shared"; break; + case EvqIn: return "in"; break; + case EvqOut: return "out"; break; + case EvqInOut: return "inout"; break; + case EvqVertexId: return "gl_VertexId"; break; + case EvqInstanceId: return "gl_InstanceId"; break; + case EvqPosition: return "gl_Position"; break; + case EvqPointSize: return "gl_PointSize"; break; + case EvqClipVertex: return "gl_ClipVertex"; break; + case EvqFace: return "gl_FrontFacing"; break; + case EvqFragCoord: return "gl_FragCoord"; break; + case EvqPointCoord: return "gl_PointCoord"; break; + case EvqFragColor: return "fragColor"; break; + case EvqFragDepth: return "gl_FragDepth"; break; + case EvqFragStencil: return "gl_FragStencilRefARB"; break; + case EvqPayload: return "rayPayloadNV"; break; + case EvqPayloadIn: return "rayPayloadInNV"; break; + case EvqHitAttr: return "hitAttributeNV"; break; + case EvqCallableData: return "callableDataNV"; break; + case EvqCallableDataIn: return "callableDataInNV"; break; + case EvqtaskPayloadSharedEXT: return "taskPayloadSharedEXT"; break; + default: return "unknown qualifier"; + } +} + +__inline const char* GetBuiltInVariableString(TBuiltInVariable v) +{ + switch (v) { + case EbvNone: return ""; + case EbvNumWorkGroups: return "NumWorkGroups"; + case EbvWorkGroupSize: return "WorkGroupSize"; + case EbvWorkGroupId: return "WorkGroupID"; + case EbvLocalInvocationId: return "LocalInvocationID"; + case EbvGlobalInvocationId: return "GlobalInvocationID"; + case EbvLocalInvocationIndex: return "LocalInvocationIndex"; + case EbvNumSubgroups: return "NumSubgroups"; + case EbvSubgroupID: return "SubgroupID"; + case EbvSubGroupSize: return "SubGroupSize"; + case EbvSubGroupInvocation: return "SubGroupInvocation"; + case EbvSubGroupEqMask: return "SubGroupEqMask"; + case EbvSubGroupGeMask: return "SubGroupGeMask"; + case EbvSubGroupGtMask: return "SubGroupGtMask"; + case EbvSubGroupLeMask: return "SubGroupLeMask"; + case EbvSubGroupLtMask: return "SubGroupLtMask"; + case EbvSubgroupSize2: return "SubgroupSize"; + case EbvSubgroupInvocation2: return "SubgroupInvocationID"; + case EbvSubgroupEqMask2: return "SubgroupEqMask"; + case EbvSubgroupGeMask2: return "SubgroupGeMask"; + case EbvSubgroupGtMask2: return "SubgroupGtMask"; + case EbvSubgroupLeMask2: return "SubgroupLeMask"; + case EbvSubgroupLtMask2: return "SubgroupLtMask"; + case EbvVertexId: return "VertexId"; + case EbvInstanceId: return "InstanceId"; + case EbvVertexIndex: return "VertexIndex"; + case EbvInstanceIndex: return "InstanceIndex"; + case EbvBaseVertex: return "BaseVertex"; + case EbvBaseInstance: return "BaseInstance"; + case EbvDrawId: return "DrawId"; + case EbvPosition: return 
"Position"; + case EbvPointSize: return "PointSize"; + case EbvClipVertex: return "ClipVertex"; + case EbvClipDistance: return "ClipDistance"; + case EbvCullDistance: return "CullDistance"; + case EbvNormal: return "Normal"; + case EbvVertex: return "Vertex"; + case EbvMultiTexCoord0: return "MultiTexCoord0"; + case EbvMultiTexCoord1: return "MultiTexCoord1"; + case EbvMultiTexCoord2: return "MultiTexCoord2"; + case EbvMultiTexCoord3: return "MultiTexCoord3"; + case EbvMultiTexCoord4: return "MultiTexCoord4"; + case EbvMultiTexCoord5: return "MultiTexCoord5"; + case EbvMultiTexCoord6: return "MultiTexCoord6"; + case EbvMultiTexCoord7: return "MultiTexCoord7"; + case EbvFrontColor: return "FrontColor"; + case EbvBackColor: return "BackColor"; + case EbvFrontSecondaryColor: return "FrontSecondaryColor"; + case EbvBackSecondaryColor: return "BackSecondaryColor"; + case EbvTexCoord: return "TexCoord"; + case EbvFogFragCoord: return "FogFragCoord"; + case EbvInvocationId: return "InvocationID"; + case EbvPrimitiveId: return "PrimitiveID"; + case EbvLayer: return "Layer"; + case EbvViewportIndex: return "ViewportIndex"; + case EbvPatchVertices: return "PatchVertices"; + case EbvTessLevelOuter: return "TessLevelOuter"; + case EbvTessLevelInner: return "TessLevelInner"; + case EbvBoundingBox: return "BoundingBox"; + case EbvTessCoord: return "TessCoord"; + case EbvColor: return "Color"; + case EbvSecondaryColor: return "SecondaryColor"; + case EbvFace: return "Face"; + case EbvFragCoord: return "FragCoord"; + case EbvPointCoord: return "PointCoord"; + case EbvFragColor: return "FragColor"; + case EbvFragData: return "FragData"; + case EbvFragDepth: return "FragDepth"; + case EbvFragStencilRef: return "FragStencilRef"; + case EbvSampleId: return "SampleId"; + case EbvSamplePosition: return "SamplePosition"; + case EbvSampleMask: return "SampleMaskIn"; + case EbvHelperInvocation: return "HelperInvocation"; + + case EbvBaryCoordNoPersp: return "BaryCoordNoPersp"; + case EbvBaryCoordNoPerspCentroid: return "BaryCoordNoPerspCentroid"; + case EbvBaryCoordNoPerspSample: return "BaryCoordNoPerspSample"; + case EbvBaryCoordSmooth: return "BaryCoordSmooth"; + case EbvBaryCoordSmoothCentroid: return "BaryCoordSmoothCentroid"; + case EbvBaryCoordSmoothSample: return "BaryCoordSmoothSample"; + case EbvBaryCoordPullModel: return "BaryCoordPullModel"; + + case EbvViewIndex: return "ViewIndex"; + case EbvDeviceIndex: return "DeviceIndex"; + + case EbvFragSizeEXT: return "FragSizeEXT"; + case EbvFragInvocationCountEXT: return "FragInvocationCountEXT"; + + case EbvSecondaryFragDataEXT: return "SecondaryFragDataEXT"; + case EbvSecondaryFragColorEXT: return "SecondaryFragColorEXT"; + + case EbvViewportMaskNV: return "ViewportMaskNV"; + case EbvSecondaryPositionNV: return "SecondaryPositionNV"; + case EbvSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; + case EbvPositionPerViewNV: return "PositionPerViewNV"; + case EbvViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; + case EbvFragFullyCoveredNV: return "FragFullyCoveredNV"; + case EbvFragmentSizeNV: return "FragmentSizeNV"; + case EbvInvocationsPerPixelNV: return "InvocationsPerPixelNV"; + case EbvLaunchId: return "LaunchIdNV"; + case EbvLaunchSize: return "LaunchSizeNV"; + case EbvInstanceCustomIndex: return "InstanceCustomIndexNV"; + case EbvGeometryIndex: return "GeometryIndexEXT"; + case EbvWorldRayOrigin: return "WorldRayOriginNV"; + case EbvWorldRayDirection: return "WorldRayDirectionNV"; + case EbvObjectRayOrigin: return "ObjectRayOriginNV"; + 
case EbvObjectRayDirection: return "ObjectRayDirectionNV"; + case EbvRayTmin: return "ObjectRayTminNV"; + case EbvRayTmax: return "ObjectRayTmaxNV"; + case EbvHitT: return "HitTNV"; + case EbvHitKind: return "HitKindNV"; + case EbvIncomingRayFlags: return "IncomingRayFlagsNV"; + case EbvObjectToWorld: return "ObjectToWorldNV"; + case EbvWorldToObject: return "WorldToObjectNV"; + case EbvCurrentRayTimeNV: return "CurrentRayTimeNV"; + + case EbvBaryCoordEXT: + case EbvBaryCoordNV: return "BaryCoordKHR"; + case EbvBaryCoordNoPerspEXT: + case EbvBaryCoordNoPerspNV: return "BaryCoordNoPerspKHR"; + + case EbvTaskCountNV: return "TaskCountNV"; + case EbvPrimitiveCountNV: return "PrimitiveCountNV"; + case EbvPrimitiveIndicesNV: return "PrimitiveIndicesNV"; + case EbvClipDistancePerViewNV: return "ClipDistancePerViewNV"; + case EbvCullDistancePerViewNV: return "CullDistancePerViewNV"; + case EbvLayerPerViewNV: return "LayerPerViewNV"; + case EbvMeshViewCountNV: return "MeshViewCountNV"; + case EbvMeshViewIndicesNV: return "MeshViewIndicesNV"; + // GL_EXT_mesh_shader + case EbvPrimitivePointIndicesEXT: return "PrimitivePointIndicesEXT"; + case EbvPrimitiveLineIndicesEXT: return "PrimitiveLineIndicesEXT"; + case EbvPrimitiveTriangleIndicesEXT: return "PrimitiveTriangleIndicesEXT"; + case EbvCullPrimitiveEXT: return "CullPrimitiveEXT"; + + case EbvWarpsPerSM: return "WarpsPerSMNV"; + case EbvSMCount: return "SMCountNV"; + case EbvWarpID: return "WarpIDNV"; + case EbvSMID: return "SMIDNV"; + + case EbvShadingRateKHR: return "ShadingRateKHR"; + case EbvPrimitiveShadingRateKHR: return "PrimitiveShadingRateKHR"; + + default: return "unknown built-in variable"; + } +} + +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) +{ + switch (p) { + case EpqNone: return ""; break; + case EpqLow: return "lowp"; break; + case EpqMedium: return "mediump"; break; + case EpqHigh: return "highp"; break; + default: return "unknown precision qualifier"; + } +} +#endif + +__inline bool isTypeSignedInt(TBasicType type) +{ + switch (type) { + case EbtInt8: + case EbtInt16: + case EbtInt: + case EbtInt64: + return true; + default: + return false; + } +} + +__inline bool isTypeUnsignedInt(TBasicType type) +{ + switch (type) { + case EbtUint8: + case EbtUint16: + case EbtUint: + case EbtUint64: + return true; + default: + return false; + } +} + +__inline bool isTypeInt(TBasicType type) +{ + return isTypeSignedInt(type) || isTypeUnsignedInt(type); +} + +__inline bool isTypeFloat(TBasicType type) +{ + switch (type) { + case EbtFloat: + case EbtDouble: + case EbtFloat16: + return true; + default: + return false; + } +} + +__inline int getTypeRank(TBasicType type) +{ + int res = -1; + switch(type) { + case EbtInt8: + case EbtUint8: + res = 0; + break; + case EbtInt16: + case EbtUint16: + res = 1; + break; + case EbtInt: + case EbtUint: + res = 2; + break; + case EbtInt64: + case EbtUint64: + res = 3; + break; + default: + assert(false); + break; + } + return res; +} + +} // end namespace glslang + +#endif // _BASICTYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/Common.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/Common.h new file mode 100644 index 0000000..a5b41cb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/Common.h @@ -0,0 +1,340 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. 
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _COMMON_INCLUDED_
+#define _COMMON_INCLUDED_
+
+#include <algorithm>
+#include <cassert>
+#ifdef _MSC_VER
+#include <cfloat>
+#else
+#include <cmath>
+#endif
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#if defined(__ANDROID__) || (defined(_MSC_VER) && _MSC_VER < 1700)
+#include <sstream>
+namespace std {
+template<typename T>
+std::string to_string(const T& val) {
+  std::ostringstream os;
+  os << val;
+  return os.str();
+}
+}
+#endif
+
+#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API)
+    #include <basetsd.h>
+    #ifndef snprintf
+    #define snprintf sprintf_s
+    #endif
+    #define safe_vsprintf(buf,max,format,args) vsnprintf_s((buf), (max), (max), (format), (args))
+#elif defined (solaris)
+    #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args))
+    #include <sys/int_types.h>
+    #define UINT_PTR  uintptr_t
+#else
+    #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args))
+    #include <stdint.h>
+    #define UINT_PTR  uintptr_t
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+    #include <stdlib.h>
+    inline long long int strtoll (const char* str, char** endptr, int base)
+    {
+        return _strtoi64(str, endptr, base);
+    }
+    inline unsigned long long int strtoull (const char* str, char** endptr, int base)
+    {
+        return _strtoui64(str, endptr, base);
+    }
+    inline long long int atoll (const char* str)
+    {
+        return strtoll(str, NULL, 10);
+    }
+#endif
+
+#if defined(_MSC_VER)
+#define strdup _strdup
+#endif
+
+/* windows only pragma */
+#ifdef _MSC_VER
+    #pragma warning(disable : 4786) // Don't warn about too long identifiers
+    #pragma warning(disable : 4514) // unused inline method
+    #pragma warning(disable : 4201) // nameless union
+#endif
+
+#include "PoolAlloc.h"
+
+//
+// Put POOL_ALLOCATOR_NEW_DELETE in base classes to make them use this scheme.
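+// An illustrative sketch of the intended use (TFoo is a made-up class name):
+//
+//     class TFoo {
+//     public:
+//         POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+//     };
+//
+//     TFoo* foo = new TFoo;   // storage comes from the thread's pool
+//     // no matching 'delete': the memory is reclaimed in bulk when the
+//     // pool itself is popped/freed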
+//
+#define POOL_ALLOCATOR_NEW_DELETE(A)                                  \
+    void* operator new(size_t s) { return (A).allocate(s); }          \
+    void* operator new(size_t, void *_Where) { return (_Where); }     \
+    void operator delete(void*) { }                                   \
+    void operator delete(void *, void *) { }                          \
+    void* operator new[](size_t s) { return (A).allocate(s); }        \
+    void* operator new[](size_t, void *_Where) { return (_Where); }   \
+    void operator delete[](void*) { }                                 \
+    void operator delete[](void *, void *) { }
+
+namespace glslang {
+
+    //
+    // Pool version of string.
+    //
+    typedef pool_allocator<char> TStringAllocator;
+    typedef std::basic_string <char, std::char_traits<char>, TStringAllocator> TString;
+
+} // end namespace glslang
+
+// Repackage the std::hash for use by unordered map/set with a TString key.
+namespace std {
+
+    template<> struct hash<glslang::TString> {
+        std::size_t operator()(const glslang::TString& s) const
+        {
+            const unsigned _FNV_offset_basis = 2166136261U;
+            const unsigned _FNV_prime = 16777619U;
+            unsigned _Val = _FNV_offset_basis;
+            size_t _Count = s.size();
+            const char* _First = s.c_str();
+            for (size_t _Next = 0; _Next < _Count; ++_Next)
+            {
+                _Val ^= (unsigned)_First[_Next];
+                _Val *= _FNV_prime;
+            }
+
+            return _Val;
+        }
+    };
+}
+
+namespace glslang {
+
+inline TString* NewPoolTString(const char* s)
+{
+    void* memory = GetThreadPoolAllocator().allocate(sizeof(TString));
+    return new(memory) TString(s);
+}
+
+template<class T> inline T* NewPoolObject(T*)
+{
+    return new(GetThreadPoolAllocator().allocate(sizeof(T))) T;
+}
+
+template<class T> inline T* NewPoolObject(T, int instances)
+{
+    return new(GetThreadPoolAllocator().allocate(instances * sizeof(T))) T[instances];
+}
+
+//
+// Pool allocator versions of vectors, lists, and maps
+//
+template <class T> class TVector : public std::vector<T, pool_allocator<T> > {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    typedef typename std::vector<T, pool_allocator<T> >::size_type size_type;
+    TVector() : std::vector<T, pool_allocator<T> >() {}
+    TVector(const pool_allocator<T>& a) : std::vector<T, pool_allocator<T> >(a) {}
+    TVector(size_type i) : std::vector<T, pool_allocator<T> >(i) {}
+    TVector(size_type i, const T& val) : std::vector<T, pool_allocator<T> >(i, val) {}
+};
+
+template <class T> class TList : public std::list<T, pool_allocator<T> > {
+};
+
+template <class K, class D, class CMP = std::less<K> >
+class TMap : public std::map<K, D, CMP, pool_allocator<std::pair<K const, D> > > {
+};
+
+template <class K, class D, class HASH = std::hash<K>, class PRED = std::equal_to<K> >
+class TUnorderedMap : public std::unordered_map<K, D, HASH, PRED, pool_allocator<std::pair<K const, D> > > {
+};
+
+template <class K, class CMP = std::less<K> >
+class TSet : public std::set<K, CMP, pool_allocator<K> > {
+};
+
+//
+// Persistent string memory. Should only be used for strings that survive
+// across compiles/links.
+//
+typedef std::basic_string<char> TPersistString;
+
+//
+// templatized min and max functions.
+//
+template <class T> T Min(const T a, const T b) { return a < b ? a : b; }
+template <class T> T Max(const T a, const T b) { return a > b ? a : b; }
+
+//
+// Create a TString object from an integer.
+//
+#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API)
+inline const TString String(const int i, const int base = 10)
+{
+    char text[16];     // 32 bit ints are at most 10 digits in base 10
+    _itoa_s(i, text, sizeof(text), base);
+    return text;
+}
+#else
+inline const TString String(const int i, const int /*base*/ = 10)
+{
+    char text[16];     // 32 bit ints are at most 10 digits in base 10
+
+    // we assume base 10 for all cases
+    snprintf(text, sizeof(text), "%d", i);
+
+    return text;
+}
+#endif
+
+struct TSourceLoc {
+    void init()
+    {
+        name = nullptr; string = 0; line = 0; column = 0;
+    }
+    void init(int stringNum) { init(); string = stringNum; }
+    // Returns the name if it exists. Otherwise, returns the string number.
+    std::string getStringNameOrNum(bool quoteStringName = true) const
+    {
+        if (name != nullptr) {
+            TString qstr = quoteStringName ? ("\"" + *name + "\"") : *name;
+            std::string ret_str(qstr.c_str());
+            return ret_str;
+        }
+        return std::to_string((long long)string);
+    }
+    const char* getFilename() const
+    {
+        if (name == nullptr)
+            return nullptr;
+        return name->c_str();
+    }
+    const char* getFilenameStr() const { return name == nullptr ? "" : name->c_str(); }
+    TString* name; // descriptive name for this string, when a textual name is available, otherwise nullptr
+    int string;
+    int line;
+    int column;
+};
+
+class TPragmaTable : public TMap<TString, TString> {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+};
+
+const int MaxTokenLength = 1024;
+
+template <class T> bool IsPow2(T powerOf2)
+{
+    if (powerOf2 <= 0)
+        return false;
+
+    return (powerOf2 & (powerOf2 - 1)) == 0;
+}
+
+// Round number up to a multiple of the given powerOf2, which is passed as
+// the actual power-of-2 value (e.g. 16), not as an exponent.
+template <class T> void RoundToPow2(T& number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    number = (number + powerOf2 - 1) & ~(powerOf2 - 1);
+}
+
+template <class T> bool IsMultipleOfPow2(T number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    return ! (number & (powerOf2 - 1));
+}
+
+// Returns log2 of an integer power of 2.
+// T should be integral.
+template <class T> int IntLog2(T n)
+{
+    assert(IsPow2(n));
+    int result = 0;
+    while ((T(1) << result) != n) {
+        result++;
+    }
+    return result;
+}
+
+inline bool IsInfinity(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_NINF:
+    case _FPCLASS_PINF:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isinf(x);
+#endif
+}
+
+inline bool IsNan(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_SNAN:
+    case _FPCLASS_QNAN:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isnan(x);
+#endif
+}
+
+} // end namespace glslang
+
+#endif // _COMMON_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ConstantUnion.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ConstantUnion.h
new file mode 100644
index 0000000..c4ffb85
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ConstantUnion.h
@@ -0,0 +1,974 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _CONSTANT_UNION_INCLUDED_ +#define _CONSTANT_UNION_INCLUDED_ + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" + +namespace glslang { + +class TConstUnion { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TConstUnion() : iConst(0), type(EbtInt) { } + + void setI8Const(signed char i) + { + i8Const = i; + type = EbtInt8; + } + + void setU8Const(unsigned char u) + { + u8Const = u; + type = EbtUint8; + } + + void setI16Const(signed short i) + { + i16Const = i; + type = EbtInt16; + } + + void setU16Const(unsigned short u) + { + u16Const = u; + type = EbtUint16; + } + + void setIConst(int i) + { + iConst = i; + type = EbtInt; + } + + void setUConst(unsigned int u) + { + uConst = u; + type = EbtUint; + } + + void setI64Const(long long i64) + { + i64Const = i64; + type = EbtInt64; + } + + void setU64Const(unsigned long long u64) + { + u64Const = u64; + type = EbtUint64; + } + + void setDConst(double d) + { + dConst = d; + type = EbtDouble; + } + + void setBConst(bool b) + { + bConst = b; + type = EbtBool; + } + + void setSConst(const TString* s) + { + sConst = s; + type = EbtString; + } + + signed char getI8Const() const { return i8Const; } + unsigned char getU8Const() const { return u8Const; } + signed short getI16Const() const { return i16Const; } + unsigned short getU16Const() const { return u16Const; } + int getIConst() const { return iConst; } + unsigned int getUConst() const { return uConst; } + long long getI64Const() const { return i64Const; } + unsigned long long getU64Const() const { return u64Const; } + double getDConst() const { return dConst; } + bool getBConst() const { return bConst; } + const TString* getSConst() const { return sConst; } + + bool operator==(const signed char i) const + { + if (i == i8Const) + return true; + + return false; + } + + bool operator==(const unsigned char u) const + { + if (u == u8Const) + return true; + + return false; + } + + bool operator==(const signed short i) const + { + if (i == i16Const) + return true; + + return false; + } + + bool operator==(const unsigned short u) const + { + if (u == u16Const) + return true; + + return false; + } + + bool operator==(const int i) const + { + if (i == iConst) + return true; + + return false; + } + + bool operator==(const unsigned int u) const + { + if (u == uConst) + return true; + + return false; + } + + bool operator==(const long long i64) const + { + if (i64 == i64Const) + return true; + + return false; + } + + bool operator==(const unsigned long long u64) const + { + if (u64 == u64Const) + return true; + + return false; + } + + bool operator==(const double d) const + { + if (d == dConst) + return true; + + return false; + } + + bool operator==(const bool b) const + { + if (b == bConst) + return true; + + return false; + } + + bool operator==(const TConstUnion& constant) const + { + if (constant.type != type) + return false; + + switch (type) { + case EbtInt: + if (constant.iConst == iConst) + return true; + + break; + 
case EbtUint: + if (constant.uConst == uConst) + return true; + + break; + case EbtBool: + if (constant.bConst == bConst) + return true; + + break; + case EbtDouble: + if (constant.dConst == dConst) + return true; + + break; + +#ifndef GLSLANG_WEB + case EbtInt16: + if (constant.i16Const == i16Const) + return true; + + break; + case EbtUint16: + if (constant.u16Const == u16Const) + return true; + + break; + case EbtInt8: + if (constant.i8Const == i8Const) + return true; + + break; + case EbtUint8: + if (constant.u8Const == u8Const) + return true; + + break; + case EbtInt64: + if (constant.i64Const == i64Const) + return true; + + break; + case EbtUint64: + if (constant.u64Const == u64Const) + return true; + + break; +#endif + default: + assert(false && "Default missing"); + } + + return false; + } + + bool operator!=(const signed char i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned char u) const + { + return !operator==(u); + } + + bool operator!=(const signed short i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned short u) const + { + return !operator==(u); + } + + bool operator!=(const int i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned int u) const + { + return !operator==(u); + } + + bool operator!=(const long long i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned long long u) const + { + return !operator==(u); + } + + bool operator!=(const float f) const + { + return !operator==(f); + } + + bool operator!=(const bool b) const + { + return !operator==(b); + } + + bool operator!=(const TConstUnion& constant) const + { + return !operator==(constant); + } + + bool operator>(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { + case EbtInt: + if (iConst > constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst > constant.uConst) + return true; + + return false; + case EbtDouble: + if (dConst > constant.dConst) + return true; + + return false; +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const > constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const > constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const > constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const > constant.u16Const) + return true; + + return false; + case EbtInt64: + if (i64Const > constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const > constant.u64Const) + return true; + + return false; +#endif + default: + assert(false && "Default missing"); + return false; + } + } + + bool operator<(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const < constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const < constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const < constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const < constant.u16Const) + return true; + return false; + case EbtInt64: + if (i64Const < constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const < constant.u64Const) + return true; + + return false; +#endif + case EbtDouble: + if (dConst < constant.dConst) + return true; + + return false; + case EbtInt: + if (iConst < constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst < constant.uConst) + return true; + + return false; 
+        default:
+            assert(false && "Default missing");
+            return false;
+        }
+    }
+
+    TConstUnion operator+(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst + constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst + constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst + constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const + constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const + constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const + constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const + constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const + constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const + constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator-(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst - constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst - constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst - constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const - constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const - constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const - constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const - constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const - constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const - constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator*(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst * constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst * constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst * constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const * constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const * constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const * constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const * constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const * constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const * constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator%(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst % constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst % constant.uConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const % constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const % constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const % constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const % constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const %
constant.u16Const); break; + case EbtUint64: returnValue.setU64Const(u64Const % constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator>>(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const >> constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const >> constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const >> constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const >> constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const >> constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const >> constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const >> constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const >> constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const >> constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const >> constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const >> constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const >> constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst >> constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst >> constant.i8Const); break; + case EbtUint8: 
returnValue.setIConst(iConst >> constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst >> constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst >> constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst >> constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst >> constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst >> constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst >> constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst >> constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst >> constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst >> constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; +#ifndef GLSLANG_WEB + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const >> constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const >> constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const >> constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const >> constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const >> constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const >> constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator<<(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const << constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const << constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const << constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const << constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const << constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const << constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const << constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const << constant.u64Const); break; + default: 
assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const << constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const << constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const << constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const << constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const << constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const << constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const << constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const << constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const << constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const << constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const << constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const << constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const << constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const << constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const << constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const << constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const << constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const << constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const << constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const << constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const << constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const << constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const << constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const << constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const << constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const << constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const << constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const << constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const << constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const << constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const << constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const << constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const << constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const << constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const << 
constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst << constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst << constant.i8Const); break; + case EbtUint8: returnValue.setIConst(iConst << constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst << constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst << constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst << constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst << constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst << constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst << constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst << constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst << constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst << constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst & constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst & constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const & constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const & constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const & constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const & constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const & constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const & constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator|(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst | constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst | constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const | constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const | constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const | constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const | constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const | constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const | constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator^(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == 
constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst ^ constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst ^ constant.uConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const ^ constant.i8Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const ^ constant.u8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const ^ constant.i16Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const ^ constant.u16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const ^ constant.i64Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const ^ constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator~() const
+    {
+        TConstUnion returnValue;
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(~iConst); break;
+        case EbtUint:   returnValue.setUConst(~uConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(~i8Const); break;
+        case EbtUint8:  returnValue.setU8Const(~u8Const); break;
+        case EbtInt16:  returnValue.setI16Const(~i16Const); break;
+        case EbtUint16: returnValue.setU16Const(~u16Const); break;
+        case EbtInt64:  returnValue.setI64Const(~i64Const); break;
+        case EbtUint64: returnValue.setU64Const(~u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator&&(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtBool: returnValue.setBConst(bConst && constant.bConst); break;
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator||(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtBool: returnValue.setBConst(bConst || constant.bConst); break;
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TBasicType getType() const { return type; }
+
+private:
+    union {
+        signed char i8Const;          // used for i8vec, scalar int8s
+        unsigned char u8Const;        // used for u8vec, scalar uint8s
+        signed short i16Const;        // used for i16vec, scalar int16s
+        unsigned short u16Const;      // used for u16vec, scalar uint16s
+        int iConst;                   // used for ivec, scalar ints
+        unsigned int uConst;          // used for uvec, scalar uints
+        long long i64Const;           // used for i64vec, scalar int64s
+        unsigned long long u64Const;  // used for u64vec, scalar uint64s
+        bool bConst;                  // used for bvec, scalar bools
+        double dConst;                // used for vec, dvec, mat, dmat, scalar floats and doubles
+        const TString* sConst;        // string constant
+    };
+
+    TBasicType type;
+};
+
+// Encapsulate having a pointer to an array of TConstUnion,
+// which only needs to be allocated if its size is going to be
+// bigger than 0.
+//
+// One convenience is being able to use [] to index into the array directly,
+// instead of having to dereference the underlying vector pointer first.
+//
+// General usage is that the size is known up front, and it is
+// created once with the proper size.
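+//
+// An illustrative sketch of that usage (the values are made up):
+//
+//     TConstUnionArray consts(3);   // one allocation, at the known size
+//     TConstUnion c;
+//     c.setDConst(0.5);
+//     consts[0] = c;                // [] reaches the elements directly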
+//
+class TConstUnionArray {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TConstUnionArray() : unionArray(nullptr) { }
+    virtual ~TConstUnionArray() { }
+
+    explicit TConstUnionArray(int size)
+    {
+        if (size == 0)
+            unionArray = nullptr;
+        else
+            unionArray = new TConstUnionVector(size);
+    }
+    TConstUnionArray(const TConstUnionArray& a) = default;
+    TConstUnionArray(const TConstUnionArray& a, int start, int size)
+    {
+        unionArray = new TConstUnionVector(size);
+        for (int i = 0; i < size; ++i)
+            (*unionArray)[i] = a[start + i];
+    }
+
+    // Use this constructor for a smear operation
+    TConstUnionArray(int size, const TConstUnion& val)
+    {
+        unionArray = new TConstUnionVector(size, val);
+    }
+
+    int size() const { return unionArray ? (int)unionArray->size() : 0; }
+    TConstUnion& operator[](size_t index) { return (*unionArray)[index]; }
+    const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; }
+    bool operator==(const TConstUnionArray& rhs) const
+    {
+        // this includes the case that both are unallocated
+        if (unionArray == rhs.unionArray)
+            return true;
+
+        if (! unionArray || ! rhs.unionArray)
+            return false;
+
+        return *unionArray == *rhs.unionArray;
+    }
+    bool operator!=(const TConstUnionArray& rhs) const { return ! operator==(rhs); }
+
+    double dot(const TConstUnionArray& rhs)
+    {
+        assert(rhs.unionArray->size() == unionArray->size());
+        double sum = 0.0;
+
+        for (size_t comp = 0; comp < unionArray->size(); ++comp)
+            sum += (*this)[comp].getDConst() * rhs[comp].getDConst();
+
+        return sum;
+    }
+
+    bool empty() const { return unionArray == nullptr; }
+
+protected:
+    typedef TVector<TConstUnion> TConstUnionVector;
+    TConstUnionVector* unionArray;
+};
+
+} // end namespace glslang
+
+#endif // _CONSTANT_UNION_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/InfoSink.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/InfoSink.h
new file mode 100644
index 0000000..dceb603
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/InfoSink.h
@@ -0,0 +1,144 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _INFOSINK_INCLUDED_
+#define _INFOSINK_INCLUDED_
+
+#include "../Include/Common.h"
+#include <cmath>
+
+namespace glslang {
+
+//
+// TPrefixType is used to centralize how info log messages start.
+// See below.
+//
+enum TPrefixType {
+    EPrefixNone,
+    EPrefixWarning,
+    EPrefixError,
+    EPrefixInternalError,
+    EPrefixUnimplemented,
+    EPrefixNote
+};
+
+enum TOutputStream {
+    ENull = 0,
+    EDebugger = 0x01,
+    EStdOut = 0x02,
+    EString = 0x04,
+};
+//
+// Encapsulate info logs for all objects that have them.
+//
+// The methods are a general set of tools for getting a variety of
+// messages and types inserted into the log.
+//
+class TInfoSinkBase {
+public:
+    TInfoSinkBase() : outputStream(4) {}
+    void erase() { sink.erase(); }
+    TInfoSinkBase& operator<<(const TPersistString& t) { append(t); return *this; }
+    TInfoSinkBase& operator<<(char c)                  { append(1, c); return *this; }
+    TInfoSinkBase& operator<<(const char* s)           { append(s); return *this; }
+    TInfoSinkBase& operator<<(int n)                   { append(String(n)); return *this; }
+    TInfoSinkBase& operator<<(unsigned int n)          { append(String(n)); return *this; }
+    TInfoSinkBase& operator<<(float n)                 { const int size = 40; char buf[size];
+                                                         snprintf(buf, size, (fabs(n) > 1e-8 && fabs(n) < 1e8) || n == 0.0f ?
"%f" : "%g", n); + append(buf); + return *this; } + TInfoSinkBase& operator+(const TPersistString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator<<(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const char* s) { append(s); return *this; } + const char* c_str() const { return sink.c_str(); } + void prefix(TPrefixType message) { + switch(message) { + case EPrefixNone: break; + case EPrefixWarning: append("WARNING: "); break; + case EPrefixError: append("ERROR: "); break; + case EPrefixInternalError: append("INTERNAL ERROR: "); break; + case EPrefixUnimplemented: append("UNIMPLEMENTED: "); break; + case EPrefixNote: append("NOTE: "); break; + default: append("UNKNOWN ERROR: "); break; + } + } + void location(const TSourceLoc& loc) { + const int maxSize = 24; + char locText[maxSize]; + snprintf(locText, maxSize, ":%d", loc.line); + append(loc.getStringNameOrNum(false).c_str()); + append(locText); + append(": "); + } + void message(TPrefixType message, const char* s) { + prefix(message); + append(s); + append("\n"); + } + void message(TPrefixType message, const char* s, const TSourceLoc& loc) { + prefix(message); + location(loc); + append(s); + append("\n"); + } + + void setOutputStream(int output = 4) + { + outputStream = output; + } + +protected: + void append(const char* s); + + void append(int count, char c); + void append(const TPersistString& t); + void append(const TString& t); + + void checkMem(size_t growth) { if (sink.capacity() < sink.size() + growth + 2) + sink.reserve(sink.capacity() + sink.capacity() / 2); } + void appendToStream(const char* s); + TPersistString sink; + int outputStream; +}; + +} // end namespace glslang + +class TInfoSink { +public: + glslang::TInfoSinkBase info; + glslang::TInfoSinkBase debug; +}; + +#endif // _INFOSINK_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/InitializeGlobals.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/InitializeGlobals.h new file mode 100644 index 0000000..95d0a40 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/InitializeGlobals.h @@ -0,0 +1,44 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef __INITIALIZE_GLOBALS_INCLUDED_ +#define __INITIALIZE_GLOBALS_INCLUDED_ + +namespace glslang { + +bool InitializePoolIndex(); + +} // end namespace glslang + +#endif // __INITIALIZE_GLOBALS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/PoolAlloc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/PoolAlloc.h new file mode 100644 index 0000000..1f5cac7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/PoolAlloc.h @@ -0,0 +1,318 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _POOLALLOC_INCLUDED_ +#define _POOLALLOC_INCLUDED_ + +#ifdef _DEBUG +# define GUARD_BLOCKS // define to enable guard block sanity checking +#endif + +// +// This header defines an allocator that can be used to efficiently +// allocate a large number of small requests for heap memory, with the +// intention that they are not individually deallocated, but rather +// collectively deallocated at one time. +// +// This simultaneously +// +// * Makes each individual allocation much more efficient; the +// typical allocation is trivial. +// * Completely avoids the cost of doing individual deallocation. +// * Saves the trouble of tracking down and plugging a large class of leaks. 
+//
+// Individual classes can use this allocator by supplying their own
+// new and delete methods.
+//
+// STL containers can use this allocator by using the pool_allocator
+// class as the allocator (second) template argument.
+//
+
+#include <stddef.h>
+#include <string.h>
+#include <vector>
+
+namespace glslang {
+
+// If we are using guard blocks, we must track each individual
+// allocation.  If we aren't using guard blocks, these
+// never get instantiated, so won't have any impact.
+//
+
+class TAllocation {
+public:
+    TAllocation(size_t size, unsigned char* mem, TAllocation* prev = 0) :
+        size(size), mem(mem), prevAlloc(prev) {
+        // Allocations are bracketed:
+        //    [allocationHeader][initialGuardBlock][userData][finalGuardBlock]
+        // This would be cleaner with if (guardBlockSize)..., but that
+        // makes the compiler print warnings about 0 length memsets,
+        // even with the if() protecting them.
+# ifdef GUARD_BLOCKS
+        memset(preGuard(), guardBlockBeginVal, guardBlockSize);
+        memset(data(), userDataFill, size);
+        memset(postGuard(), guardBlockEndVal, guardBlockSize);
+# endif
+    }
+
+    void check() const {
+        checkGuardBlock(preGuard(), guardBlockBeginVal, "before");
+        checkGuardBlock(postGuard(), guardBlockEndVal, "after");
+    }
+
+    void checkAllocList() const;
+
+    // Return total size needed to accommodate user buffer of 'size',
+    // plus our tracking data.
+    inline static size_t allocationSize(size_t size) {
+        return size + 2 * guardBlockSize + headerSize();
+    }
+
+    // Offset from surrounding buffer to get to user data buffer.
+    inline static unsigned char* offsetAllocation(unsigned char* m) {
+        return m + guardBlockSize + headerSize();
+    }
+
+private:
+    void checkGuardBlock(unsigned char* blockMem, unsigned char val, const char* locText) const;
+
+    // Find offsets to pre and post guard blocks, and user data buffer
+    unsigned char* preGuard()  const { return mem + headerSize(); }
+    unsigned char* data()      const { return preGuard() + guardBlockSize; }
+    unsigned char* postGuard() const { return data() + size; }
+
+    size_t size;             // size of the user data area
+    unsigned char* mem;      // beginning of our allocation (pts to header)
+    TAllocation* prevAlloc;  // prior allocation in the chain
+
+    const static unsigned char guardBlockBeginVal;
+    const static unsigned char guardBlockEndVal;
+    const static unsigned char userDataFill;
+
+    const static size_t guardBlockSize;
+# ifdef GUARD_BLOCKS
+    inline static size_t headerSize() { return sizeof(TAllocation); }
+# else
+    inline static size_t headerSize() { return 0; }
+# endif
+};
+
+//
+// There are several stacks.  One is to track the pushing and popping
+// of the user, and is not yet implemented.  The others are simply
+// repositories of free pages or used pages.
+//
+// Page stacks are linked together with a simple header at the beginning
+// of each allocation obtained from the underlying OS.  Multi-page allocations
+// are returned to the OS.  Individual page allocations are kept for future
+// re-use.
+//
+// The "page size" used is not, nor must it match, the underlying OS
+// page size.  But, having it be about that size or equal to a set of
+// pages is likely most optimal.
+//
+class TPoolAllocator {
+public:
+    TPoolAllocator(int growthIncrement = 8*1024, int allocationAlignment = 16);
+
+    //
+    // Don't call the destructor just to free up the memory, call pop()
+    //
+    ~TPoolAllocator();
+
+    //
+    // Call push() to establish a new place to pop memory to.  Does not
+    // have to be called to get things started.
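+    // For example (editor's sketch, not part of the upstream header), a pass
+    // can bracket its transient allocations with the push()/pop() pair and
+    // the thread-pool hooks declared later in this header:
+    //
+    //     glslang::TPoolAllocator pool;
+    //     glslang::SetThreadPoolAllocator(&pool);
+    //     pool.push();                        // open an allocation scope
+    //     void* scratch = pool.allocate(64);  // near-free bump allocation
+    //     // ... many more small allocations ...
+    //     pool.pop();                         // frees everything since push()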
+    //
+    void push();
+
+    //
+    // Call pop() to free all memory allocated since the last call to push(),
+    // or, if push() was never called, to free all memory allocated since the
+    // first allocation.
+    //
+    void pop();
+
+    //
+    // Call popAll() to free all memory allocated.
+    //
+    void popAll();
+
+    //
+    // Call allocate() to actually acquire memory.  Returns 0 if no memory
+    // available, otherwise a properly aligned pointer to 'numBytes' of memory.
+    //
+    void* allocate(size_t numBytes);
+
+    //
+    // There is no deallocate.  The point of this class is that
+    // deallocation can be skipped by the user of it, as the model
+    // of use is to simultaneously deallocate everything at once
+    // by calling pop(), and to not have to solve memory leak problems.
+    //
+
+protected:
+    friend struct tHeader;
+
+    struct tHeader {
+        tHeader(tHeader* nextPage, size_t pageCount) :
+#ifdef GUARD_BLOCKS
+            lastAllocation(0),
+#endif
+            nextPage(nextPage), pageCount(pageCount) { }
+
+        ~tHeader() {
+#ifdef GUARD_BLOCKS
+            if (lastAllocation)
+                lastAllocation->checkAllocList();
+#endif
+        }
+
+#ifdef GUARD_BLOCKS
+        TAllocation* lastAllocation;
+#endif
+        tHeader* nextPage;
+        size_t pageCount;
+    };
+
+    struct tAllocState {
+        size_t offset;
+        tHeader* page;
+    };
+    typedef std::vector<tAllocState> tAllocStack;
+
+    // Track allocations if and only if we're using guard blocks
+#ifndef GUARD_BLOCKS
+    void* initializeAllocation(tHeader*, unsigned char* memory, size_t) {
+#else
+    void* initializeAllocation(tHeader* block, unsigned char* memory, size_t numBytes) {
+        new(memory) TAllocation(numBytes, memory, block->lastAllocation);
+        block->lastAllocation = reinterpret_cast<TAllocation*>(memory);
+#endif
+
+        // This is optimized entirely away if GUARD_BLOCKS is not defined.
+        return TAllocation::offsetAllocation(memory);
+    }
+
+    size_t pageSize;           // granularity of allocation from the OS
+    size_t alignment;          // all returned allocations will be aligned at
+                               // this granularity, which will be a power of 2
+    size_t alignmentMask;
+    size_t headerSkip;         // amount of memory to skip to make room for the
+                               // header (basically, size of header, rounded
+                               // up to make it aligned)
+    size_t currentPageOffset;  // next offset in top of inUseList to allocate from
+    tHeader* freeList;         // list of popped memory
+    tHeader* inUseList;        // list of all memory currently being used
+    tAllocStack stack;         // stack of where to allocate from, to partition pool
+
+    int numCalls;              // just an interesting statistic
+    size_t totalBytes;         // just an interesting statistic
+private:
+    TPoolAllocator& operator=(const TPoolAllocator&);  // don't allow assignment operator
+    TPoolAllocator(const TPoolAllocator&);             // don't allow default copy constructor
+};
+
+//
+// There could potentially be many pools with pops happening at
+// different times.  But a simple use is to have a global pop
+// with everyone using the same global allocator.
+//
+extern TPoolAllocator& GetThreadPoolAllocator();
+void SetThreadPoolAllocator(TPoolAllocator* poolAllocator);
+
+//
+// This STL compatible allocator is intended to be used as the allocator
+// parameter to templatized STL containers, like vector and map.
+//
+// It will use the pools for allocation, and not
+// do any deallocation, but will still do destruction.
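+//
+// A minimal sketch of that use (editor's illustration, assuming the standard
+// STL allocator protocol):
+//
+//     std::vector<int, glslang::pool_allocator<int>> v; // backed by GetThreadPoolAllocator()
+//     v.push_back(42);  // element storage comes from the pool
+//     // elements are still destroyed normally, but their memory is reclaimed
+//     // only when the owning pool is popped, never one element at a time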
+//
+template<class T>
+class pool_allocator {
+public:
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    typedef T *pointer;
+    typedef const T *const_pointer;
+    typedef T& reference;
+    typedef const T& const_reference;
+    typedef T value_type;
+    template<class Other>
+    struct rebind {
+        typedef pool_allocator<Other> other;
+    };
+    pointer address(reference x) const { return &x; }
+    const_pointer address(const_reference x) const { return &x; }
+
+    pool_allocator() : allocator(GetThreadPoolAllocator()) { }
+    pool_allocator(TPoolAllocator& a) : allocator(a) { }
+    pool_allocator(const pool_allocator<T>& p) : allocator(p.allocator) { }
+
+    template<class Other>
+    pool_allocator(const pool_allocator<Other>& p) : allocator(p.getAllocator()) { }
+
+    pointer allocate(size_type n) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); }
+    pointer allocate(size_type n, const void*) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); }
+
+    void deallocate(void*, size_type) { }
+    void deallocate(pointer, size_type) { }
+
+    pointer _Charalloc(size_t n) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n)); }
+
+    void construct(pointer p, const T& val) { new ((void *)p) T(val); }
+    void destroy(pointer p) { p->T::~T(); }
+
+    bool operator==(const pool_allocator& rhs) const { return &getAllocator() == &rhs.getAllocator(); }
+    bool operator!=(const pool_allocator& rhs) const { return &getAllocator() != &rhs.getAllocator(); }
+
+    size_type max_size() const { return static_cast<size_type>(-1) / sizeof(T); }
+    size_type max_size(int size) const { return static_cast<size_type>(-1) / size; }
+
+    TPoolAllocator& getAllocator() const { return allocator; }
+
+    pool_allocator select_on_container_copy_construction() const { return pool_allocator{}; }
+
+protected:
+    pool_allocator& operator=(const pool_allocator&) { return *this; }
+    TPoolAllocator& allocator;
+};
+
+} // end namespace glslang
+
+#endif // _POOLALLOC_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ResourceLimits.h
new file mode 100644
index 0000000..b36c8d6
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ResourceLimits.h
@@ -0,0 +1,159 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _RESOURCE_LIMITS_INCLUDED_ +#define _RESOURCE_LIMITS_INCLUDED_ + +struct TLimits { + bool nonInductiveForLoops; + bool whileLoops; + bool doWhileLoops; + bool generalUniformIndexing; + bool generalAttributeMatrixVectorIndexing; + bool generalVaryingIndexing; + bool generalSamplerIndexing; + bool generalVariableIndexing; + bool generalConstantMatrixVectorIndexing; +}; + +struct TBuiltInResource { + int maxLights; + int maxClipPlanes; + int maxTextureUnits; + int maxTextureCoords; + int maxVertexAttribs; + int maxVertexUniformComponents; + int maxVaryingFloats; + int maxVertexTextureImageUnits; + int maxCombinedTextureImageUnits; + int maxTextureImageUnits; + int maxFragmentUniformComponents; + int maxDrawBuffers; + int maxVertexUniformVectors; + int maxVaryingVectors; + int maxFragmentUniformVectors; + int maxVertexOutputVectors; + int maxFragmentInputVectors; + int minProgramTexelOffset; + int maxProgramTexelOffset; + int maxClipDistances; + int maxComputeWorkGroupCountX; + int maxComputeWorkGroupCountY; + int maxComputeWorkGroupCountZ; + int maxComputeWorkGroupSizeX; + int maxComputeWorkGroupSizeY; + int maxComputeWorkGroupSizeZ; + int maxComputeUniformComponents; + int maxComputeTextureImageUnits; + int maxComputeImageUniforms; + int maxComputeAtomicCounters; + int maxComputeAtomicCounterBuffers; + int maxVaryingComponents; + int maxVertexOutputComponents; + int maxGeometryInputComponents; + int maxGeometryOutputComponents; + int maxFragmentInputComponents; + int maxImageUnits; + int maxCombinedImageUnitsAndFragmentOutputs; + int maxCombinedShaderOutputResources; + int maxImageSamples; + int maxVertexImageUniforms; + int maxTessControlImageUniforms; + int maxTessEvaluationImageUniforms; + int maxGeometryImageUniforms; + int maxFragmentImageUniforms; + int maxCombinedImageUniforms; + int maxGeometryTextureImageUnits; + int maxGeometryOutputVertices; + int maxGeometryTotalOutputComponents; + int maxGeometryUniformComponents; + int maxGeometryVaryingComponents; + int maxTessControlInputComponents; + int maxTessControlOutputComponents; + int maxTessControlTextureImageUnits; + int maxTessControlUniformComponents; + int maxTessControlTotalOutputComponents; + int maxTessEvaluationInputComponents; + int maxTessEvaluationOutputComponents; + int maxTessEvaluationTextureImageUnits; + int maxTessEvaluationUniformComponents; + int maxTessPatchComponents; + int maxPatchVertices; + int maxTessGenLevel; + int maxViewports; + int maxVertexAtomicCounters; + int maxTessControlAtomicCounters; + int maxTessEvaluationAtomicCounters; + int maxGeometryAtomicCounters; + int maxFragmentAtomicCounters; + int maxCombinedAtomicCounters; + int maxAtomicCounterBindings; + int maxVertexAtomicCounterBuffers; + int maxTessControlAtomicCounterBuffers; + int maxTessEvaluationAtomicCounterBuffers; + int maxGeometryAtomicCounterBuffers; + int maxFragmentAtomicCounterBuffers; + int maxCombinedAtomicCounterBuffers; + int maxAtomicCounterBufferSize; + int 
maxTransformFeedbackBuffers; + int maxTransformFeedbackInterleavedComponents; + int maxCullDistances; + int maxCombinedClipAndCullDistances; + int maxSamples; + int maxMeshOutputVerticesNV; + int maxMeshOutputPrimitivesNV; + int maxMeshWorkGroupSizeX_NV; + int maxMeshWorkGroupSizeY_NV; + int maxMeshWorkGroupSizeZ_NV; + int maxTaskWorkGroupSizeX_NV; + int maxTaskWorkGroupSizeY_NV; + int maxTaskWorkGroupSizeZ_NV; + int maxMeshViewCountNV; + int maxMeshOutputVerticesEXT; + int maxMeshOutputPrimitivesEXT; + int maxMeshWorkGroupSizeX_EXT; + int maxMeshWorkGroupSizeY_EXT; + int maxMeshWorkGroupSizeZ_EXT; + int maxTaskWorkGroupSizeX_EXT; + int maxTaskWorkGroupSizeY_EXT; + int maxTaskWorkGroupSizeZ_EXT; + int maxMeshViewCountEXT; + int maxDualSourceDrawBuffersEXT; + + TLimits limits; +}; + +#endif // _RESOURCE_LIMITS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ShHandle.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ShHandle.h new file mode 100644 index 0000000..df07bd8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/ShHandle.h @@ -0,0 +1,176 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SHHANDLE_INCLUDED_ +#define _SHHANDLE_INCLUDED_ + +// +// Machine independent part of the compiler private objects +// sent as ShHandle to the driver. +// +// This should not be included by driver code. +// + +#define SH_EXPORTING +#include "../Public/ShaderLang.h" +#include "../MachineIndependent/Versions.h" +#include "InfoSink.h" + +class TCompiler; +class TLinker; +class TUniformMap; + +// +// The base class used to back handles returned to the driver. 
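+// For instance (editor's sketch; ConstructLinker/DeleteLinker are declared at
+// the end of this header, and the EShExecutable value is assumed from the old
+// ShaderLang interface), driver-side code recovers the concrete object
+// through the getAs*() virtuals:
+//
+//     TShHandleBase* base = ConstructLinker(EShExVertexFragment, 0);
+//     if (TLinker* linker = base->getAsLinker()) {
+//         // machine dependent code drives the linker here
+//     }
+//     DeleteLinker(base);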
+//
+class TShHandleBase {
+public:
+    TShHandleBase() { pool = new glslang::TPoolAllocator; }
+    virtual ~TShHandleBase() { delete pool; }
+    virtual TCompiler* getAsCompiler() { return 0; }
+    virtual TLinker* getAsLinker() { return 0; }
+    virtual TUniformMap* getAsUniformMap() { return 0; }
+    virtual glslang::TPoolAllocator* getPool() const { return pool; }
+private:
+    glslang::TPoolAllocator* pool;
+};
+
+//
+// The base class for the machine dependent linker to derive from
+// for managing where uniforms live.
+//
+class TUniformMap : public TShHandleBase {
+public:
+    TUniformMap() { }
+    virtual ~TUniformMap() { }
+    virtual TUniformMap* getAsUniformMap() { return this; }
+    virtual int getLocation(const char* name) = 0;
+    virtual TInfoSink& getInfoSink() { return infoSink; }
+    TInfoSink infoSink;
+};
+
+class TIntermNode;
+
+//
+// The base class for the machine dependent compiler to derive from
+// for managing object code from the compile.
+//
+class TCompiler : public TShHandleBase {
+public:
+    TCompiler(EShLanguage l, TInfoSink& sink) : infoSink(sink), language(l), haveValidObjectCode(false) { }
+    virtual ~TCompiler() { }
+    EShLanguage getLanguage() { return language; }
+    virtual TInfoSink& getInfoSink() { return infoSink; }
+
+    virtual bool compile(TIntermNode* root, int version = 0, EProfile profile = ENoProfile) = 0;
+
+    virtual TCompiler* getAsCompiler() { return this; }
+    virtual bool linkable() { return haveValidObjectCode; }
+
+    TInfoSink& infoSink;
+protected:
+    TCompiler& operator=(TCompiler&);
+
+    EShLanguage language;
+    bool haveValidObjectCode;
+};
+
+//
+// Link operations are based on a list of compile results...
+//
+typedef glslang::TVector<TCompiler*> TCompilerList;
+typedef glslang::TVector<TShHandleBase*> THandleList;
+
+//
+// The base class for the machine dependent linker to derive from
+// to manage the resulting executable.
+//
+
+class TLinker : public TShHandleBase {
+public:
+    TLinker(EShExecutable e, TInfoSink& iSink) :
+        infoSink(iSink),
+        executable(e),
+        haveReturnableObjectCode(false),
+        appAttributeBindings(0),
+        fixedAttributeBindings(0),
+        excludedAttributes(0),
+        excludedCount(0),
+        uniformBindings(0) { }
+    virtual TLinker* getAsLinker() { return this; }
+    virtual ~TLinker() { }
+    virtual bool link(TCompilerList&, TUniformMap*) = 0;
+    virtual bool link(THandleList&) { return false; }
+    virtual void setAppAttributeBindings(const ShBindingTable* t) { appAttributeBindings = t; }
+    virtual void setFixedAttributeBindings(const ShBindingTable* t) { fixedAttributeBindings = t; }
+    virtual void getAttributeBindings(ShBindingTable const **t) const = 0;
+    virtual void setExcludedAttributes(const int* attributes, int count) { excludedAttributes = attributes; excludedCount = count; }
+    virtual ShBindingTable* getUniformBindings() const { return uniformBindings; }
+    virtual const void* getObjectCode() const { return 0; } // a real compiler would be returning object code here
+    virtual TInfoSink& getInfoSink() { return infoSink; }
+    TInfoSink& infoSink;
+protected:
+    TLinker& operator=(TLinker&);
+    EShExecutable executable;
+    bool haveReturnableObjectCode;  // true when objectCode is acceptable to send to driver
+
+    const ShBindingTable* appAttributeBindings;
+    const ShBindingTable* fixedAttributeBindings;
+    const int* excludedAttributes;
+    int excludedCount;
+    ShBindingTable* uniformBindings;  // created by the linker
+};
+
+//
+// This is the interface between the machine independent code
+// and the machine dependent code.
+//
+// The machine dependent code should derive from the classes
+// above. Then Construct*() and Delete*() will create and
+// destroy the machine dependent objects, which contain the
+// above machine independent information.
+//
+TCompiler* ConstructCompiler(EShLanguage, int);
+
+TShHandleBase* ConstructLinker(EShExecutable, int);
+TShHandleBase* ConstructBindings();
+void DeleteLinker(TShHandleBase*);
+void DeleteBindingList(TShHandleBase* bindingList);
+
+TUniformMap* ConstructUniformMap();
+void DeleteCompiler(TCompiler*);
+
+void DeleteUniformMap(TUniformMap*);
+
+#endif // _SHHANDLE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/SpirvIntrinsics.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/SpirvIntrinsics.h
new file mode 100644
index 0000000..3c7d72c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/SpirvIntrinsics.h
@@ -0,0 +1,128 @@
+//
+// Copyright(C) 2021 Advanced Micro Devices, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#pragma once
+
+#ifndef GLSLANG_WEB
+
+//
+// GL_EXT_spirv_intrinsics
+//
+#include "Common.h"
+
+namespace glslang {
+
+class TIntermTyped;
+class TIntermConstantUnion;
+class TType;
+
+// SPIR-V requirements
+struct TSpirvRequirement {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    // capability = [..]
+    TSet<TString> extensions;
+    // extension = [..]
+    TSet<int> capabilities;
+};
+
+// SPIR-V execution modes
+struct TSpirvExecutionMode {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    // spirv_execution_mode
+    TMap<int, TVector<const TIntermConstantUnion*>> modes;
+    // spirv_execution_mode_id
+    TMap<int, TVector<const TIntermConstantUnion*> > modeIds;
+};
+
+// SPIR-V decorations
+struct TSpirvDecorate {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    // spirv_decorate
+    TMap<int, TVector<const TIntermTyped*> > decorates;
+    // spirv_decorate_id
+    TMap<int, TVector<const TIntermConstantUnion*>> decorateIds;
+    // spirv_decorate_string
+    TMap<int, TVector<const TIntermConstantUnion*> > decorateStrings;
+};
+
+// SPIR-V instruction
+struct TSpirvInstruction {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TSpirvInstruction() { set = ""; id = -1; }
+
+    bool operator==(const TSpirvInstruction& rhs) const { return set == rhs.set && id == rhs.id; }
+    bool operator!=(const TSpirvInstruction& rhs) const { return !operator==(rhs); }
+
+    // spirv_instruction
+    TString set;
+    int id;
+};
+
+// SPIR-V type parameter
+struct TSpirvTypeParameter {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TSpirvTypeParameter(const TIntermConstantUnion* arg) { constant = arg; }
+
+    bool operator==(const TSpirvTypeParameter& rhs) const { return constant == rhs.constant; }
+    bool operator!=(const TSpirvTypeParameter& rhs) const { return !operator==(rhs); }
+
+    const TIntermConstantUnion* constant;
+};
+
+typedef TVector<TSpirvTypeParameter> TSpirvTypeParameters;
+
+// SPIR-V type
+struct TSpirvType {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    bool operator==(const TSpirvType& rhs) const
+    {
+        return spirvInst == rhs.spirvInst && typeParams == rhs.typeParams;
+    }
+    bool operator!=(const TSpirvType& rhs) const { return !operator==(rhs); }
+
+    // spirv_type
+    TSpirvInstruction spirvInst;
+    TSpirvTypeParameters typeParams;
+};
+
+} // end namespace glslang
+
+#endif // GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/Types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/Types.h
new file mode 100644
index 0000000..a6f47e8
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/Types.h
@@ -0,0 +1,2901 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2016 LunarG, Inc.
+// Copyright (C) 2015-2016 Google, Inc.
+// Copyright (C) 2017 ARM Limited.
+// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _TYPES_INCLUDED
+#define _TYPES_INCLUDED
+
+#include "../Include/Common.h"
+#include "../Include/BaseTypes.h"
+#include "../Public/ShaderLang.h"
+#include "arrays.h"
+#include "SpirvIntrinsics.h"
+
+#include <algorithm>
+
+namespace glslang {
+
+class TIntermAggregate;
+
+const int GlslangMaxTypeLength = 200;  // TODO: need to print block/struct one member per line, so this can stay bounded
+
+const char* const AnonymousPrefix = "anon@"; // for something like a block whose members can be directly accessed
+inline bool IsAnonymous(const TString& name)
+{
+    return name.compare(0, 5, AnonymousPrefix) == 0;
+}
+
+//
+// Details within a sampler type
+//
+enum TSamplerDim {
+    EsdNone,
+    Esd1D,
+    Esd2D,
+    Esd3D,
+    EsdCube,
+    EsdRect,
+    EsdBuffer,
+    EsdSubpass,  // goes only with non-sampled image (image is true)
+    EsdNumDims
+};
+
+struct TSampler {   // misnomer now; includes images, textures without sampler, and textures with sampler
+    TBasicType type : 8;   // type returned by sampler
+    TSamplerDim dim : 8;
+    bool    arrayed : 1;
+    bool     shadow : 1;
+    bool         ms : 1;
+    bool      image : 1;   // image, combined should be false
+    bool   combined : 1;   // true means texture is combined with a sampler, false means texture with no sampler
+    bool    sampler : 1;   // true means a pure sampler, other fields should be clear()
+
+#ifdef GLSLANG_WEB
+    bool is1D() const { return false; }
+    bool isBuffer() const { return false; }
+    bool isRect() const { return false; }
+    bool isSubpass() const { return false; }
+    bool isCombined() const { return true; }
+    bool isImage() const { return false; }
+    bool isImageClass() const { return false; }
+    bool isMultiSample() const { return false; }
+    bool isExternal() const { return false; }
+    void setExternal(bool e) { }
+    bool isYuv() const { return false; }
+#else
+    unsigned int vectorSize : 3;  // vector return type size.
+    // Some languages support structures as sample results.  Storing the whole structure in the
+    // TSampler is too large, so there is an index to a separate table.
+    static const unsigned structReturnIndexBits = 4;                        // number of index bits to use.
+    static const unsigned structReturnSlots = (1<<structReturnIndexBits)-1; // number of valid values
+
+typedef TVector<TTypeLoc> TTypeList;
+
+typedef TVector<TString*> TIdentifierList;
+
+//
+// Following are a series of helper enums for managing layouts and qualifiers,
+// used for TPublicType, TType, others.
+//
+
+enum TLayoutPacking {
+    ElpNone,
+    ElpShared,      // default, but different than saying nothing
+    ElpStd140,
+    ElpStd430,
+    ElpPacked,
+    ElpScalar,
+    ElpCount        // If expanding, see bitfield width below
+};
+
+enum TLayoutMatrix {
+    ElmNone,
+    ElmRowMajor,
+    ElmColumnMajor, // default, but different than saying nothing
+    ElmCount        // If expanding, see bitfield width below
+};
+
+// Union of geometry shader and tessellation shader geometry types.
+// They don't go into TType, but rather have current state per shader or
+// active parser type (TPublicType).
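+//
+// For example (editor's illustration), GLSL geometry-shader layout
+// declarations map directly onto these values:
+//
+//     layout(triangles) in;                      // -> ElgTriangles
+//     layout(line_strip, max_vertices = 4) out;  // -> ElgLineStrip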
+enum TLayoutGeometry { + ElgNone, + ElgPoints, + ElgLines, + ElgLinesAdjacency, + ElgLineStrip, + ElgTriangles, + ElgTrianglesAdjacency, + ElgTriangleStrip, + ElgQuads, + ElgIsolines, +}; + +enum TVertexSpacing { + EvsNone, + EvsEqual, + EvsFractionalEven, + EvsFractionalOdd +}; + +enum TVertexOrder { + EvoNone, + EvoCw, + EvoCcw +}; + +// Note: order matters, as type of format is done by comparison. +enum TLayoutFormat { + ElfNone, + + // Float image + ElfRgba32f, + ElfRgba16f, + ElfR32f, + ElfRgba8, + ElfRgba8Snorm, + + ElfEsFloatGuard, // to help with comparisons + + ElfRg32f, + ElfRg16f, + ElfR11fG11fB10f, + ElfR16f, + ElfRgba16, + ElfRgb10A2, + ElfRg16, + ElfRg8, + ElfR16, + ElfR8, + ElfRgba16Snorm, + ElfRg16Snorm, + ElfRg8Snorm, + ElfR16Snorm, + ElfR8Snorm, + + ElfFloatGuard, // to help with comparisons + + // Int image + ElfRgba32i, + ElfRgba16i, + ElfRgba8i, + ElfR32i, + + ElfEsIntGuard, // to help with comparisons + + ElfRg32i, + ElfRg16i, + ElfRg8i, + ElfR16i, + ElfR8i, + ElfR64i, + + ElfIntGuard, // to help with comparisons + + // Uint image + ElfRgba32ui, + ElfRgba16ui, + ElfRgba8ui, + ElfR32ui, + + ElfEsUintGuard, // to help with comparisons + + ElfRg32ui, + ElfRg16ui, + ElfRgb10a2ui, + ElfRg8ui, + ElfR16ui, + ElfR8ui, + ElfR64ui, + + ElfCount +}; + +enum TLayoutDepth { + EldNone, + EldAny, + EldGreater, + EldLess, + EldUnchanged, + + EldCount +}; + +enum TLayoutStencil { + ElsNone, + ElsRefUnchangedFrontAMD, + ElsRefGreaterFrontAMD, + ElsRefLessFrontAMD, + ElsRefUnchangedBackAMD, + ElsRefGreaterBackAMD, + ElsRefLessBackAMD, + + ElsCount +}; + +enum TBlendEquationShift { + // No 'EBlendNone': + // These are used as bit-shift amounts. A mask of such shifts will have type 'int', + // and in that space, 0 means no bits set, or none. In this enum, 0 means (1 << 0), a bit is set. 
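+    // For example (editor's illustration), a mask advertising multiply and
+    // screen support would be built as:
+    //     int mask = (1 << EBlendMultiply) | (1 << EBlendScreen);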
+ EBlendMultiply, + EBlendScreen, + EBlendOverlay, + EBlendDarken, + EBlendLighten, + EBlendColordodge, + EBlendColorburn, + EBlendHardlight, + EBlendSoftlight, + EBlendDifference, + EBlendExclusion, + EBlendHslHue, + EBlendHslSaturation, + EBlendHslColor, + EBlendHslLuminosity, + EBlendAllEquations, + + EBlendCount +}; + +enum TInterlockOrdering { + EioNone, + EioPixelInterlockOrdered, + EioPixelInterlockUnordered, + EioSampleInterlockOrdered, + EioSampleInterlockUnordered, + EioShadingRateInterlockOrdered, + EioShadingRateInterlockUnordered, + + EioCount, +}; + +enum TShaderInterface +{ + // Includes both uniform blocks and buffer blocks + EsiUniform = 0, + EsiInput, + EsiOutput, + EsiNone, + + EsiCount +}; + +class TQualifier { +public: + static const int layoutNotSet = -1; + + void clear() + { + precision = EpqNone; + invariant = false; + makeTemporary(); + declaredBuiltIn = EbvNone; +#ifndef GLSLANG_WEB + noContraction = false; + nullInit = false; + spirvByReference = false; + spirvLiteral = false; +#endif + defaultBlock = false; + } + + // drop qualifiers that don't belong in a temporary variable + void makeTemporary() + { + semanticName = nullptr; + storage = EvqTemporary; + builtIn = EbvNone; + clearInterstage(); + clearMemory(); + specConstant = false; + nonUniform = false; + nullInit = false; + defaultBlock = false; + clearLayout(); +#ifndef GLSLANG_WEB + spirvStorageClass = -1; + spirvDecorate = nullptr; + spirvByReference = false; + spirvLiteral = false; +#endif + } + + void clearInterstage() + { + clearInterpolation(); +#ifndef GLSLANG_WEB + patch = false; + sample = false; +#endif + } + + void clearInterpolation() + { + centroid = false; + smooth = false; + flat = false; +#ifndef GLSLANG_WEB + nopersp = false; + explicitInterp = false; + pervertexNV = false; + perPrimitiveNV = false; + perViewNV = false; + perTaskNV = false; +#endif + pervertexEXT = false; + } + + void clearMemory() + { +#ifndef GLSLANG_WEB + coherent = false; + devicecoherent = false; + queuefamilycoherent = false; + workgroupcoherent = false; + subgroupcoherent = false; + shadercallcoherent = false; + nonprivate = false; + volatil = false; + restrict = false; + readonly = false; + writeonly = false; +#endif + } + + const char* semanticName; + TStorageQualifier storage : 6; + TBuiltInVariable builtIn : 9; + TBuiltInVariable declaredBuiltIn : 9; + static_assert(EbvLast < 256, "need to increase size of TBuiltInVariable bitfields!"); + TPrecisionQualifier precision : 3; + bool invariant : 1; // require canonical treatment for cross-shader invariance + bool centroid : 1; + bool smooth : 1; + bool flat : 1; + // having a constant_id is not sufficient: expressions have no id, but are still specConstant + bool specConstant : 1; + bool nonUniform : 1; + bool explicitOffset : 1; + bool defaultBlock : 1; // default blocks with matching names have structures merged when linking + +#ifdef GLSLANG_WEB + bool isWriteOnly() const { return false; } + bool isReadOnly() const { return false; } + bool isRestrict() const { return false; } + bool isCoherent() const { return false; } + bool isVolatile() const { return false; } + bool isSample() const { return false; } + bool isMemory() const { return false; } + bool isMemoryQualifierImageAndSSBOOnly() const { return false; } + bool bufferReferenceNeedsVulkanMemoryModel() const { return false; } + bool isInterpolation() const { return flat || smooth; } + bool isExplicitInterpolation() const { return false; } + bool isAuxiliary() const { return centroid; } + bool isPatch() const { 
return false; } + bool isNoContraction() const { return false; } + void setNoContraction() { } + bool isPervertexNV() const { return false; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() {} + bool isNullInit() const { return false; } + void setSpirvByReference() { } + bool isSpirvByReference() { return false; } + void setSpirvLiteral() { } + bool isSpirvLiteral() { return false; } +#else + bool noContraction: 1; // prevent contraction and reassociation, e.g., for 'precise' keyword, and expressions it affects + bool nopersp : 1; + bool explicitInterp : 1; + bool pervertexNV : 1; + bool pervertexEXT : 1; + bool perPrimitiveNV : 1; + bool perViewNV : 1; + bool perTaskNV : 1; + bool patch : 1; + bool sample : 1; + bool restrict : 1; + bool readonly : 1; + bool writeonly : 1; + bool coherent : 1; + bool volatil : 1; + bool devicecoherent : 1; + bool queuefamilycoherent : 1; + bool workgroupcoherent : 1; + bool subgroupcoherent : 1; + bool shadercallcoherent : 1; + bool nonprivate : 1; + bool nullInit : 1; + bool spirvByReference : 1; + bool spirvLiteral : 1; + bool isWriteOnly() const { return writeonly; } + bool isReadOnly() const { return readonly; } + bool isRestrict() const { return restrict; } + bool isCoherent() const { return coherent; } + bool isVolatile() const { return volatil; } + bool isSample() const { return sample; } + bool isMemory() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly || nonprivate; + } + bool isMemoryQualifierImageAndSSBOOnly() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly; + } + bool bufferReferenceNeedsVulkanMemoryModel() const + { + // include qualifiers that map to load/store availability/visibility/nonprivate memory access operands + return subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || nonprivate; + } + bool isInterpolation() const + { + return flat || smooth || nopersp || explicitInterp; + } + bool isExplicitInterpolation() const + { + return explicitInterp; + } + bool isAuxiliary() const + { + return centroid || patch || sample || pervertexNV || pervertexEXT; + } + bool isPatch() const { return patch; } + bool isNoContraction() const { return noContraction; } + void setNoContraction() { noContraction = true; } + bool isPervertexNV() const { return pervertexNV; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() { nullInit = true; } + bool isNullInit() const { return nullInit; } + void setSpirvByReference() { spirvByReference = true; } + bool isSpirvByReference() const { return spirvByReference; } + void setSpirvLiteral() { spirvLiteral = true; } + bool isSpirvLiteral() const { return spirvLiteral; } +#endif + + bool isPipeInput() const + { + switch (storage) { + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + return true; + default: + return false; + } + } + + bool isPipeOutput() const + { + switch (storage) { + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + bool isParamInput() const + { + switch (storage) { + case EvqIn: + case EvqInOut: + case EvqConstReadOnly: + 
return true; + default: + return false; + } + } + + bool isParamOutput() const + { + switch (storage) { + case EvqOut: + case EvqInOut: + return true; + default: + return false; + } + } + + bool isUniformOrBuffer() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + return true; + default: + return false; + } + } + + bool isUniform() const + { + switch (storage) { + case EvqUniform: + return true; + default: + return false; + } + } + + bool isIo() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + // non-built-in symbols that might link between compilation units + bool isLinkable() const + { + switch (storage) { + case EvqGlobal: + case EvqVaryingIn: + case EvqVaryingOut: + case EvqUniform: + case EvqBuffer: + case EvqShared: + return true; + default: + return false; + } + } + + TBlockStorageClass getBlockStorage() const { + if (storage == EvqUniform && !isPushConstant()) { + return EbsUniform; + } + else if (storage == EvqUniform) { + return EbsPushConstant; + } + else if (storage == EvqBuffer) { + return EbsStorageBuffer; + } + return EbsNone; + } + + void setBlockStorage(TBlockStorageClass newBacking) { +#ifndef GLSLANG_WEB + layoutPushConstant = (newBacking == EbsPushConstant); +#endif + switch (newBacking) { + case EbsUniform : + if (layoutPacking == ElpStd430) { + // std430 would not be valid + layoutPacking = ElpStd140; + } + storage = EvqUniform; + break; + case EbsStorageBuffer : + storage = EvqBuffer; + break; +#ifndef GLSLANG_WEB + case EbsPushConstant : + storage = EvqUniform; + layoutSet = TQualifier::layoutSetEnd; + layoutBinding = TQualifier::layoutBindingEnd; + break; +#endif + default: + break; + } + } + +#ifdef GLSLANG_WEB + bool isPerView() const { return false; } + bool isTaskMemory() const { return false; } + bool isArrayedIo(EShLanguage language) const { return false; } +#else + bool isPerPrimitive() const { return perPrimitiveNV; } + bool isPerView() const { return perViewNV; } + bool isTaskMemory() const { return perTaskNV; } + bool isTaskPayload() const { return storage == EvqtaskPayloadSharedEXT; } + bool isAnyPayload() const { + return storage == EvqPayload || storage == EvqPayloadIn; + } + bool isAnyCallable() const { + return storage == EvqCallableData || storage == EvqCallableDataIn; + } + + // True if this type of IO is supposed to be arrayed with extra level for per-vertex data + bool isArrayedIo(EShLanguage language) const + { + switch (language) { + case EShLangGeometry: + return isPipeInput(); + case EShLangTessControl: + return ! patch && (isPipeInput() || isPipeOutput()); + case EShLangTessEvaluation: + return ! patch && isPipeInput(); + case EShLangFragment: + return (pervertexNV || pervertexEXT) && isPipeInput(); + case EShLangMesh: + return ! 
perTaskNV && isPipeOutput(); + + default: + return false; + } + } +#endif + + // Implementing an embedded layout-qualifier class here, since C++ can't have a real class bitfield + void clearLayout() // all layout + { + clearUniformLayout(); + +#ifndef GLSLANG_WEB + layoutPushConstant = false; + layoutBufferReference = false; + layoutPassthrough = false; + layoutViewportRelative = false; + // -2048 as the default value indicating layoutSecondaryViewportRelative is not set + layoutSecondaryViewportRelativeOffset = -2048; + layoutShaderRecord = false; + layoutBufferReferenceAlign = layoutBufferReferenceAlignEnd; + layoutFormat = ElfNone; +#endif + + clearInterstageLayout(); + + layoutSpecConstantId = layoutSpecConstantIdEnd; + } + void clearInterstageLayout() + { + layoutLocation = layoutLocationEnd; + layoutComponent = layoutComponentEnd; +#ifndef GLSLANG_WEB + layoutIndex = layoutIndexEnd; + clearStreamLayout(); + clearXfbLayout(); +#endif + } + +#ifndef GLSLANG_WEB + void clearStreamLayout() + { + layoutStream = layoutStreamEnd; + } + void clearXfbLayout() + { + layoutXfbBuffer = layoutXfbBufferEnd; + layoutXfbStride = layoutXfbStrideEnd; + layoutXfbOffset = layoutXfbOffsetEnd; + } +#endif + + bool hasNonXfbLayout() const + { + return hasUniformLayout() || + hasAnyLocation() || + hasStream() || + hasFormat() || + isShaderRecord() || + isPushConstant() || + hasBufferReference(); + } + bool hasLayout() const + { + return hasNonXfbLayout() || + hasXfb(); + } + + TLayoutMatrix layoutMatrix : 3; + TLayoutPacking layoutPacking : 4; + int layoutOffset; + int layoutAlign; + + unsigned int layoutLocation : 12; + static const unsigned int layoutLocationEnd = 0xFFF; + + unsigned int layoutComponent : 3; + static const unsigned int layoutComponentEnd = 4; + + unsigned int layoutSet : 7; + static const unsigned int layoutSetEnd = 0x3F; + + unsigned int layoutBinding : 16; + static const unsigned int layoutBindingEnd = 0xFFFF; + + unsigned int layoutIndex : 8; + static const unsigned int layoutIndexEnd = 0xFF; + + unsigned int layoutStream : 8; + static const unsigned int layoutStreamEnd = 0xFF; + + unsigned int layoutXfbBuffer : 4; + static const unsigned int layoutXfbBufferEnd = 0xF; + + unsigned int layoutXfbStride : 14; + static const unsigned int layoutXfbStrideEnd = 0x3FFF; + + unsigned int layoutXfbOffset : 13; + static const unsigned int layoutXfbOffsetEnd = 0x1FFF; + + unsigned int layoutAttachment : 8; // for input_attachment_index + static const unsigned int layoutAttachmentEnd = 0XFF; + + unsigned int layoutSpecConstantId : 11; + static const unsigned int layoutSpecConstantIdEnd = 0x7FF; + +#ifndef GLSLANG_WEB + // stored as log2 of the actual alignment value + unsigned int layoutBufferReferenceAlign : 6; + static const unsigned int layoutBufferReferenceAlignEnd = 0x3F; + + TLayoutFormat layoutFormat : 8; + + bool layoutPushConstant; + bool layoutBufferReference; + bool layoutPassthrough; + bool layoutViewportRelative; + int layoutSecondaryViewportRelativeOffset; + bool layoutShaderRecord; + + // GL_EXT_spirv_intrinsics + int spirvStorageClass; + TSpirvDecorate* spirvDecorate; +#endif + + bool hasUniformLayout() const + { + return hasMatrix() || + hasPacking() || + hasOffset() || + hasBinding() || + hasSet() || + hasAlign(); + } + void clearUniformLayout() // only uniform specific + { + layoutMatrix = ElmNone; + layoutPacking = ElpNone; + layoutOffset = layoutNotSet; + layoutAlign = layoutNotSet; + + layoutSet = layoutSetEnd; + layoutBinding = layoutBindingEnd; +#ifndef GLSLANG_WEB + 
layoutAttachment = layoutAttachmentEnd; +#endif + } + + bool hasMatrix() const + { + return layoutMatrix != ElmNone; + } + bool hasPacking() const + { + return layoutPacking != ElpNone; + } + bool hasAlign() const + { + return layoutAlign != layoutNotSet; + } + bool hasAnyLocation() const + { + return hasLocation() || + hasComponent() || + hasIndex(); + } + bool hasLocation() const + { + return layoutLocation != layoutLocationEnd; + } + bool hasSet() const + { + return layoutSet != layoutSetEnd; + } + bool hasBinding() const + { + return layoutBinding != layoutBindingEnd; + } +#ifdef GLSLANG_WEB + bool hasOffset() const { return false; } + bool isNonPerspective() const { return false; } + bool hasIndex() const { return false; } + unsigned getIndex() const { return 0; } + bool hasComponent() const { return false; } + bool hasStream() const { return false; } + bool hasFormat() const { return false; } + bool hasXfb() const { return false; } + bool hasXfbBuffer() const { return false; } + bool hasXfbStride() const { return false; } + bool hasXfbOffset() const { return false; } + bool hasAttachment() const { return false; } + TLayoutFormat getFormat() const { return ElfNone; } + bool isPushConstant() const { return false; } + bool isShaderRecord() const { return false; } + bool hasBufferReference() const { return false; } + bool hasBufferReferenceAlign() const { return false; } + bool isNonUniform() const { return false; } +#else + bool hasOffset() const + { + return layoutOffset != layoutNotSet; + } + bool isNonPerspective() const { return nopersp; } + bool hasIndex() const + { + return layoutIndex != layoutIndexEnd; + } + unsigned getIndex() const { return layoutIndex; } + bool hasComponent() const + { + return layoutComponent != layoutComponentEnd; + } + bool hasStream() const + { + return layoutStream != layoutStreamEnd; + } + bool hasFormat() const + { + return layoutFormat != ElfNone; + } + bool hasXfb() const + { + return hasXfbBuffer() || + hasXfbStride() || + hasXfbOffset(); + } + bool hasXfbBuffer() const + { + return layoutXfbBuffer != layoutXfbBufferEnd; + } + bool hasXfbStride() const + { + return layoutXfbStride != layoutXfbStrideEnd; + } + bool hasXfbOffset() const + { + return layoutXfbOffset != layoutXfbOffsetEnd; + } + bool hasAttachment() const + { + return layoutAttachment != layoutAttachmentEnd; + } + TLayoutFormat getFormat() const { return layoutFormat; } + bool isPushConstant() const { return layoutPushConstant; } + bool isShaderRecord() const { return layoutShaderRecord; } + bool hasBufferReference() const { return layoutBufferReference; } + bool hasBufferReferenceAlign() const + { + return layoutBufferReferenceAlign != layoutBufferReferenceAlignEnd; + } + bool isNonUniform() const + { + return nonUniform; + } + + // GL_EXT_spirv_intrinsics + bool hasSprivDecorate() const { return spirvDecorate != nullptr; } + void setSpirvDecorate(int decoration, const TIntermAggregate* args = nullptr); + void setSpirvDecorateId(int decoration, const TIntermAggregate* args); + void setSpirvDecorateString(int decoration, const TIntermAggregate* args); + const TSpirvDecorate& getSpirvDecorate() const { assert(spirvDecorate); return *spirvDecorate; } + TSpirvDecorate& getSpirvDecorate() { assert(spirvDecorate); return *spirvDecorate; } + TString getSpirvDecorateQualifierString() const; +#endif + bool hasSpecConstantId() const + { + // Not the same thing as being a specialization constant, this + // is just whether or not it was declared with an ID. 
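+        // For example (editor's illustration), "layout(constant_id = 3) const
+        // int N = 8;" declares a specialization constant *with* an ID of 3.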
+ return layoutSpecConstantId != layoutSpecConstantIdEnd; + } + bool isSpecConstant() const + { + // True if type is a specialization constant, whether or not it + // had a specialization-constant ID, and false if it is not a + // true front-end constant. + return specConstant; + } + bool isFrontEndConstant() const + { + // True if the front-end knows the final constant value. + // This allows front-end constant folding. + return storage == EvqConst && ! specConstant; + } + bool isConstant() const + { + // True if is either kind of constant; specialization or regular. + return isFrontEndConstant() || isSpecConstant(); + } + void makeSpecConstant() + { + storage = EvqConst; + specConstant = true; + } + static const char* getLayoutPackingString(TLayoutPacking packing) + { + switch (packing) { + case ElpStd140: return "std140"; +#ifndef GLSLANG_WEB + case ElpPacked: return "packed"; + case ElpShared: return "shared"; + case ElpStd430: return "std430"; + case ElpScalar: return "scalar"; +#endif + default: return "none"; + } + } + static const char* getLayoutMatrixString(TLayoutMatrix m) + { + switch (m) { + case ElmColumnMajor: return "column_major"; + case ElmRowMajor: return "row_major"; + default: return "none"; + } + } +#ifdef GLSLANG_WEB + static const char* getLayoutFormatString(TLayoutFormat f) { return "none"; } +#else + static const char* getLayoutFormatString(TLayoutFormat f) + { + switch (f) { + case ElfRgba32f: return "rgba32f"; + case ElfRgba16f: return "rgba16f"; + case ElfRg32f: return "rg32f"; + case ElfRg16f: return "rg16f"; + case ElfR11fG11fB10f: return "r11f_g11f_b10f"; + case ElfR32f: return "r32f"; + case ElfR16f: return "r16f"; + case ElfRgba16: return "rgba16"; + case ElfRgb10A2: return "rgb10_a2"; + case ElfRgba8: return "rgba8"; + case ElfRg16: return "rg16"; + case ElfRg8: return "rg8"; + case ElfR16: return "r16"; + case ElfR8: return "r8"; + case ElfRgba16Snorm: return "rgba16_snorm"; + case ElfRgba8Snorm: return "rgba8_snorm"; + case ElfRg16Snorm: return "rg16_snorm"; + case ElfRg8Snorm: return "rg8_snorm"; + case ElfR16Snorm: return "r16_snorm"; + case ElfR8Snorm: return "r8_snorm"; + + case ElfRgba32i: return "rgba32i"; + case ElfRgba16i: return "rgba16i"; + case ElfRgba8i: return "rgba8i"; + case ElfRg32i: return "rg32i"; + case ElfRg16i: return "rg16i"; + case ElfRg8i: return "rg8i"; + case ElfR32i: return "r32i"; + case ElfR16i: return "r16i"; + case ElfR8i: return "r8i"; + + case ElfRgba32ui: return "rgba32ui"; + case ElfRgba16ui: return "rgba16ui"; + case ElfRgba8ui: return "rgba8ui"; + case ElfRg32ui: return "rg32ui"; + case ElfRg16ui: return "rg16ui"; + case ElfRgb10a2ui: return "rgb10_a2ui"; + case ElfRg8ui: return "rg8ui"; + case ElfR32ui: return "r32ui"; + case ElfR16ui: return "r16ui"; + case ElfR8ui: return "r8ui"; + case ElfR64ui: return "r64ui"; + case ElfR64i: return "r64i"; + default: return "none"; + } + } + static const char* getLayoutDepthString(TLayoutDepth d) + { + switch (d) { + case EldAny: return "depth_any"; + case EldGreater: return "depth_greater"; + case EldLess: return "depth_less"; + case EldUnchanged: return "depth_unchanged"; + default: return "none"; + } + } + static const char* getLayoutStencilString(TLayoutStencil s) + { + switch (s) { + case ElsRefUnchangedFrontAMD: return "stencil_ref_unchanged_front_amd"; + case ElsRefGreaterFrontAMD: return "stencil_ref_greater_front_amd"; + case ElsRefLessFrontAMD: return "stencil_ref_less_front_amd"; + case ElsRefUnchangedBackAMD: return "stencil_ref_unchanged_back_amd"; + case 
ElsRefGreaterBackAMD: return "stencil_ref_greater_back_amd"; + case ElsRefLessBackAMD: return "stencil_ref_less_back_amd"; + default: return "none"; + } + } + static const char* getBlendEquationString(TBlendEquationShift e) + { + switch (e) { + case EBlendMultiply: return "blend_support_multiply"; + case EBlendScreen: return "blend_support_screen"; + case EBlendOverlay: return "blend_support_overlay"; + case EBlendDarken: return "blend_support_darken"; + case EBlendLighten: return "blend_support_lighten"; + case EBlendColordodge: return "blend_support_colordodge"; + case EBlendColorburn: return "blend_support_colorburn"; + case EBlendHardlight: return "blend_support_hardlight"; + case EBlendSoftlight: return "blend_support_softlight"; + case EBlendDifference: return "blend_support_difference"; + case EBlendExclusion: return "blend_support_exclusion"; + case EBlendHslHue: return "blend_support_hsl_hue"; + case EBlendHslSaturation: return "blend_support_hsl_saturation"; + case EBlendHslColor: return "blend_support_hsl_color"; + case EBlendHslLuminosity: return "blend_support_hsl_luminosity"; + case EBlendAllEquations: return "blend_support_all_equations"; + default: return "unknown"; + } + } + static const char* getGeometryString(TLayoutGeometry geometry) + { + switch (geometry) { + case ElgPoints: return "points"; + case ElgLines: return "lines"; + case ElgLinesAdjacency: return "lines_adjacency"; + case ElgLineStrip: return "line_strip"; + case ElgTriangles: return "triangles"; + case ElgTrianglesAdjacency: return "triangles_adjacency"; + case ElgTriangleStrip: return "triangle_strip"; + case ElgQuads: return "quads"; + case ElgIsolines: return "isolines"; + default: return "none"; + } + } + static const char* getVertexSpacingString(TVertexSpacing spacing) + { + switch (spacing) { + case EvsEqual: return "equal_spacing"; + case EvsFractionalEven: return "fractional_even_spacing"; + case EvsFractionalOdd: return "fractional_odd_spacing"; + default: return "none"; + } + } + static const char* getVertexOrderString(TVertexOrder order) + { + switch (order) { + case EvoCw: return "cw"; + case EvoCcw: return "ccw"; + default: return "none"; + } + } + static int mapGeometryToSize(TLayoutGeometry geometry) + { + switch (geometry) { + case ElgPoints: return 1; + case ElgLines: return 2; + case ElgLinesAdjacency: return 4; + case ElgTriangles: return 3; + case ElgTrianglesAdjacency: return 6; + default: return 0; + } + } + static const char* getInterlockOrderingString(TInterlockOrdering order) + { + switch (order) { + case EioPixelInterlockOrdered: return "pixel_interlock_ordered"; + case EioPixelInterlockUnordered: return "pixel_interlock_unordered"; + case EioSampleInterlockOrdered: return "sample_interlock_ordered"; + case EioSampleInterlockUnordered: return "sample_interlock_unordered"; + case EioShadingRateInterlockOrdered: return "shading_rate_interlock_ordered"; + case EioShadingRateInterlockUnordered: return "shading_rate_interlock_unordered"; + default: return "none"; + } + } +#endif +}; + +// Qualifiers that don't need to be keep per object. They have shader scope, not object scope. +// So, they will not be part of TType, TQualifier, etc. 
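+//
+// For example (editor's illustration), declarations such as
+//
+//     layout(early_fragment_tests) in;   // fragment shader
+//     layout(local_size_x = 8) in;       // compute shader
+//
+// set earlyFragmentTests and localSize[0] below once for the whole shader
+// rather than qualifying any single object.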
+struct TShaderQualifiers {
+ TLayoutGeometry geometry; // geometry/tessellation shader in/out primitives
+ bool pixelCenterInteger; // fragment shader
+ bool originUpperLeft; // fragment shader
+ int invocations;
+ int vertices; // for tessellation "vertices", geometry & mesh "max_vertices"
+ TVertexSpacing spacing;
+ TVertexOrder order;
+ bool pointMode;
+ int localSize[3]; // compute shader
+ bool localSizeNotDefault[3]; // compute shader
+ int localSizeSpecId[3]; // compute shader specialization id for gl_WorkGroupSize
+#ifndef GLSLANG_WEB
+ bool earlyFragmentTests; // fragment input
+ bool postDepthCoverage; // fragment input
+ bool earlyAndLateFragmentTestsAMD; // fragment input
+ TLayoutDepth layoutDepth;
+ TLayoutStencil layoutStencil;
+ bool blendEquation; // true if any blend equation was specified
+ int numViews; // multiview extensions
+ TInterlockOrdering interlockOrdering;
+ bool layoutOverrideCoverage; // true if layout override_coverage set
+ bool layoutDerivativeGroupQuads; // true if layout derivative_group_quadsNV set
+ bool layoutDerivativeGroupLinear; // true if layout derivative_group_linearNV set
+ int primitives; // mesh shader "max_primitives"
+ bool layoutPrimitiveCulling; // true if layout primitive_culling set
+ TLayoutDepth getDepth() const { return layoutDepth; }
+ TLayoutStencil getStencil() const { return layoutStencil; }
+#else
+ TLayoutDepth getDepth() const { return EldNone; }
+#endif
+
+ void init()
+ {
+ geometry = ElgNone;
+ originUpperLeft = false;
+ pixelCenterInteger = false;
+ invocations = TQualifier::layoutNotSet;
+ vertices = TQualifier::layoutNotSet;
+ spacing = EvsNone;
+ order = EvoNone;
+ pointMode = false;
+ localSize[0] = 1;
+ localSize[1] = 1;
+ localSize[2] = 1;
+ localSizeNotDefault[0] = false;
+ localSizeNotDefault[1] = false;
+ localSizeNotDefault[2] = false;
+ localSizeSpecId[0] = TQualifier::layoutNotSet;
+ localSizeSpecId[1] = TQualifier::layoutNotSet;
+ localSizeSpecId[2] = TQualifier::layoutNotSet;
+#ifndef GLSLANG_WEB
+ earlyFragmentTests = false;
+ earlyAndLateFragmentTestsAMD = false;
+ postDepthCoverage = false;
+ layoutDepth = EldNone;
+ layoutStencil = ElsNone;
+ blendEquation = false;
+ numViews = TQualifier::layoutNotSet;
+ layoutOverrideCoverage = false;
+ layoutDerivativeGroupQuads = false;
+ layoutDerivativeGroupLinear = false;
+ layoutPrimitiveCulling = false;
+ primitives = TQualifier::layoutNotSet;
+ interlockOrdering = EioNone;
+#endif
+ }
+
+#ifdef GLSLANG_WEB
+ bool hasBlendEquation() const { return false; }
+#else
+ bool hasBlendEquation() const { return blendEquation; }
+#endif
+
+ // Merge in characteristics from the 'src' qualifier. They can override when
+ // set, but never erase when not set.
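// [Editor's note -- a minimal sketch of the merge contract above; assumed
// usage, not code from this patch]
//
//     TShaderQualifiers a, b;
//     a.init(); b.init();
//     a.vertices = 3;        // set only in 'a'
//     b.spacing = EvsEqual;  // set only in 'b'
//     a.merge(b);            // a.spacing becomes EvsEqual; a.vertices stays 3,
//                            // because b.vertices is still layoutNotSet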
+ void merge(const TShaderQualifiers& src)
+ {
+ if (src.geometry != ElgNone)
+ geometry = src.geometry;
+ if (src.pixelCenterInteger)
+ pixelCenterInteger = src.pixelCenterInteger;
+ if (src.originUpperLeft)
+ originUpperLeft = src.originUpperLeft;
+ if (src.invocations != TQualifier::layoutNotSet)
+ invocations = src.invocations;
+ if (src.vertices != TQualifier::layoutNotSet)
+ vertices = src.vertices;
+ if (src.spacing != EvsNone)
+ spacing = src.spacing;
+ if (src.order != EvoNone)
+ order = src.order;
+ if (src.pointMode)
+ pointMode = true;
+ for (int i = 0; i < 3; ++i) {
+ if (src.localSize[i] > 1)
+ localSize[i] = src.localSize[i];
+ }
+ for (int i = 0; i < 3; ++i) {
+ localSizeNotDefault[i] = src.localSizeNotDefault[i] || localSizeNotDefault[i];
+ }
+ for (int i = 0; i < 3; ++i) {
+ if (src.localSizeSpecId[i] != TQualifier::layoutNotSet)
+ localSizeSpecId[i] = src.localSizeSpecId[i];
+ }
+#ifndef GLSLANG_WEB
+ if (src.earlyFragmentTests)
+ earlyFragmentTests = true;
+ if (src.earlyAndLateFragmentTestsAMD)
+ earlyAndLateFragmentTestsAMD = true;
+ if (src.postDepthCoverage)
+ postDepthCoverage = true;
+ if (src.layoutDepth)
+ layoutDepth = src.layoutDepth;
+ if (src.layoutStencil)
+ layoutStencil = src.layoutStencil;
+ if (src.blendEquation)
+ blendEquation = src.blendEquation;
+ if (src.numViews != TQualifier::layoutNotSet)
+ numViews = src.numViews;
+ if (src.layoutOverrideCoverage)
+ layoutOverrideCoverage = src.layoutOverrideCoverage;
+ if (src.layoutDerivativeGroupQuads)
+ layoutDerivativeGroupQuads = src.layoutDerivativeGroupQuads;
+ if (src.layoutDerivativeGroupLinear)
+ layoutDerivativeGroupLinear = src.layoutDerivativeGroupLinear;
+ if (src.primitives != TQualifier::layoutNotSet)
+ primitives = src.primitives;
+ if (src.interlockOrdering != EioNone)
+ interlockOrdering = src.interlockOrdering;
+ if (src.layoutPrimitiveCulling)
+ layoutPrimitiveCulling = src.layoutPrimitiveCulling;
+#endif
+ }
+};
+
+//
+// TPublicType is just temporarily used while parsing and not quite the same
+// information kept per node in TType. Due to the bison stack, it can't have
+// types that it thinks have non-trivial constructors. It should
+// just be used while recognizing the grammar, not anything else.
+// Once enough is known about the situation, the proper information
+// is moved into a TType, or the parse context, etc.
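// [Editor's note -- assumed parser-side flow, sketched for orientation] A
// TPublicType is plain data: it is init()'d per declaration, filled in while
// the grammar is recognized, then promoted via the TType(const TPublicType&)
// constructor further below:
//
//     TPublicType pub;
//     pub.init(loc, /*global*/ true); // 'loc' is an in-scope TSourceLoc
//     pub.basicType = EbtFloat;
//     pub.setVector(4);               // a vec4
//     TType type(pub);                // shallow promotion once parsing is done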
+// +class TPublicType { +public: + TBasicType basicType; + TSampler sampler; + TQualifier qualifier; + TShaderQualifiers shaderQualifiers; + int vectorSize : 4; + int matrixCols : 4; + int matrixRows : 4; + bool coopmat : 1; + TArraySizes* arraySizes; + const TType* userDef; + TSourceLoc loc; + TArraySizes* typeParameters; +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type directive + TSpirvType* spirvType; +#endif + +#ifdef GLSLANG_WEB + bool isCoopmat() const { return false; } +#else + bool isCoopmat() const { return coopmat; } +#endif + + void initType(const TSourceLoc& l) + { + basicType = EbtVoid; + vectorSize = 1; + matrixRows = 0; + matrixCols = 0; + arraySizes = nullptr; + userDef = nullptr; + loc = l; + typeParameters = nullptr; + coopmat = false; +#ifndef GLSLANG_WEB + spirvType = nullptr; +#endif + } + + void initQualifiers(bool global = false) + { + qualifier.clear(); + if (global) + qualifier.storage = EvqGlobal; + } + + void init(const TSourceLoc& l, bool global = false) + { + initType(l); + sampler.clear(); + initQualifiers(global); + shaderQualifiers.init(); + } + + void setVector(int s) + { + matrixRows = 0; + matrixCols = 0; + vectorSize = s; + } + + void setMatrix(int c, int r) + { + matrixRows = r; + matrixCols = c; + vectorSize = 0; + } + + bool isScalar() const + { + return matrixCols == 0 && vectorSize == 1 && arraySizes == nullptr && userDef == nullptr; + } + +#ifndef GLSLANG_WEB + // GL_EXT_spirv_intrinsics + void setSpirvType(const TSpirvInstruction& spirvInst, const TSpirvTypeParameters* typeParams = nullptr); +#endif + + // "Image" is a superset of "Subpass" + bool isImage() const { return basicType == EbtSampler && sampler.isImage(); } + bool isSubpass() const { return basicType == EbtSampler && sampler.isSubpass(); } +}; + +// +// Base class for things that have a type. 
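// [Editor's note -- assumed usage] In the TType constructors below, the
// scalar/vector/matrix shape is encoded directly in the arguments; matrices
// must pass a vectorSize of 0:
//
//     TType f(EbtFloat);                        // float
//     TType v(EbtFloat, EvqTemporary, 3);       // vec3 (vectorSize = 3)
//     TType m(EbtFloat, EvqTemporary, 0, 4, 4); // mat4 (0 vector, 4 cols, 4 rows)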
+// +class TType { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // for "empty" type (no args) or simple scalar/vector/matrix + explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for explicit precision qualifier + TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + qualifier.precision = p; + assert(p >= EpqNone && p <= EpqHigh); + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for turning a TPublicType into a TType, using a shallow copy + explicit TType(const TPublicType& p) : + basicType(p.basicType), + vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmat(p.coopmat), + arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters) +#ifndef GLSLANG_WEB + , spirvType(p.spirvType) +#endif + { + if (basicType == EbtSampler) + sampler = p.sampler; + else + sampler.clear(); + qualifier = p.qualifier; + if (p.userDef) { + if (p.userDef->basicType == EbtReference) { + basicType = EbtReference; + referentType = p.userDef->referentType; + } else { + structure = p.userDef->getWritableStruct(); // public type is short-lived; there are no sharing issues + } + typeName = NewPoolTString(p.userDef->getTypeName().c_str()); + } + if (p.isCoopmat() && p.typeParameters && p.typeParameters->getNumDims() > 0) { + int numBits = p.typeParameters->getDimSize(0); + if (p.basicType == EbtFloat && numBits == 16) { + basicType = EbtFloat16; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtUint && numBits == 8) { + basicType = EbtUint8; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtInt && numBits == 8) { + basicType = EbtInt8; + qualifier.precision = EpqNone; + } + } + } + // for construction of sampler types + TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : + basicType(EbtSampler), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), + sampler(sampler), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + qualifier.clear(); + qualifier.storage = q; + } + // to efficiently make a dereferenced type + // without ever duplicating the outer structure that will be thrown away + // and using only shallow copy + TType(const TType& type, int derefIndex, bool rowMajor = false) + { + if (type.isArray()) { + shallowCopy(type); + if (type.getArraySizes()->getNumDims() == 1) { + arraySizes = nullptr; + } else { + // want our own copy of the array, 
so we can edit it + arraySizes = new TArraySizes; + arraySizes->copyDereferenced(*type.arraySizes); + } + } else if (type.basicType == EbtStruct || type.basicType == EbtBlock) { + // do a structure dereference + const TTypeList& memberList = *type.getStruct(); + shallowCopy(*memberList[derefIndex].type); + return; + } else { + // do a vector/matrix dereference + shallowCopy(type); + if (matrixCols > 0) { + // dereference from matrix to vector + if (rowMajor) + vectorSize = matrixCols; + else + vectorSize = matrixRows; + matrixCols = 0; + matrixRows = 0; + if (vectorSize == 1) + vector1 = true; + } else if (isVector()) { + // dereference from vector to scalar + vectorSize = 1; + vector1 = false; + } else if (isCoopMat()) { + coopmat = false; + typeParameters = nullptr; + } + } + } + // for making structures, ... + TType(TTypeList* userDef, const TString& n) : + basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + typeName = NewPoolTString(n.c_str()); + } + // For interface blocks + TType(TTypeList* userDef, const TString& n, const TQualifier& q) : + basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + typeName = NewPoolTString(n.c_str()); + } + // for block reference (first parameter must be EbtReference) + explicit TType(TBasicType t, const TType &p, const TString& n) : + basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + assert(t == EbtReference); + typeName = NewPoolTString(n.c_str()); + qualifier.clear(); + qualifier.storage = p.qualifier.storage; + referentType = p.clone(); + } + virtual ~TType() {} + + // Not for use across pool pops; it will cause multiple instances of TType to point to the same information. + // This only works if that information (like a structure's list of types) does not change and + // the instances are sharing the same pool. + void shallowCopy(const TType& copyOf) + { + basicType = copyOf.basicType; + sampler = copyOf.sampler; + qualifier = copyOf.qualifier; + vectorSize = copyOf.vectorSize; + matrixCols = copyOf.matrixCols; + matrixRows = copyOf.matrixRows; + vector1 = copyOf.vector1; + arraySizes = copyOf.arraySizes; // copying the pointer only, not the contents + fieldName = copyOf.fieldName; + typeName = copyOf.typeName; + if (isStruct()) { + structure = copyOf.structure; + } else { + referentType = copyOf.referentType; + } + typeParameters = copyOf.typeParameters; +#ifndef GLSLANG_WEB + spirvType = copyOf.spirvType; +#endif + coopmat = copyOf.isCoopMat(); + } + + // Make complete copy of the whole type graph rooted at 'copyOf'. 
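// [Editor's note -- sketch of the copy-depth distinction; assumed usage]
// shallowCopy() shares the arraySizes/structure pointers, while deepCopy()
// and clone() duplicate the whole type graph:
//
//     TType a;
//     a.shallowCopy(original);      // a.getArraySizes() == original.getArraySizes()
//     TType* b = original.clone();  // b's array sizes are a fresh, independent copy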
+ void deepCopy(const TType& copyOf)
+ {
+ TMap<TTypeList*, TTypeList*> copied; // to enable copying a type graph as a graph, not a tree
+ deepCopy(copyOf, copied);
+ }
+
+ // Recursively make temporary
+ void makeTemporary()
+ {
+ getQualifier().makeTemporary();
+
+ if (isStruct())
+ for (unsigned int i = 0; i < structure->size(); ++i)
+ (*structure)[i].type->makeTemporary();
+ }
+
+ TType* clone() const
+ {
+ TType *newType = new TType();
+ newType->deepCopy(*this);
+
+ return newType;
+ }
+
+ void makeVector() { vector1 = true; }
+
+ virtual void hideMember() { basicType = EbtVoid; vectorSize = 1; }
+ virtual bool hiddenMember() const { return basicType == EbtVoid; }
+
+ virtual void setFieldName(const TString& n) { fieldName = NewPoolTString(n.c_str()); }
+ virtual const TString& getTypeName() const
+ {
+ assert(typeName);
+ return *typeName;
+ }
+
+ virtual const TString& getFieldName() const
+ {
+ assert(fieldName);
+ return *fieldName;
+ }
+ TShaderInterface getShaderInterface() const
+ {
+ if (basicType != EbtBlock)
+ return EsiNone;
+
+ switch (qualifier.storage) {
+ default:
+ return EsiNone;
+ case EvqVaryingIn:
+ return EsiInput;
+ case EvqVaryingOut:
+ return EsiOutput;
+ case EvqUniform:
+ case EvqBuffer:
+ return EsiUniform;
+ }
+ }
+
+ virtual TBasicType getBasicType() const { return basicType; }
+ virtual const TSampler& getSampler() const { return sampler; }
+ virtual TSampler& getSampler() { return sampler; }
+
+ virtual TQualifier& getQualifier() { return qualifier; }
+ virtual const TQualifier& getQualifier() const { return qualifier; }
+
+ virtual int getVectorSize() const { return vectorSize; } // returns 1 for either scalar or vector of size 1, valid for both
+ virtual int getMatrixCols() const { return matrixCols; }
+ virtual int getMatrixRows() const { return matrixRows; }
+ virtual int getOuterArraySize() const { return arraySizes->getOuterSize(); }
+ virtual TIntermTyped* getOuterArrayNode() const { return arraySizes->getOuterNode(); }
+ virtual int getCumulativeArraySize() const { return arraySizes->getCumulativeSize(); }
+#ifdef GLSLANG_WEB
+ bool isArrayOfArrays() const { return false; }
+#else
+ bool isArrayOfArrays() const { return arraySizes != nullptr && arraySizes->getNumDims() > 1; }
+#endif
+ virtual int getImplicitArraySize() const { return arraySizes->getImplicitSize(); }
+ virtual const TArraySizes* getArraySizes() const { return arraySizes; }
+ virtual TArraySizes* getArraySizes() { return arraySizes; }
+ virtual TType* getReferentType() const { return referentType; }
+ virtual const TArraySizes* getTypeParameters() const { return typeParameters; }
+ virtual TArraySizes* getTypeParameters() { return typeParameters; }
+
+ virtual bool isScalar() const { return ! isVector() && ! isMatrix() && ! isStruct() && ! isArray(); }
+ virtual bool isScalarOrVec1() const { return isScalar() || vector1; }
+ virtual bool isScalarOrVector() const { return !isMatrix() && !isStruct() && !isArray(); }
+ virtual bool isVector() const { return vectorSize > 1 || vector1; }
+ virtual bool isMatrix() const { return matrixCols ?
true : false; }
+ virtual bool isArray() const { return arraySizes != nullptr; }
+ virtual bool isSizedArray() const { return isArray() && arraySizes->isSized(); }
+ virtual bool isUnsizedArray() const { return isArray() && !arraySizes->isSized(); }
+ virtual bool isArrayVariablyIndexed() const { assert(isArray()); return arraySizes->isVariablyIndexed(); }
+ virtual void setArrayVariablyIndexed() { assert(isArray()); arraySizes->setVariablyIndexed(); }
+ virtual void updateImplicitArraySize(int size) { assert(isArray()); arraySizes->updateImplicitSize(size); }
+ virtual bool isStruct() const { return basicType == EbtStruct || basicType == EbtBlock; }
+ virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16; }
+ virtual bool isIntegerDomain() const
+ {
+ switch (basicType) {
+ case EbtInt8:
+ case EbtUint8:
+ case EbtInt16:
+ case EbtUint16:
+ case EbtInt:
+ case EbtUint:
+ case EbtInt64:
+ case EbtUint64:
+ case EbtAtomicUint:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ }
+ virtual bool isOpaque() const { return basicType == EbtSampler
+#ifndef GLSLANG_WEB
+ || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery
+#endif
+ ; }
+ virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; }
+
+ // "Image" is a superset of "Subpass"
+ virtual bool isImage() const { return basicType == EbtSampler && getSampler().isImage(); }
+ virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); }
+ virtual bool isTexture() const { return basicType == EbtSampler && getSampler().isTexture(); }
+ // Check the block-name convention of creating a block without populating its members:
+ virtual bool isUnusableName() const { return isStruct() && structure == nullptr; }
+ virtual bool isParameterized() const { return typeParameters != nullptr; }
+#ifdef GLSLANG_WEB
+ bool isAtomic() const { return false; }
+ bool isCoopMat() const { return false; }
+ bool isReference() const { return false; }
+ bool isSpirvType() const { return false; }
+#else
+ bool isAtomic() const { return basicType == EbtAtomicUint; }
+ bool isCoopMat() const { return coopmat; }
+ bool isReference() const { return getBasicType() == EbtReference; }
+ bool isSpirvType() const { return getBasicType() == EbtSpirvType; }
+#endif
+
+ // return true if this type contains any subtype which satisfies the given predicate.
+ template <typename P>
+ bool contains(P predicate) const
+ {
+ if (predicate(this))
+ return true;
+
+ const auto hasa = [predicate](const TTypeLoc& tl) { return tl.type->contains(predicate); };
+
+ return isStruct() && std::any_of(structure->begin(), structure->end(), hasa);
+ }
+
+ // Recursively checks if the type contains the given basic type
+ virtual bool containsBasicType(TBasicType checkType) const
+ {
+ return contains([checkType](const TType* t) { return t->basicType == checkType; } );
+ }
+
+ // Recursively check the structure for any arrays, needed for some error checks
+ virtual bool containsArray() const
+ {
+ return contains([](const TType* t) { return t->isArray(); } );
+ }
+
+ // Check the structure for any structures, needed for some error checks
+ virtual bool containsStructure() const
+ {
+ return contains([this](const TType* t) { return t != this && t->isStruct(); } );
+ }
+
+ // Recursively check the structure for any unsized arrays, needed for triggering a copyUp().
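// [Editor's note -- illustrative] containsUnsizedArray() below is simply the
// generic contains() walker specialized with a lambda; an equivalent ad-hoc
// query (assumed usage) would be:
//
//     bool hasUnsized = type.contains([](const TType* t) { return t->isUnsizedArray(); });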
+ virtual bool containsUnsizedArray() const
+ {
+ return contains([](const TType* t) { return t->isUnsizedArray(); } );
+ }
+
+ virtual bool containsOpaque() const
+ {
+ return contains([](const TType* t) { return t->isOpaque(); } );
+ }
+
+ // Recursively checks if the type contains a built-in variable
+ virtual bool containsBuiltIn() const
+ {
+ return contains([](const TType* t) { return t->isBuiltIn(); } );
+ }
+
+ virtual bool containsNonOpaque() const
+ {
+ const auto nonOpaque = [](const TType* t) {
+ switch (t->basicType) {
+ case EbtVoid:
+ case EbtFloat:
+ case EbtDouble:
+ case EbtFloat16:
+ case EbtInt8:
+ case EbtUint8:
+ case EbtInt16:
+ case EbtUint16:
+ case EbtInt:
+ case EbtUint:
+ case EbtInt64:
+ case EbtUint64:
+ case EbtBool:
+ case EbtReference:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ return contains(nonOpaque);
+ }
+
+ virtual bool containsSpecializationSize() const
+ {
+ return contains([](const TType* t) { return t->isArray() && t->arraySizes->isOuterSpecialization(); } );
+ }
+
+#ifdef GLSLANG_WEB
+ bool containsDouble() const { return false; }
+ bool contains16BitFloat() const { return false; }
+ bool contains64BitInt() const { return false; }
+ bool contains16BitInt() const { return false; }
+ bool contains8BitInt() const { return false; }
+ bool containsCoopMat() const { return false; }
+ bool containsReference() const { return false; }
+#else
+ bool containsDouble() const
+ {
+ return containsBasicType(EbtDouble);
+ }
+ bool contains16BitFloat() const
+ {
+ return containsBasicType(EbtFloat16);
+ }
+ bool contains64BitInt() const
+ {
+ return containsBasicType(EbtInt64) || containsBasicType(EbtUint64);
+ }
+ bool contains16BitInt() const
+ {
+ return containsBasicType(EbtInt16) || containsBasicType(EbtUint16);
+ }
+ bool contains8BitInt() const
+ {
+ return containsBasicType(EbtInt8) || containsBasicType(EbtUint8);
+ }
+ bool containsCoopMat() const
+ {
+ return contains([](const TType* t) { return t->coopmat; } );
+ }
+ bool containsReference() const
+ {
+ return containsBasicType(EbtReference);
+ }
+#endif
+
+ // Array editing methods. Array descriptors can be shared across
+ // type instances. This allows all uses of the same array
+ // to be updated at once. E.g., all nodes can be explicitly sized
+ // by tracking and correcting one implicit size. Or, all nodes
+ // can get the explicit size on a redeclaration that gives size.
+ //
+ // N.B.: Don't share with the shared symbol tables (symbols are
+ // marked as isReadOnly()). Such symbols with arrays that will be
+ // edited need to copyUp() on first use, so that
+ // A) the edits don't affect the shared symbol table, and
+ // B) the edits are shared across all users.
+ void updateArraySizes(const TType& type)
+ {
+ // For when we may already be sharing existing array descriptors,
+ // keeping the pointers the same, just updating the contents.
+ assert(arraySizes != nullptr);
+ assert(type.arraySizes != nullptr);
+ *arraySizes = *type.arraySizes;
+ }
+ void copyArraySizes(const TArraySizes& s)
+ {
+ // For setting a fresh new set of array sizes, not yet worrying about sharing.
+ arraySizes = new TArraySizes;
+ *arraySizes = s;
+ }
+ void transferArraySizes(TArraySizes* s)
+ {
+ // For setting an already allocated set of sizes that this type can use
+ // (no copy made).
+ arraySizes = s;
+ }
+ void clearArraySizes()
+ {
+ arraySizes = nullptr;
+ }
+
+ // Add inner array sizes, to any existing sizes, via copy; the
+ // sizes passed in can still be reused for other purposes.
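// [Editor's note -- sketch of the sharing semantics described above; 't1' and
// 't2' are hypothetical TType objects] transfer keeps one shared descriptor so
// a later size edit propagates to every user, while copy decouples:
//
//     TArraySizes* s = new TArraySizes;
//     s->addInnerSize();        // one unsized dimension
//     t1.transferArraySizes(s); // t1 shares *s
//     t2.copyArraySizes(*s);    // t2 owns an independent copy
//     s->changeOuterSize(8);    // t1 now sees size 8; t2 is still unsized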
+ void copyArrayInnerSizes(const TArraySizes* s)
+ {
+ if (s != nullptr) {
+ if (arraySizes == nullptr)
+ copyArraySizes(*s);
+ else
+ arraySizes->addInnerSizes(*s);
+ }
+ }
+ void changeOuterArraySize(int s) { arraySizes->changeOuterSize(s); }
+
+ // Recursively make the implicit array size the explicit array size.
+ // Explicit arrays are compile-time or link-time sized, never run-time sized.
+ // Sometimes, policy calls for an array to be run-time sized even if it was
+ // never variably indexed: Don't turn a 'skipNonvariablyIndexed' array into
+ // an explicit array.
+ void adoptImplicitArraySizes(bool skipNonvariablyIndexed)
+ {
+ if (isUnsizedArray() && !(skipNonvariablyIndexed || isArrayVariablyIndexed()))
+ changeOuterArraySize(getImplicitArraySize());
+ // For multi-dim per-view arrays, set unsized inner dimension size to 1
+ if (qualifier.isPerView() && arraySizes && arraySizes->isInnerUnsized())
+ arraySizes->clearInnerUnsized();
+ if (isStruct() && structure->size() > 0) {
+ int lastMember = (int)structure->size() - 1;
+ for (int i = 0; i < lastMember; ++i)
+ (*structure)[i].type->adoptImplicitArraySizes(false);
+ // implement the "last member of an SSBO" policy
+ (*structure)[lastMember].type->adoptImplicitArraySizes(getQualifier().storage == EvqBuffer);
+ }
+ }
+
+
+ void updateTypeParameters(const TType& type)
+ {
+ // For when we may already be sharing existing array descriptors,
+ // keeping the pointers the same, just updating the contents.
+ assert(typeParameters != nullptr);
+ assert(type.typeParameters != nullptr);
+ *typeParameters = *type.typeParameters;
+ }
+ void copyTypeParameters(const TArraySizes& s)
+ {
+ // For setting a fresh new set of type parameters, not yet worrying about sharing.
+ typeParameters = new TArraySizes;
+ *typeParameters = s;
+ }
+ void transferTypeParameters(TArraySizes* s)
+ {
+ // For setting an already allocated set of sizes that this type can use
+ // (no copy made).
+ typeParameters = s;
+ }
+ void clearTypeParameters()
+ {
+ typeParameters = nullptr;
+ }
+
+ // Add inner array sizes, to any existing sizes, via copy; the
+ // sizes passed in can still be reused for other purposes.
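// [Editor's note -- orientation only; inferred from the TType(const
// TPublicType&) constructor earlier in this header] For cooperative matrices,
// typeParameters dimension 0 carries the component bit width, e.g. {16} on an
// EbtFloat coopmat selects EbtFloat16:
//
//     TArraySizes params;
//     params.addInnerSize(16);      // bit width stored as the first "dimension"
//     t.copyTypeParameters(params); // t.isParameterized() is now true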
+ void copyTypeParametersInnerSizes(const TArraySizes* s) + { + if (s != nullptr) { + if (typeParameters == nullptr) + copyTypeParameters(*s); + else + typeParameters->addInnerSizes(*s); + } + } + + const char* getBasicString() const + { + return TType::getBasicString(basicType); + } + + static const char* getBasicString(TBasicType t) + { + switch (t) { + case EbtFloat: return "float"; + case EbtInt: return "int"; + case EbtUint: return "uint"; + case EbtSampler: return "sampler/image"; +#ifndef GLSLANG_WEB + case EbtVoid: return "void"; + case EbtDouble: return "double"; + case EbtFloat16: return "float16_t"; + case EbtInt8: return "int8_t"; + case EbtUint8: return "uint8_t"; + case EbtInt16: return "int16_t"; + case EbtUint16: return "uint16_t"; + case EbtInt64: return "int64_t"; + case EbtUint64: return "uint64_t"; + case EbtBool: return "bool"; + case EbtAtomicUint: return "atomic_uint"; + case EbtStruct: return "structure"; + case EbtBlock: return "block"; + case EbtAccStruct: return "accelerationStructureNV"; + case EbtRayQuery: return "rayQueryEXT"; + case EbtReference: return "reference"; + case EbtString: return "string"; + case EbtSpirvType: return "spirv_type"; +#endif + default: return "unknown type"; + } + } + +#ifdef GLSLANG_WEB + TString getCompleteString() const { return ""; } + const char* getStorageQualifierString() const { return ""; } + const char* getBuiltInVariableString() const { return ""; } + const char* getPrecisionQualifierString() const { return ""; } + TString getBasicTypeString() const { return ""; } +#else + TString getCompleteString(bool syntactic = false, bool getQualifiers = true, bool getPrecision = true, + bool getType = true, TString name = "", TString structName = "") const + { + TString typeString; + + const auto appendStr = [&](const char* s) { typeString.append(s); }; + const auto appendUint = [&](unsigned int u) { typeString.append(std::to_string(u).c_str()); }; + const auto appendInt = [&](int i) { typeString.append(std::to_string(i).c_str()); }; + + if (getQualifiers) { + if (qualifier.hasSprivDecorate()) + appendStr(qualifier.getSpirvDecorateQualifierString().c_str()); + + if (qualifier.hasLayout()) { + // To reduce noise, skip this if the only layout is an xfb_buffer + // with no triggering xfb_offset. 
+ TQualifier noXfbBuffer = qualifier; + noXfbBuffer.layoutXfbBuffer = TQualifier::layoutXfbBufferEnd; + if (noXfbBuffer.hasLayout()) { + appendStr("layout("); + if (qualifier.hasAnyLocation()) { + appendStr(" location="); + appendUint(qualifier.layoutLocation); + if (qualifier.hasComponent()) { + appendStr(" component="); + appendUint(qualifier.layoutComponent); + } + if (qualifier.hasIndex()) { + appendStr(" index="); + appendUint(qualifier.layoutIndex); + } + } + if (qualifier.hasSet()) { + appendStr(" set="); + appendUint(qualifier.layoutSet); + } + if (qualifier.hasBinding()) { + appendStr(" binding="); + appendUint(qualifier.layoutBinding); + } + if (qualifier.hasStream()) { + appendStr(" stream="); + appendUint(qualifier.layoutStream); + } + if (qualifier.hasMatrix()) { + appendStr(" "); + appendStr(TQualifier::getLayoutMatrixString(qualifier.layoutMatrix)); + } + if (qualifier.hasPacking()) { + appendStr(" "); + appendStr(TQualifier::getLayoutPackingString(qualifier.layoutPacking)); + } + if (qualifier.hasOffset()) { + appendStr(" offset="); + appendInt(qualifier.layoutOffset); + } + if (qualifier.hasAlign()) { + appendStr(" align="); + appendInt(qualifier.layoutAlign); + } + if (qualifier.hasFormat()) { + appendStr(" "); + appendStr(TQualifier::getLayoutFormatString(qualifier.layoutFormat)); + } + if (qualifier.hasXfbBuffer() && qualifier.hasXfbOffset()) { + appendStr(" xfb_buffer="); + appendUint(qualifier.layoutXfbBuffer); + } + if (qualifier.hasXfbOffset()) { + appendStr(" xfb_offset="); + appendUint(qualifier.layoutXfbOffset); + } + if (qualifier.hasXfbStride()) { + appendStr(" xfb_stride="); + appendUint(qualifier.layoutXfbStride); + } + if (qualifier.hasAttachment()) { + appendStr(" input_attachment_index="); + appendUint(qualifier.layoutAttachment); + } + if (qualifier.hasSpecConstantId()) { + appendStr(" constant_id="); + appendUint(qualifier.layoutSpecConstantId); + } + if (qualifier.layoutPushConstant) + appendStr(" push_constant"); + if (qualifier.layoutBufferReference) + appendStr(" buffer_reference"); + if (qualifier.hasBufferReferenceAlign()) { + appendStr(" buffer_reference_align="); + appendUint(1u << qualifier.layoutBufferReferenceAlign); + } + + if (qualifier.layoutPassthrough) + appendStr(" passthrough"); + if (qualifier.layoutViewportRelative) + appendStr(" layoutViewportRelative"); + if (qualifier.layoutSecondaryViewportRelativeOffset != -2048) { + appendStr(" layoutSecondaryViewportRelativeOffset="); + appendInt(qualifier.layoutSecondaryViewportRelativeOffset); + } + if (qualifier.layoutShaderRecord) + appendStr(" shaderRecordNV"); + + appendStr(")"); + } + } + + if (qualifier.invariant) + appendStr(" invariant"); + if (qualifier.noContraction) + appendStr(" noContraction"); + if (qualifier.centroid) + appendStr(" centroid"); + if (qualifier.smooth) + appendStr(" smooth"); + if (qualifier.flat) + appendStr(" flat"); + if (qualifier.nopersp) + appendStr(" noperspective"); + if (qualifier.explicitInterp) + appendStr(" __explicitInterpAMD"); + if (qualifier.pervertexNV) + appendStr(" pervertexNV"); + if (qualifier.pervertexEXT) + appendStr(" pervertexEXT"); + if (qualifier.perPrimitiveNV) + appendStr(" perprimitiveNV"); + if (qualifier.perViewNV) + appendStr(" perviewNV"); + if (qualifier.perTaskNV) + appendStr(" taskNV"); + if (qualifier.patch) + appendStr(" patch"); + if (qualifier.sample) + appendStr(" sample"); + if (qualifier.coherent) + appendStr(" coherent"); + if (qualifier.devicecoherent) + appendStr(" devicecoherent"); + if 
(qualifier.queuefamilycoherent) + appendStr(" queuefamilycoherent"); + if (qualifier.workgroupcoherent) + appendStr(" workgroupcoherent"); + if (qualifier.subgroupcoherent) + appendStr(" subgroupcoherent"); + if (qualifier.shadercallcoherent) + appendStr(" shadercallcoherent"); + if (qualifier.nonprivate) + appendStr(" nonprivate"); + if (qualifier.volatil) + appendStr(" volatile"); + if (qualifier.restrict) + appendStr(" restrict"); + if (qualifier.readonly) + appendStr(" readonly"); + if (qualifier.writeonly) + appendStr(" writeonly"); + if (qualifier.specConstant) + appendStr(" specialization-constant"); + if (qualifier.nonUniform) + appendStr(" nonuniform"); + if (qualifier.isNullInit()) + appendStr(" null-init"); + if (qualifier.isSpirvByReference()) + appendStr(" spirv_by_reference"); + if (qualifier.isSpirvLiteral()) + appendStr(" spirv_literal"); + appendStr(" "); + appendStr(getStorageQualifierString()); + } + if (getType) { + if (syntactic) { + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isVector() || isMatrix()) { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("d"); + break; + case EbtInt: + appendStr("i"); + break; + case EbtUint: + appendStr("u"); + break; + case EbtBool: + appendStr("b"); + break; + case EbtFloat: + default: + break; + } + if (isVector()) { + appendStr("vec"); + appendInt(vectorSize); + } else { + appendStr("mat"); + appendInt(matrixCols); + appendStr("x"); + appendInt(matrixRows); + } + } else if (isStruct() && structure) { + appendStr(" "); + appendStr(structName.c_str()); + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString(syntactic, getQualifiers, getPrecision, getType, (*structure)[i].type->getFieldName())); + hasHiddenMember = false; + } + } + appendStr("}"); + } else { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("double"); + break; + case EbtInt: + appendStr("int"); + break; + case EbtUint: + appendStr("uint"); + break; + case EbtBool: + appendStr("bool"); + break; + case EbtFloat: + appendStr("float"); + break; + default: + appendStr("unexpected"); + break; + } + } + if (name.length() > 0) { + appendStr(" "); + appendStr(name.c_str()); + } + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr("[]"); + else { + if (size == UnsizedArraySize) { + appendStr("["); + if (i == 0) + appendInt(arraySizes->getImplicitSize()); + appendStr("]"); + } + else { + appendStr("["); + appendInt(arraySizes->getDimSize(i)); + appendStr("]"); + } + } + } + } + } + else { + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr(" runtime-sized array of"); + else { + if (size == UnsizedArraySize) { + appendStr(" unsized"); + if (i == 0) { + appendStr(" "); + appendInt(arraySizes->getImplicitSize()); + } + } + else { + appendStr(" "); + appendInt(arraySizes->getDimSize(i)); + } + appendStr("-element array of"); + } + } + } + if (isParameterized()) { + appendStr("<"); + for (int i = 0; i < (int)typeParameters->getNumDims(); ++i) { + 
appendInt(typeParameters->getDimSize(i)); + if (i != (int)typeParameters->getNumDims() - 1) + appendStr(", "); + } + appendStr(">"); + } + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isMatrix()) { + appendStr(" "); + appendInt(matrixCols); + appendStr("X"); + appendInt(matrixRows); + appendStr(" matrix of"); + } + else if (isVector()) { + appendStr(" "); + appendInt(vectorSize); + appendStr("-component vector of"); + } + + appendStr(" "); + typeString.append(getBasicTypeString()); + + if (qualifier.builtIn != EbvNone) { + appendStr(" "); + appendStr(getBuiltInVariableString()); + } + + // Add struct/block members + if (isStruct() && structure) { + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString()); + typeString.append(" "); + typeString.append((*structure)[i].type->getFieldName()); + hasHiddenMember = false; + } + } + appendStr("}"); + } + } + } + + return typeString; + } + + TString getBasicTypeString() const + { + if (basicType == EbtSampler) + return sampler.getString(); + else + return getBasicString(); + } + + const char* getStorageQualifierString() const { return GetStorageQualifierString(qualifier.storage); } + const char* getBuiltInVariableString() const { return GetBuiltInVariableString(qualifier.builtIn); } + const char* getPrecisionQualifierString() const { return GetPrecisionQualifierString(qualifier.precision); } +#endif + + const TTypeList* getStruct() const { assert(isStruct()); return structure; } + void setStruct(TTypeList* s) { assert(isStruct()); structure = s; } + TTypeList* getWritableStruct() const { assert(isStruct()); return structure; } // This should only be used when known to not be sharing with other threads + void setBasicType(const TBasicType& t) { basicType = t; } + + int computeNumComponents() const + { + int components = 0; + + if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { + for (TTypeList::const_iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) + components += ((*tl).type)->computeNumComponents(); + } else if (matrixCols) + components = matrixCols * matrixRows; + else + components = vectorSize; + + if (arraySizes != nullptr) { + components *= arraySizes->getCumulativeSize(); + } + + return components; + } + + // append this type's mangled name to the passed in 'name' + void appendMangledName(TString& name) const + { + buildMangledName(name); + name += ';' ; + } + + // These variables are inconsistently declared inside and outside of gl_PerVertex in glslang right now. + // They are declared inside of 'in gl_PerVertex', but sitting as standalone when they are 'out'puts. + bool isInconsistentGLPerVertexMember(const TString& name) const + { + if (name == "gl_SecondaryPositionNV" || + name == "gl_PositionPerViewNV") + return true; + return false; + } + + + // Do two structure types match? They could be declared independently, + // in different places, but still might satisfy the definition of matching. + // From the spec: + // + // "Structures must have the same name, sequence of type names, and + // type definitions, and member names to be considered the same type. + // This rule applies recursively for nested or embedded types." + // + // If type mismatch in structure, return member indices through lpidx and rpidx. 
+ // If matching members for either block are exhausted, return -1 for exhausted
+ // block and the index of the unmatched member. Otherwise return {-1,-1}.
+ //
+ bool sameStructType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const
+ {
+ // Initialize error to general type mismatch.
+ if (lpidx != nullptr) {
+ *lpidx = -1;
+ *rpidx = -1;
+ }
+
+ // Most commonly, they are both nullptr, or the same pointer to the same actual structure
+ // TODO: Why return true when neither type is a structure?
+ if ((!isStruct() && !right.isStruct()) ||
+ (isStruct() && right.isStruct() && structure == right.structure))
+ return true;
+
+ if (!isStruct() || !right.isStruct())
+ return false;
+
+ // Structure names have to match
+ if (*typeName != *right.typeName)
+ return false;
+
+ // There are inconsistencies with how gl_PerVertex is setup. For now ignore those as errors if they
+ // are known inconsistencies.
+ bool isGLPerVertex = *typeName == "gl_PerVertex";
+
+ // Both being nullptr was caught above, now they both have to be structures of the same number of elements
+ if (lpidx == nullptr &&
+ (structure->size() != right.structure->size() && !isGLPerVertex)) {
+ return false;
+ }
+
+ // Compare the names and types of all the members, which have to match
+ for (size_t li = 0, ri = 0; li < structure->size() || ri < right.structure->size(); ++li, ++ri) {
+ if (lpidx != nullptr) {
+ *lpidx = static_cast<int>(li);
+ *rpidx = static_cast<int>(ri);
+ }
+ if (li < structure->size() && ri < right.structure->size()) {
+ if ((*structure)[li].type->getFieldName() == (*right.structure)[ri].type->getFieldName()) {
+ if (*(*structure)[li].type != *(*right.structure)[ri].type)
+ return false;
+ } else {
+ // Skip hidden members
+ if ((*structure)[li].type->hiddenMember()) {
+ ri--;
+ continue;
+ } else if ((*right.structure)[ri].type->hiddenMember()) {
+ li--;
+ continue;
+ }
+ // If one of the members is something that's inconsistently declared, skip over it
+ // for now.
+ if (isGLPerVertex) {
+ if (isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) {
+ ri--;
+ continue;
+ } else if (isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) {
+ li--;
+ continue;
+ }
+ } else {
+ return false;
+ }
+ }
+ // If we get here, then there should only be inconsistently declared members left
+ } else if (li < structure->size()) {
+ if (!(*structure)[li].type->hiddenMember() && !isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) {
+ if (lpidx != nullptr) {
+ *rpidx = -1;
+ }
+ return false;
+ }
+ } else {
+ if (!(*right.structure)[ri].type->hiddenMember() && !isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) {
+ if (lpidx != nullptr) {
+ *lpidx = -1;
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ bool sameReferenceType(const TType& right) const
+ {
+ if (isReference() != right.isReference())
+ return false;
+
+ if (!isReference() && !right.isReference())
+ return true;
+
+ assert(referentType != nullptr);
+ assert(right.referentType != nullptr);
+
+ if (referentType == right.referentType)
+ return true;
+
+ return *referentType == *right.referentType;
+ }
+
+ // See if two types match, in all aspects except arrayness
+ // If mismatch in structure members, return member indices in lpidx and rpidx.
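// [Editor's note -- hedged example of the comparison tiers implemented below;
// assumed usage]
//
//     TType a(EbtFloat, EvqTemporary, 4);  // vec4
//     TType b(EbtFloat, EvqTemporary, 4);  // vec4, then made vec4[3]:
//     TArraySizes* s = new TArraySizes;
//     s->addInnerSize(3);
//     b.transferArraySizes(s);
//     a.sameElementType(b);  // true: arrayness is ignored
//     a == b;                // false: operator== also requires sameArrayness()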
+ bool sameElementType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const
+ {
+ if (lpidx != nullptr) {
+ *lpidx = -1;
+ *rpidx = -1;
+ }
+ return basicType == right.basicType && sameElementShape(right, lpidx, rpidx);
+ }
+
+ // See if two types' arrayness matches
+ bool sameArrayness(const TType& right) const
+ {
+ return ((arraySizes == nullptr && right.arraySizes == nullptr) ||
+ (arraySizes != nullptr && right.arraySizes != nullptr && *arraySizes == *right.arraySizes));
+ }
+
+ // See if two types' arrayness matches in everything except their outer dimension
+ bool sameInnerArrayness(const TType& right) const
+ {
+ assert(arraySizes != nullptr && right.arraySizes != nullptr);
+ return arraySizes->sameInnerArrayness(*right.arraySizes);
+ }
+
+ // See if two types' parameters match
+ bool sameTypeParameters(const TType& right) const
+ {
+ return ((typeParameters == nullptr && right.typeParameters == nullptr) ||
+ (typeParameters != nullptr && right.typeParameters != nullptr && *typeParameters == *right.typeParameters));
+ }
+
+#ifndef GLSLANG_WEB
+ // See if two types' SPIR-V type contents match
+ bool sameSpirvType(const TType& right) const
+ {
+ return ((spirvType == nullptr && right.spirvType == nullptr) ||
+ (spirvType != nullptr && right.spirvType != nullptr && *spirvType == *right.spirvType));
+ }
+#endif
+
+ // See if two types' elements match in all ways except basic type
+ // If mismatch in structure members, return member indices in lpidx and rpidx.
+ bool sameElementShape(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const
+ {
+ if (lpidx != nullptr) {
+ *lpidx = -1;
+ *rpidx = -1;
+ }
+ return ((basicType != EbtSampler && right.basicType != EbtSampler) || sampler == right.sampler) &&
+ vectorSize == right.vectorSize &&
+ matrixCols == right.matrixCols &&
+ matrixRows == right.matrixRows &&
+ vector1 == right.vector1 &&
+ isCoopMat() == right.isCoopMat() &&
+ sameStructType(right, lpidx, rpidx) &&
+ sameReferenceType(right);
+ }
+
+ // See if a cooperative matrix type parameter with unspecified parameters is
+ // an OK function parameter
+ bool coopMatParameterOK(const TType& right) const
+ {
+ return isCoopMat() && right.isCoopMat() && (getBasicType() == right.getBasicType()) &&
+ typeParameters == nullptr && right.typeParameters != nullptr;
+ }
+
+ bool sameCoopMatBaseType(const TType &right) const {
+ bool rv = coopmat && right.coopmat;
+ if (getBasicType() == EbtFloat || getBasicType() == EbtFloat16)
+ rv = right.getBasicType() == EbtFloat || right.getBasicType() == EbtFloat16;
+ else if (getBasicType() == EbtUint || getBasicType() == EbtUint8)
+ rv = right.getBasicType() == EbtUint || right.getBasicType() == EbtUint8;
+ else if (getBasicType() == EbtInt || getBasicType() == EbtInt8)
+ rv = right.getBasicType() == EbtInt || right.getBasicType() == EbtInt8;
+ else
+ rv = false;
+ return rv;
+ }
+
+
+ // See if two types match in all ways (just the actual type, not qualification)
+ bool operator==(const TType& right) const
+ {
+#ifndef GLSLANG_WEB
+ return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right) && sameSpirvType(right);
+#else
+ return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right);
+#endif
+ }
+
+ bool operator!=(const TType& right) const
+ {
+ return ! operator==(right);
+ }
+
+ unsigned int getBufferReferenceAlignment() const
+ {
+#ifndef GLSLANG_WEB
+ if (getBasicType() == glslang::EbtReference) {
+ return getReferentType()->getQualifier().hasBufferReferenceAlign() ?
+ (1u << getReferentType()->getQualifier().layoutBufferReferenceAlign) : 16u;
+ }
+#endif
+ return 0;
+ }
+
+#ifndef GLSLANG_WEB
+ const TSpirvType& getSpirvType() const { assert(spirvType); return *spirvType; }
+#endif
+
+protected:
+ // Require consumer to pick between deep copy and shallow copy.
+ TType(const TType& type);
+ TType& operator=(const TType& type);
+
+ // Recursively copy a type graph, while preserving the graph-like
+ // quality. That is, don't make more than one copy of a structure that
+ // gets reused multiple times in the type graph.
+ void deepCopy(const TType& copyOf, TMap<TTypeList*, TTypeList*>& copiedMap)
+ {
+ shallowCopy(copyOf);
+
+#ifndef GLSLANG_WEB
+ // GL_EXT_spirv_intrinsics
+ if (copyOf.qualifier.spirvDecorate) {
+ qualifier.spirvDecorate = new TSpirvDecorate;
+ *qualifier.spirvDecorate = *copyOf.qualifier.spirvDecorate;
+ }
+
+ if (copyOf.spirvType) {
+ spirvType = new TSpirvType;
+ *spirvType = *copyOf.spirvType;
+ }
+#endif
+
+ if (copyOf.arraySizes) {
+ arraySizes = new TArraySizes;
+ *arraySizes = *copyOf.arraySizes;
+ }
+
+ if (copyOf.typeParameters) {
+ typeParameters = new TArraySizes;
+ *typeParameters = *copyOf.typeParameters;
+ }
+
+ if (copyOf.isStruct() && copyOf.structure) {
+ auto prevCopy = copiedMap.find(copyOf.structure);
+ if (prevCopy != copiedMap.end())
+ structure = prevCopy->second;
+ else {
+ structure = new TTypeList;
+ copiedMap[copyOf.structure] = structure;
+ for (unsigned int i = 0; i < copyOf.structure->size(); ++i) {
+ TTypeLoc typeLoc;
+ typeLoc.loc = (*copyOf.structure)[i].loc;
+ typeLoc.type = new TType();
+ typeLoc.type->deepCopy(*(*copyOf.structure)[i].type, copiedMap);
+ structure->push_back(typeLoc);
+ }
+ }
+ }
+
+ if (copyOf.fieldName)
+ fieldName = NewPoolTString(copyOf.fieldName->c_str());
+ if (copyOf.typeName)
+ typeName = NewPoolTString(copyOf.typeName->c_str());
+ }
+
+
+ void buildMangledName(TString&) const;
+
+ TBasicType basicType : 8;
+ int vectorSize : 4; // 1 means either scalar or 1-component vector; see vector1 to disambiguate.
+ int matrixCols : 4;
+ int matrixRows : 4;
+ bool vector1 : 1; // Backward-compatible tracking of a 1-component vector distinguished from a scalar.
+ // GLSL 4.5 never has a 1-component vector; so this will always be false until such
+ // functionality is added.
+ // HLSL does have 1-component vectors, so this will be true to disambiguate
+ // from a scalar.
+ bool coopmat : 1;
+ TQualifier qualifier;
+
+ TArraySizes* arraySizes; // nullptr unless an array; can be shared across types
+ // A type can't be both a structure (EbtStruct/EbtBlock) and a reference (EbtReference), so
+ // conserve space by making these a union
+ union {
+ TTypeList* structure; // invalid unless this is a struct; can be shared across types
+ TType *referentType; // invalid unless this is an EbtReference
+ };
+ TString *fieldName; // for structure field names
+ TString *typeName; // for structure type name
+ TSampler sampler;
+ TArraySizes* typeParameters;// nullptr unless a parameterized type; can be shared across types
+#ifndef GLSLANG_WEB
+ TSpirvType* spirvType; // SPIR-V type defined by spirv_type directive
+#endif
+};
+
+} // end namespace glslang
+
+#endif // _TYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/arrays.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/arrays.h new file mode 100644 index 0000000..7f047d9 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/arrays.h @@ -0,0 +1,341 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+//
+// Implement types for tracking GLSL arrays, arrays of arrays, etc.
+//
+
+#ifndef _ARRAYS_INCLUDED
+#define _ARRAYS_INCLUDED
+
+#include <algorithm>
+
+namespace glslang {
+
+// This is used to mean there is no size yet (unsized), it is waiting to get a size from somewhere else.
+const int UnsizedArraySize = 0;
+
+class TIntermTyped;
+extern bool SameSpecializationConstants(TIntermTyped*, TIntermTyped*);
+
+// Specialization constants need both a nominal size and a node that defines
+// the specialization constant being used. Array types are the same when their
+// size and specialization constant nodes are the same.
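// [Editor's note -- illustrative consequence of the equality rule above;
// 'specNodeX' and 'specNodeY' are hypothetical TIntermTyped* values]
//
//     TArraySize a{8, nullptr}, b{8, nullptr};
//     // a == b: true, literal sizes with no specialization-constant nodes
//     TArraySize c{8, specNodeX}, d{8, specNodeY};
//     // c == d: true only if SameSpecializationConstants(specNodeX, specNodeY)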
+struct TArraySize { + unsigned int size; + TIntermTyped* node; // nullptr means no specialization constant node + bool operator==(const TArraySize& rhs) const + { + if (size != rhs.size) + return false; + if (node == nullptr || rhs.node == nullptr) + return node == rhs.node; + + return SameSpecializationConstants(node, rhs.node); + } +}; + +// +// TSmallArrayVector is used as the container for the set of sizes in TArraySizes. +// It has generic-container semantics, while TArraySizes has array-of-array semantics. +// That is, TSmallArrayVector should be more focused on mechanism and TArraySizes on policy. +// +struct TSmallArrayVector { + // + // TODO: memory: TSmallArrayVector is intended to be smaller. + // Almost all arrays could be handled by two sizes each fitting + // in 16 bits, needing a real vector only in the cases where there + // are more than 3 sizes or a size needing more than 16 bits. + // + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSmallArrayVector() : sizes(nullptr) { } + virtual ~TSmallArrayVector() { dealloc(); } + + // For breaking into two non-shared copies, independently modifiable. + TSmallArrayVector& operator=(const TSmallArrayVector& from) + { + if (from.sizes == nullptr) + sizes = nullptr; + else { + alloc(); + *sizes = *from.sizes; + } + + return *this; + } + + int size() const + { + if (sizes == nullptr) + return 0; + return (int)sizes->size(); + } + + unsigned int frontSize() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().size; + } + + TIntermTyped* frontNode() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().node; + } + + void changeFront(unsigned int s) + { + assert(sizes != nullptr); + // this should only happen for implicitly sized arrays, not specialization constants + assert(sizes->front().node == nullptr); + sizes->front().size = s; + } + + void push_back(unsigned int e, TIntermTyped* n) + { + alloc(); + TArraySize pair = { e, n }; + sizes->push_back(pair); + } + + void push_back(const TSmallArrayVector& newDims) + { + alloc(); + sizes->insert(sizes->end(), newDims.sizes->begin(), newDims.sizes->end()); + } + + void pop_front() + { + assert(sizes != nullptr && sizes->size() > 0); + if (sizes->size() == 1) + dealloc(); + else + sizes->erase(sizes->begin()); + } + + // 'this' should currently not be holding anything, and copyNonFront + // will make it hold a copy of all but the first element of rhs. + // (This would be useful for making a type that is dereferenced by + // one dimension.) + void copyNonFront(const TSmallArrayVector& rhs) + { + assert(sizes == nullptr); + if (rhs.size() > 1) { + alloc(); + sizes->insert(sizes->begin(), rhs.sizes->begin() + 1, rhs.sizes->end()); + } + } + + unsigned int getDimSize(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].size; + } + + void setDimSize(int i, unsigned int size) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + assert((*sizes)[i].node == nullptr); + (*sizes)[i].size = size; + } + + TIntermTyped* getDimNode(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].node; + } + + bool operator==(const TSmallArrayVector& rhs) const + { + if (sizes == nullptr && rhs.sizes == nullptr) + return true; + if (sizes == nullptr || rhs.sizes == nullptr) + return false; + return *sizes == *rhs.sizes; + } + bool operator!=(const TSmallArrayVector& rhs) const { return ! 
operator==(rhs); }
+
+protected:
+ TSmallArrayVector(const TSmallArrayVector&);
+
+ void alloc()
+ {
+ if (sizes == nullptr)
+ sizes = new TVector<TArraySize>;
+ }
+ void dealloc()
+ {
+ delete sizes;
+ sizes = nullptr;
+ }
+
+ TVector<TArraySize>* sizes; // will either hold such a pointer, or in the future, hold the two array sizes
+};
+
+//
+// Represent an array, or array of arrays, to arbitrary depth. This is not
+// done through a hierarchy of types in a type tree, rather all contiguous arrayness
+// in the type hierarchy is localized into this single cumulative object.
+//
+// The arrayness in TType is a pointer, so that it can be non-allocated and zero
+// for the vast majority of types that are non-array types.
+//
+// Order Policy: these are all identical:
+// - left to right order within a contiguous set of ...[..][..][..]... in the source language
+// - index order 0, 1, 2, ... within the 'sizes' member below
+// - outer-most to inner-most
+//
+struct TArraySizes {
+ POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+ TArraySizes() : implicitArraySize(1), variablyIndexed(false) { }
+
+ // For breaking into two non-shared copies, independently modifiable.
+ TArraySizes& operator=(const TArraySizes& from)
+ {
+ implicitArraySize = from.implicitArraySize;
+ variablyIndexed = from.variablyIndexed;
+ sizes = from.sizes;
+
+ return *this;
+ }
+
+ // translate from array-of-array semantics to container semantics
+ int getNumDims() const { return sizes.size(); }
+ int getDimSize(int dim) const { return sizes.getDimSize(dim); }
+ TIntermTyped* getDimNode(int dim) const { return sizes.getDimNode(dim); }
+ void setDimSize(int dim, int size) { sizes.setDimSize(dim, size); }
+ int getOuterSize() const { return sizes.frontSize(); }
+ TIntermTyped* getOuterNode() const { return sizes.frontNode(); }
+ int getCumulativeSize() const
+ {
+ int size = 1;
+ for (int d = 0; d < sizes.size(); ++d) {
+ // this only makes sense in paths that have a known array size
+ assert(sizes.getDimSize(d) != UnsizedArraySize);
+ size *= sizes.getDimSize(d);
+ }
+ return size;
+ }
+ void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); }
+ void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); }
+ void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); }
+ void addInnerSize(TArraySize pair) {
+ sizes.push_back(pair.size, pair.node);
+ }
+ void addInnerSizes(const TArraySizes& s) { sizes.push_back(s.sizes); }
+ void changeOuterSize(int s) { sizes.changeFront((unsigned)s); }
+ int getImplicitSize() const { return implicitArraySize; }
+ void updateImplicitSize(int s) { implicitArraySize = std::max(implicitArraySize, s); }
+ bool isInnerUnsized() const
+ {
+ for (int d = 1; d < sizes.size(); ++d) {
+ if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize)
+ return true;
+ }
+
+ return false;
+ }
+ bool clearInnerUnsized()
+ {
+ for (int d = 1; d < sizes.size(); ++d) {
+ if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize)
+ setDimSize(d, 1);
+ }
+
+ return false;
+ }
+ bool isInnerSpecialization() const
+ {
+ for (int d = 1; d < sizes.size(); ++d) {
+ if (sizes.getDimNode(d) != nullptr)
+ return true;
+ }
+
+ return false;
+ }
+ bool isOuterSpecialization()
+ {
+ return sizes.getDimNode(0) != nullptr;
+ }
+
+ bool hasUnsized() const { return getOuterSize() == UnsizedArraySize || isInnerUnsized(); }
+ bool isSized() const { return getOuterSize() != UnsizedArraySize; }
+ void dereference() { sizes.pop_front(); }
+ void copyDereferenced(const TArraySizes& rhs)
+ {
+ assert(sizes.size() == 0);
+ if
(rhs.sizes.size() > 1) + sizes.copyNonFront(rhs.sizes); + } + + bool sameInnerArrayness(const TArraySizes& rhs) const + { + if (sizes.size() != rhs.sizes.size()) + return false; + + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) != rhs.sizes.getDimSize(d) || + sizes.getDimNode(d) != rhs.sizes.getDimNode(d)) + return false; + } + + return true; + } + + void setVariablyIndexed() { variablyIndexed = true; } + bool isVariablyIndexed() const { return variablyIndexed; } + + bool operator==(const TArraySizes& rhs) const { return sizes == rhs.sizes; } + bool operator!=(const TArraySizes& rhs) const { return sizes != rhs.sizes; } + +protected: + TSmallArrayVector sizes; + + TArraySizes(const TArraySizes&); + + // For tracking maximum referenced compile-time constant index. + // Applies only to the outer-most dimension. Potentially becomes + // the implicit size of the array, if not variably indexed and + // otherwise legal. + int implicitArraySize; + bool variablyIndexed; // true if array is indexed with a non compile-time constant +}; + +} // end namespace glslang + +#endif // _ARRAYS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/glslang_c_interface.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/glslang_c_interface.h new file mode 100644 index 0000000..f540f26 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/glslang_c_interface.h @@ -0,0 +1,279 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
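A minimal, self-contained C++ analogue of the order policy documented in the arrays header above: the outer-most dimension is stored first, getCumulativeSize() is the product of the known dimensions, and dereference() strips the front entry. It uses std::vector in place of the pooled TVector<TArraySize>, so it illustrates the convention rather than exercising the glslang classes themselves.

#include <cassert>
#include <vector>

int main()
{
    // GLSL "int a[3][5];" is stored outer-most first: index 0 holds 3, index 1 holds 5.
    std::vector<unsigned int> sizes = { 3, 5 };

    // getCumulativeSize(): product over all dimensions, valid only when every size is known.
    unsigned int total = 1;
    for (unsigned int s : sizes)
        total *= s;
    assert(total == 15);

    // dereference(): "a[i]" pops the outer-most dimension, leaving the sizes of "int [5]".
    sizes.erase(sizes.begin());
    assert(sizes.size() == 1 && sizes.front() == 5);

    return 0;
}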
+**/
+
+#ifndef GLSLANG_C_IFACE_H_INCLUDED
+#define GLSLANG_C_IFACE_H_INCLUDED
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "glslang_c_shader_types.h"
+
+typedef struct glslang_shader_s glslang_shader_t;
+typedef struct glslang_program_s glslang_program_t;
+
+/* TLimits counterpart */
+typedef struct glslang_limits_s {
+    bool non_inductive_for_loops;
+    bool while_loops;
+    bool do_while_loops;
+    bool general_uniform_indexing;
+    bool general_attribute_matrix_vector_indexing;
+    bool general_varying_indexing;
+    bool general_sampler_indexing;
+    bool general_variable_indexing;
+    bool general_constant_matrix_vector_indexing;
+} glslang_limits_t;
+
+/* TBuiltInResource counterpart */
+typedef struct glslang_resource_s {
+    int max_lights;
+    int max_clip_planes;
+    int max_texture_units;
+    int max_texture_coords;
+    int max_vertex_attribs;
+    int max_vertex_uniform_components;
+    int max_varying_floats;
+    int max_vertex_texture_image_units;
+    int max_combined_texture_image_units;
+    int max_texture_image_units;
+    int max_fragment_uniform_components;
+    int max_draw_buffers;
+    int max_vertex_uniform_vectors;
+    int max_varying_vectors;
+    int max_fragment_uniform_vectors;
+    int max_vertex_output_vectors;
+    int max_fragment_input_vectors;
+    int min_program_texel_offset;
+    int max_program_texel_offset;
+    int max_clip_distances;
+    int max_compute_work_group_count_x;
+    int max_compute_work_group_count_y;
+    int max_compute_work_group_count_z;
+    int max_compute_work_group_size_x;
+    int max_compute_work_group_size_y;
+    int max_compute_work_group_size_z;
+    int max_compute_uniform_components;
+    int max_compute_texture_image_units;
+    int max_compute_image_uniforms;
+    int max_compute_atomic_counters;
+    int max_compute_atomic_counter_buffers;
+    int max_varying_components;
+    int max_vertex_output_components;
+    int max_geometry_input_components;
+    int max_geometry_output_components;
+    int max_fragment_input_components;
+    int max_image_units;
+    int max_combined_image_units_and_fragment_outputs;
+    int max_combined_shader_output_resources;
+    int max_image_samples;
+    int max_vertex_image_uniforms;
+    int max_tess_control_image_uniforms;
+    int max_tess_evaluation_image_uniforms;
+    int max_geometry_image_uniforms;
+    int max_fragment_image_uniforms;
+    int max_combined_image_uniforms;
+    int max_geometry_texture_image_units;
+    int max_geometry_output_vertices;
+    int max_geometry_total_output_components;
+    int max_geometry_uniform_components;
+    int max_geometry_varying_components;
+    int max_tess_control_input_components;
+    int max_tess_control_output_components;
+    int max_tess_control_texture_image_units;
+    int max_tess_control_uniform_components;
+    int max_tess_control_total_output_components;
+    int max_tess_evaluation_input_components;
+    int max_tess_evaluation_output_components;
+    int max_tess_evaluation_texture_image_units;
+    int max_tess_evaluation_uniform_components;
+    int max_tess_patch_components;
+    int max_patch_vertices;
+    int max_tess_gen_level;
+    int max_viewports;
+    int max_vertex_atomic_counters;
+    int max_tess_control_atomic_counters;
+    int max_tess_evaluation_atomic_counters;
+    int max_geometry_atomic_counters;
+    int max_fragment_atomic_counters;
+    int max_combined_atomic_counters;
+    int max_atomic_counter_bindings;
+    int max_vertex_atomic_counter_buffers;
+    int max_tess_control_atomic_counter_buffers;
+    int max_tess_evaluation_atomic_counter_buffers;
+    int max_geometry_atomic_counter_buffers;
+    int max_fragment_atomic_counter_buffers;
+    int max_combined_atomic_counter_buffers;
+    int max_atomic_counter_buffer_size;
+    int
max_transform_feedback_buffers; + int max_transform_feedback_interleaved_components; + int max_cull_distances; + int max_combined_clip_and_cull_distances; + int max_samples; + int max_mesh_output_vertices_nv; + int max_mesh_output_primitives_nv; + int max_mesh_work_group_size_x_nv; + int max_mesh_work_group_size_y_nv; + int max_mesh_work_group_size_z_nv; + int max_task_work_group_size_x_nv; + int max_task_work_group_size_y_nv; + int max_task_work_group_size_z_nv; + int max_mesh_view_count_nv; + int max_mesh_output_vertices_ext; + int max_mesh_output_primitives_ext; + int max_mesh_work_group_size_x_ext; + int max_mesh_work_group_size_y_ext; + int max_mesh_work_group_size_z_ext; + int max_task_work_group_size_x_ext; + int max_task_work_group_size_y_ext; + int max_task_work_group_size_z_ext; + int max_mesh_view_count_ext; + int maxDualSourceDrawBuffersEXT; + + glslang_limits_t limits; +} glslang_resource_t; + +typedef struct glslang_input_s { + glslang_source_t language; + glslang_stage_t stage; + glslang_client_t client; + glslang_target_client_version_t client_version; + glslang_target_language_t target_language; + glslang_target_language_version_t target_language_version; + /** Shader source code */ + const char* code; + int default_version; + glslang_profile_t default_profile; + int force_default_version_and_profile; + int forward_compatible; + glslang_messages_t messages; + const glslang_resource_t* resource; +} glslang_input_t; + +/* Inclusion result structure allocated by C include_local/include_system callbacks */ +typedef struct glsl_include_result_s { + /* Header file name or NULL if inclusion failed */ + const char* header_name; + + /* Header contents or NULL */ + const char* header_data; + size_t header_length; + +} glsl_include_result_t; + +/* Callback for local file inclusion */ +typedef glsl_include_result_t* (*glsl_include_local_func)(void* ctx, const char* header_name, const char* includer_name, + size_t include_depth); + +/* Callback for system file inclusion */ +typedef glsl_include_result_t* (*glsl_include_system_func)(void* ctx, const char* header_name, + const char* includer_name, size_t include_depth); + +/* Callback for include result destruction */ +typedef int (*glsl_free_include_result_func)(void* ctx, glsl_include_result_t* result); + +/* Collection of callbacks for GLSL preprocessor */ +typedef struct glsl_include_callbacks_s { + glsl_include_system_func include_system; + glsl_include_local_func include_local; + glsl_free_include_result_func free_include_result; +} glsl_include_callbacks_t; + +/* SpvOptions counterpart */ +typedef struct glslang_spv_options_s { + bool generate_debug_info; + bool strip_debug_info; + bool disable_optimizer; + bool optimize_size; + bool disassemble; + bool validate; + bool emit_nonsemantic_shader_debug_info; + bool emit_nonsemantic_shader_debug_source; +} glslang_spv_options_t; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef GLSLANG_IS_SHARED_LIBRARY + #ifdef _WIN32 + #ifdef GLSLANG_EXPORTING + #define GLSLANG_EXPORT __declspec(dllexport) + #else + #define GLSLANG_EXPORT __declspec(dllimport) + #endif + #elif __GNUC__ >= 4 + #define GLSLANG_EXPORT __attribute__((visibility("default"))) + #endif +#endif // GLSLANG_IS_SHARED_LIBRARY + +#ifndef GLSLANG_EXPORT +#define GLSLANG_EXPORT +#endif + +GLSLANG_EXPORT int glslang_initialize_process(); +GLSLANG_EXPORT void glslang_finalize_process(); + +GLSLANG_EXPORT glslang_shader_t* glslang_shader_create(const glslang_input_t* input); +GLSLANG_EXPORT void 
glslang_shader_delete(glslang_shader_t* shader); +GLSLANG_EXPORT void glslang_shader_set_preamble(glslang_shader_t* shader, const char* s); +GLSLANG_EXPORT void glslang_shader_shift_binding(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base); +GLSLANG_EXPORT void glslang_shader_shift_binding_for_set(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base, unsigned int set); +GLSLANG_EXPORT void glslang_shader_set_options(glslang_shader_t* shader, int options); // glslang_shader_options_t +GLSLANG_EXPORT void glslang_shader_set_glsl_version(glslang_shader_t* shader, int version); +GLSLANG_EXPORT int glslang_shader_preprocess(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT int glslang_shader_parse(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT const char* glslang_shader_get_preprocessed_code(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_log(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_debug_log(glslang_shader_t* shader); + +GLSLANG_EXPORT glslang_program_t* glslang_program_create(); +GLSLANG_EXPORT void glslang_program_delete(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_add_shader(glslang_program_t* program, glslang_shader_t* shader); +GLSLANG_EXPORT int glslang_program_link(glslang_program_t* program, int messages); // glslang_messages_t +GLSLANG_EXPORT void glslang_program_add_source_text(glslang_program_t* program, glslang_stage_t stage, const char* text, size_t len); +GLSLANG_EXPORT void glslang_program_set_source_file(glslang_program_t* program, glslang_stage_t stage, const char* file); +GLSLANG_EXPORT int glslang_program_map_io(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_generate(glslang_program_t* program, glslang_stage_t stage); +GLSLANG_EXPORT void glslang_program_SPIRV_generate_with_options(glslang_program_t* program, glslang_stage_t stage, glslang_spv_options_t* spv_options); +GLSLANG_EXPORT size_t glslang_program_SPIRV_get_size(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_get(glslang_program_t* program, unsigned int*); +GLSLANG_EXPORT unsigned int* glslang_program_SPIRV_get_ptr(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_SPIRV_get_messages(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_log(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_debug_log(glslang_program_t* program); + +#ifdef __cplusplus +} +#endif + +#endif /* #ifdef GLSLANG_C_IFACE_INCLUDED */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/glslang_c_shader_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/glslang_c_shader_types.h new file mode 100644 index 0000000..9bc2114 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/glslang_c_shader_types.h @@ -0,0 +1,227 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**/ + +#ifndef C_SHADER_TYPES_H_INCLUDED +#define C_SHADER_TYPES_H_INCLUDED + +#define LAST_ELEMENT_MARKER(x) x + +/* EShLanguage counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX, + GLSLANG_STAGE_TESSCONTROL, + GLSLANG_STAGE_TESSEVALUATION, + GLSLANG_STAGE_GEOMETRY, + GLSLANG_STAGE_FRAGMENT, + GLSLANG_STAGE_COMPUTE, + GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_RAYGEN_NV = GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_INTERSECT_NV = GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_ANYHIT_NV = GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_CLOSESTHIT_NV = GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_MISS, + GLSLANG_STAGE_MISS_NV = GLSLANG_STAGE_MISS, + GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_CALLABLE_NV = GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_TASK, + GLSLANG_STAGE_TASK_NV = GLSLANG_STAGE_TASK, + GLSLANG_STAGE_MESH, + GLSLANG_STAGE_MESH_NV = GLSLANG_STAGE_MESH, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_COUNT), +} glslang_stage_t; // would be better as stage, but this is ancient now + +/* EShLanguageMask counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX_MASK = (1 << GLSLANG_STAGE_VERTEX), + GLSLANG_STAGE_TESSCONTROL_MASK = (1 << GLSLANG_STAGE_TESSCONTROL), + GLSLANG_STAGE_TESSEVALUATION_MASK = (1 << GLSLANG_STAGE_TESSEVALUATION), + GLSLANG_STAGE_GEOMETRY_MASK = (1 << GLSLANG_STAGE_GEOMETRY), + GLSLANG_STAGE_FRAGMENT_MASK = (1 << GLSLANG_STAGE_FRAGMENT), + GLSLANG_STAGE_COMPUTE_MASK = (1 << GLSLANG_STAGE_COMPUTE), + GLSLANG_STAGE_RAYGEN_MASK = (1 << GLSLANG_STAGE_RAYGEN), + GLSLANG_STAGE_RAYGEN_NV_MASK = GLSLANG_STAGE_RAYGEN_MASK, + GLSLANG_STAGE_INTERSECT_MASK = (1 << GLSLANG_STAGE_INTERSECT), + GLSLANG_STAGE_INTERSECT_NV_MASK = GLSLANG_STAGE_INTERSECT_MASK, + GLSLANG_STAGE_ANYHIT_MASK = (1 << GLSLANG_STAGE_ANYHIT), + GLSLANG_STAGE_ANYHIT_NV_MASK = GLSLANG_STAGE_ANYHIT_MASK, + GLSLANG_STAGE_CLOSESTHIT_MASK = (1 << GLSLANG_STAGE_CLOSESTHIT), + GLSLANG_STAGE_CLOSESTHIT_NV_MASK = GLSLANG_STAGE_CLOSESTHIT_MASK, + GLSLANG_STAGE_MISS_MASK = (1 << GLSLANG_STAGE_MISS), + GLSLANG_STAGE_MISS_NV_MASK = GLSLANG_STAGE_MISS_MASK, + GLSLANG_STAGE_CALLABLE_MASK = (1 << GLSLANG_STAGE_CALLABLE), + GLSLANG_STAGE_CALLABLE_NV_MASK = GLSLANG_STAGE_CALLABLE_MASK, + GLSLANG_STAGE_TASK_MASK = (1 << GLSLANG_STAGE_TASK), + GLSLANG_STAGE_TASK_NV_MASK = GLSLANG_STAGE_TASK_MASK, + GLSLANG_STAGE_MESH_MASK = (1 << GLSLANG_STAGE_MESH), + GLSLANG_STAGE_MESH_NV_MASK = GLSLANG_STAGE_MESH_MASK, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_MASK_COUNT), +} glslang_stage_mask_t; + +/* EShSource counterpart */ +typedef enum { + 
GLSLANG_SOURCE_NONE, + GLSLANG_SOURCE_GLSL, + GLSLANG_SOURCE_HLSL, + LAST_ELEMENT_MARKER(GLSLANG_SOURCE_COUNT), +} glslang_source_t; + +/* EShClient counterpart */ +typedef enum { + GLSLANG_CLIENT_NONE, + GLSLANG_CLIENT_VULKAN, + GLSLANG_CLIENT_OPENGL, + LAST_ELEMENT_MARKER(GLSLANG_CLIENT_COUNT), +} glslang_client_t; + +/* EShTargetLanguage counterpart */ +typedef enum { + GLSLANG_TARGET_NONE, + GLSLANG_TARGET_SPV, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_COUNT), +} glslang_target_language_t; + +/* SH_TARGET_ClientVersion counterpart */ +typedef enum { + GLSLANG_TARGET_VULKAN_1_0 = (1 << 22), + GLSLANG_TARGET_VULKAN_1_1 = (1 << 22) | (1 << 12), + GLSLANG_TARGET_VULKAN_1_2 = (1 << 22) | (2 << 12), + GLSLANG_TARGET_VULKAN_1_3 = (1 << 22) | (3 << 12), + GLSLANG_TARGET_OPENGL_450 = 450, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_CLIENT_VERSION_COUNT = 5), +} glslang_target_client_version_t; + +/* SH_TARGET_LanguageVersion counterpart */ +typedef enum { + GLSLANG_TARGET_SPV_1_0 = (1 << 16), + GLSLANG_TARGET_SPV_1_1 = (1 << 16) | (1 << 8), + GLSLANG_TARGET_SPV_1_2 = (1 << 16) | (2 << 8), + GLSLANG_TARGET_SPV_1_3 = (1 << 16) | (3 << 8), + GLSLANG_TARGET_SPV_1_4 = (1 << 16) | (4 << 8), + GLSLANG_TARGET_SPV_1_5 = (1 << 16) | (5 << 8), + GLSLANG_TARGET_SPV_1_6 = (1 << 16) | (6 << 8), + LAST_ELEMENT_MARKER(GLSLANG_TARGET_LANGUAGE_VERSION_COUNT = 7), +} glslang_target_language_version_t; + +/* EShExecutable counterpart */ +typedef enum { GLSLANG_EX_VERTEX_FRAGMENT, GLSLANG_EX_FRAGMENT } glslang_executable_t; + +// EShOptimizationLevel counterpart +// This enum is not used in the current C interface, but could be added at a later date. +// GLSLANG_OPT_NONE is the current default. +typedef enum { + GLSLANG_OPT_NO_GENERATION, + GLSLANG_OPT_NONE, + GLSLANG_OPT_SIMPLE, + GLSLANG_OPT_FULL, + LAST_ELEMENT_MARKER(GLSLANG_OPT_LEVEL_COUNT), +} glslang_optimization_level_t; + +/* EShTextureSamplerTransformMode counterpart */ +typedef enum { + GLSLANG_TEX_SAMP_TRANS_KEEP, + GLSLANG_TEX_SAMP_TRANS_UPGRADE_TEXTURE_REMOVE_SAMPLER, + LAST_ELEMENT_MARKER(GLSLANG_TEX_SAMP_TRANS_COUNT), +} glslang_texture_sampler_transform_mode_t; + +/* EShMessages counterpart */ +typedef enum { + GLSLANG_MSG_DEFAULT_BIT = 0, + GLSLANG_MSG_RELAXED_ERRORS_BIT = (1 << 0), + GLSLANG_MSG_SUPPRESS_WARNINGS_BIT = (1 << 1), + GLSLANG_MSG_AST_BIT = (1 << 2), + GLSLANG_MSG_SPV_RULES_BIT = (1 << 3), + GLSLANG_MSG_VULKAN_RULES_BIT = (1 << 4), + GLSLANG_MSG_ONLY_PREPROCESSOR_BIT = (1 << 5), + GLSLANG_MSG_READ_HLSL_BIT = (1 << 6), + GLSLANG_MSG_CASCADING_ERRORS_BIT = (1 << 7), + GLSLANG_MSG_KEEP_UNCALLED_BIT = (1 << 8), + GLSLANG_MSG_HLSL_OFFSETS_BIT = (1 << 9), + GLSLANG_MSG_DEBUG_INFO_BIT = (1 << 10), + GLSLANG_MSG_HLSL_ENABLE_16BIT_TYPES_BIT = (1 << 11), + GLSLANG_MSG_HLSL_LEGALIZATION_BIT = (1 << 12), + GLSLANG_MSG_HLSL_DX9_COMPATIBLE_BIT = (1 << 13), + GLSLANG_MSG_BUILTIN_SYMBOL_TABLE_BIT = (1 << 14), + GLSLANG_MSG_ENHANCED = (1 << 15), + LAST_ELEMENT_MARKER(GLSLANG_MSG_COUNT), +} glslang_messages_t; + +/* EShReflectionOptions counterpart */ +typedef enum { + GLSLANG_REFLECTION_DEFAULT_BIT = 0, + GLSLANG_REFLECTION_STRICT_ARRAY_SUFFIX_BIT = (1 << 0), + GLSLANG_REFLECTION_BASIC_ARRAY_SUFFIX_BIT = (1 << 1), + GLSLANG_REFLECTION_INTERMEDIATE_IOO_BIT = (1 << 2), + GLSLANG_REFLECTION_SEPARATE_BUFFERS_BIT = (1 << 3), + GLSLANG_REFLECTION_ALL_BLOCK_VARIABLES_BIT = (1 << 4), + GLSLANG_REFLECTION_UNWRAP_IO_BLOCKS_BIT = (1 << 5), + GLSLANG_REFLECTION_ALL_IO_VARIABLES_BIT = (1 << 6), + GLSLANG_REFLECTION_SHARED_STD140_SSBO_BIT = (1 << 7), + 
GLSLANG_REFLECTION_SHARED_STD140_UBO_BIT = (1 << 8), + LAST_ELEMENT_MARKER(GLSLANG_REFLECTION_COUNT), +} glslang_reflection_options_t; + +/* EProfile counterpart (from Versions.h) */ +typedef enum { + GLSLANG_BAD_PROFILE = 0, + GLSLANG_NO_PROFILE = (1 << 0), + GLSLANG_CORE_PROFILE = (1 << 1), + GLSLANG_COMPATIBILITY_PROFILE = (1 << 2), + GLSLANG_ES_PROFILE = (1 << 3), + LAST_ELEMENT_MARKER(GLSLANG_PROFILE_COUNT), +} glslang_profile_t; + +/* Shader options */ +typedef enum { + GLSLANG_SHADER_DEFAULT_BIT = 0, + GLSLANG_SHADER_AUTO_MAP_BINDINGS = (1 << 0), + GLSLANG_SHADER_AUTO_MAP_LOCATIONS = (1 << 1), + GLSLANG_SHADER_VULKAN_RULES_RELAXED = (1 << 2), + LAST_ELEMENT_MARKER(GLSLANG_SHADER_COUNT), +} glslang_shader_options_t; + +/* TResourceType counterpart */ +typedef enum { + GLSLANG_RESOURCE_TYPE_SAMPLER, + GLSLANG_RESOURCE_TYPE_TEXTURE, + GLSLANG_RESOURCE_TYPE_IMAGE, + GLSLANG_RESOURCE_TYPE_UBO, + GLSLANG_RESOURCE_TYPE_SSBO, + GLSLANG_RESOURCE_TYPE_UAV, + LAST_ELEMENT_MARKER(GLSLANG_RESOURCE_TYPE_COUNT), +} glslang_resource_type_t; + +#undef LAST_ELEMENT_MARKER + +#endif diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/intermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/intermediate.h new file mode 100644 index 0000000..a024002 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Include/intermediate.h @@ -0,0 +1,1850 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2016 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// Definition of the in-memory high-level intermediate representation +// of shaders. This is a tree that parser creates. +// +// Nodes in the tree are defined as a hierarchy of classes derived from +// TIntermNode. Each is a node in a tree. 
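The two C headers above are meant to be driven together: fill a glslang_input_t, create and parse a shader, link it into a program, then generate SPIR-V. A minimal sketch of that flow follows, with error handling reduced to early returns. compile_vertex_shader is a hypothetical helper, and the all-zero glslang_resource_t is an assumption that only suffices for trivial shaders; real callers fill the limits, for example via glslang_default_resource() from glslang/Public/resource_limits_c.h where that header is shipped.

#include <stdio.h>
#include <stdlib.h>
#include "glslang_c_interface.h"

int compile_vertex_shader(const char* source, unsigned int** spirv, size_t* n_words)
{
    glslang_input_t input = {};
    input.language = GLSLANG_SOURCE_GLSL;
    input.stage = GLSLANG_STAGE_VERTEX;
    input.client = GLSLANG_CLIENT_VULKAN;
    input.client_version = GLSLANG_TARGET_VULKAN_1_1;        // packed as (1 << 22) | (1 << 12)
    input.target_language = GLSLANG_TARGET_SPV;
    input.target_language_version = GLSLANG_TARGET_SPV_1_3;  // packed as (1 << 16) | (3 << 8)
    input.code = source;
    input.default_version = 450;
    input.default_profile = GLSLANG_NO_PROFILE;
    input.messages = GLSLANG_MSG_DEFAULT_BIT;
    static const glslang_resource_t resource = {};           // assumption: zeroed limits are enough here
    input.resource = &resource;

    glslang_shader_t* shader = glslang_shader_create(&input);
    if (!glslang_shader_preprocess(shader, &input) ||
        !glslang_shader_parse(shader, &input)) {
        fprintf(stderr, "%s\n", glslang_shader_get_info_log(shader));
        glslang_shader_delete(shader);
        return 0;
    }

    glslang_program_t* program = glslang_program_create();
    glslang_program_add_shader(program, shader);
    if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT)) {
        fprintf(stderr, "%s\n", glslang_program_get_info_log(program));
        glslang_program_delete(program);
        glslang_shader_delete(shader);
        return 0;
    }

    glslang_program_SPIRV_generate(program, GLSLANG_STAGE_VERTEX);
    *n_words = glslang_program_SPIRV_get_size(program);
    *spirv = (unsigned int*)malloc(*n_words * sizeof(unsigned int));
    glslang_program_SPIRV_get(program, *spirv);

    glslang_program_delete(program);
    glslang_shader_delete(shader);
    return 1;
}

A typical call site brackets this with glslang_initialize_process() once at startup and glslang_finalize_process() at shutdown, as declared above.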
There is no preset branching factor; +// each node can have it's own type of list of children. +// + +#ifndef __INTERMEDIATE_H +#define __INTERMEDIATE_H + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + #pragma warning(disable : 4464) // relative include path contains '..' + #pragma warning(disable : 5026) // 'glslang::TIntermUnary': move constructor was implicitly defined as deleted +#endif + +#include "../Include/Common.h" +#include "../Include/Types.h" +#include "../Include/ConstantUnion.h" + +namespace glslang { + +class TIntermediate; + +// +// Operators used by the high-level (parse tree) representation. +// +enum TOperator { + EOpNull, // if in a node, should only mean a node is still being built + EOpSequence, // denotes a list of statements, or parameters, etc. + EOpScope, // Used by debugging to denote a scoped list of statements + EOpLinkerObjects, // for aggregate node of objects the linker may need, if not reference by the rest of the AST + EOpFunctionCall, + EOpFunction, // For function definition + EOpParameters, // an aggregate listing the parameters to a function +#ifndef GLSLANG_WEB + EOpSpirvInst, +#endif + + // + // Unary operators + // + + EOpNegative, + EOpLogicalNot, + EOpVectorLogicalNot, + EOpBitwiseNot, + + EOpPostIncrement, + EOpPostDecrement, + EOpPreIncrement, + EOpPreDecrement, + + EOpCopyObject, + + EOpDeclare, // Used by debugging to force declaration of variable in correct scope + + // (u)int* -> bool + EOpConvInt8ToBool, + EOpConvUint8ToBool, + EOpConvInt16ToBool, + EOpConvUint16ToBool, + EOpConvIntToBool, + EOpConvUintToBool, + EOpConvInt64ToBool, + EOpConvUint64ToBool, + + // float* -> bool + EOpConvFloat16ToBool, + EOpConvFloatToBool, + EOpConvDoubleToBool, + + // bool -> (u)int* + EOpConvBoolToInt8, + EOpConvBoolToUint8, + EOpConvBoolToInt16, + EOpConvBoolToUint16, + EOpConvBoolToInt, + EOpConvBoolToUint, + EOpConvBoolToInt64, + EOpConvBoolToUint64, + + // bool -> float* + EOpConvBoolToFloat16, + EOpConvBoolToFloat, + EOpConvBoolToDouble, + + // int8_t -> (u)int* + EOpConvInt8ToInt16, + EOpConvInt8ToInt, + EOpConvInt8ToInt64, + EOpConvInt8ToUint8, + EOpConvInt8ToUint16, + EOpConvInt8ToUint, + EOpConvInt8ToUint64, + + // uint8_t -> (u)int* + EOpConvUint8ToInt8, + EOpConvUint8ToInt16, + EOpConvUint8ToInt, + EOpConvUint8ToInt64, + EOpConvUint8ToUint16, + EOpConvUint8ToUint, + EOpConvUint8ToUint64, + + // int8_t -> float* + EOpConvInt8ToFloat16, + EOpConvInt8ToFloat, + EOpConvInt8ToDouble, + + // uint8_t -> float* + EOpConvUint8ToFloat16, + EOpConvUint8ToFloat, + EOpConvUint8ToDouble, + + // int16_t -> (u)int* + EOpConvInt16ToInt8, + EOpConvInt16ToInt, + EOpConvInt16ToInt64, + EOpConvInt16ToUint8, + EOpConvInt16ToUint16, + EOpConvInt16ToUint, + EOpConvInt16ToUint64, + + // uint16_t -> (u)int* + EOpConvUint16ToInt8, + EOpConvUint16ToInt16, + EOpConvUint16ToInt, + EOpConvUint16ToInt64, + EOpConvUint16ToUint8, + EOpConvUint16ToUint, + EOpConvUint16ToUint64, + + // int16_t -> float* + EOpConvInt16ToFloat16, + EOpConvInt16ToFloat, + EOpConvInt16ToDouble, + + // uint16_t -> float* + EOpConvUint16ToFloat16, + EOpConvUint16ToFloat, + EOpConvUint16ToDouble, + + // int32_t -> (u)int* + EOpConvIntToInt8, + EOpConvIntToInt16, + EOpConvIntToInt64, + EOpConvIntToUint8, + EOpConvIntToUint16, + EOpConvIntToUint, + EOpConvIntToUint64, + + // uint32_t -> (u)int* + EOpConvUintToInt8, + EOpConvUintToInt16, + EOpConvUintToInt, + EOpConvUintToInt64, + EOpConvUintToUint8, + EOpConvUintToUint16, + EOpConvUintToUint64, + + // int32_t -> float* + EOpConvIntToFloat16, + 
EOpConvIntToFloat, + EOpConvIntToDouble, + + // uint32_t -> float* + EOpConvUintToFloat16, + EOpConvUintToFloat, + EOpConvUintToDouble, + + // int64_t -> (u)int* + EOpConvInt64ToInt8, + EOpConvInt64ToInt16, + EOpConvInt64ToInt, + EOpConvInt64ToUint8, + EOpConvInt64ToUint16, + EOpConvInt64ToUint, + EOpConvInt64ToUint64, + + // uint64_t -> (u)int* + EOpConvUint64ToInt8, + EOpConvUint64ToInt16, + EOpConvUint64ToInt, + EOpConvUint64ToInt64, + EOpConvUint64ToUint8, + EOpConvUint64ToUint16, + EOpConvUint64ToUint, + + // int64_t -> float* + EOpConvInt64ToFloat16, + EOpConvInt64ToFloat, + EOpConvInt64ToDouble, + + // uint64_t -> float* + EOpConvUint64ToFloat16, + EOpConvUint64ToFloat, + EOpConvUint64ToDouble, + + // float16_t -> (u)int* + EOpConvFloat16ToInt8, + EOpConvFloat16ToInt16, + EOpConvFloat16ToInt, + EOpConvFloat16ToInt64, + EOpConvFloat16ToUint8, + EOpConvFloat16ToUint16, + EOpConvFloat16ToUint, + EOpConvFloat16ToUint64, + + // float16_t -> float* + EOpConvFloat16ToFloat, + EOpConvFloat16ToDouble, + + // float -> (u)int* + EOpConvFloatToInt8, + EOpConvFloatToInt16, + EOpConvFloatToInt, + EOpConvFloatToInt64, + EOpConvFloatToUint8, + EOpConvFloatToUint16, + EOpConvFloatToUint, + EOpConvFloatToUint64, + + // float -> float* + EOpConvFloatToFloat16, + EOpConvFloatToDouble, + + // float64 _t-> (u)int* + EOpConvDoubleToInt8, + EOpConvDoubleToInt16, + EOpConvDoubleToInt, + EOpConvDoubleToInt64, + EOpConvDoubleToUint8, + EOpConvDoubleToUint16, + EOpConvDoubleToUint, + EOpConvDoubleToUint64, + + // float64_t -> float* + EOpConvDoubleToFloat16, + EOpConvDoubleToFloat, + + // uint64_t <-> pointer + EOpConvUint64ToPtr, + EOpConvPtrToUint64, + + // uvec2 <-> pointer + EOpConvUvec2ToPtr, + EOpConvPtrToUvec2, + + // uint64_t -> accelerationStructureEXT + EOpConvUint64ToAccStruct, + + // uvec2 -> accelerationStructureEXT + EOpConvUvec2ToAccStruct, + + // + // binary operations + // + + EOpAdd, + EOpSub, + EOpMul, + EOpDiv, + EOpMod, + EOpRightShift, + EOpLeftShift, + EOpAnd, + EOpInclusiveOr, + EOpExclusiveOr, + EOpEqual, + EOpNotEqual, + EOpVectorEqual, + EOpVectorNotEqual, + EOpLessThan, + EOpGreaterThan, + EOpLessThanEqual, + EOpGreaterThanEqual, + EOpComma, + + EOpVectorTimesScalar, + EOpVectorTimesMatrix, + EOpMatrixTimesVector, + EOpMatrixTimesScalar, + + EOpLogicalOr, + EOpLogicalXor, + EOpLogicalAnd, + + EOpIndexDirect, + EOpIndexIndirect, + EOpIndexDirectStruct, + + EOpVectorSwizzle, + + EOpMethod, + EOpScoping, + + // + // Built-in functions mapped to operators + // + + EOpRadians, + EOpDegrees, + EOpSin, + EOpCos, + EOpTan, + EOpAsin, + EOpAcos, + EOpAtan, + EOpSinh, + EOpCosh, + EOpTanh, + EOpAsinh, + EOpAcosh, + EOpAtanh, + + EOpPow, + EOpExp, + EOpLog, + EOpExp2, + EOpLog2, + EOpSqrt, + EOpInverseSqrt, + + EOpAbs, + EOpSign, + EOpFloor, + EOpTrunc, + EOpRound, + EOpRoundEven, + EOpCeil, + EOpFract, + EOpModf, + EOpMin, + EOpMax, + EOpClamp, + EOpMix, + EOpStep, + EOpSmoothStep, + + EOpIsNan, + EOpIsInf, + + EOpFma, + + EOpFrexp, + EOpLdexp, + + EOpFloatBitsToInt, + EOpFloatBitsToUint, + EOpIntBitsToFloat, + EOpUintBitsToFloat, + EOpDoubleBitsToInt64, + EOpDoubleBitsToUint64, + EOpInt64BitsToDouble, + EOpUint64BitsToDouble, + EOpFloat16BitsToInt16, + EOpFloat16BitsToUint16, + EOpInt16BitsToFloat16, + EOpUint16BitsToFloat16, + EOpPackSnorm2x16, + EOpUnpackSnorm2x16, + EOpPackUnorm2x16, + EOpUnpackUnorm2x16, + EOpPackSnorm4x8, + EOpUnpackSnorm4x8, + EOpPackUnorm4x8, + EOpUnpackUnorm4x8, + EOpPackHalf2x16, + EOpUnpackHalf2x16, + EOpPackDouble2x32, + EOpUnpackDouble2x32, + EOpPackInt2x32, + 
EOpUnpackInt2x32, + EOpPackUint2x32, + EOpUnpackUint2x32, + EOpPackFloat2x16, + EOpUnpackFloat2x16, + EOpPackInt2x16, + EOpUnpackInt2x16, + EOpPackUint2x16, + EOpUnpackUint2x16, + EOpPackInt4x16, + EOpUnpackInt4x16, + EOpPackUint4x16, + EOpUnpackUint4x16, + EOpPack16, + EOpPack32, + EOpPack64, + EOpUnpack32, + EOpUnpack16, + EOpUnpack8, + + EOpLength, + EOpDistance, + EOpDot, + EOpCross, + EOpNormalize, + EOpFaceForward, + EOpReflect, + EOpRefract, + + EOpMin3, + EOpMax3, + EOpMid3, + + EOpDPdx, // Fragment only + EOpDPdy, // Fragment only + EOpFwidth, // Fragment only + EOpDPdxFine, // Fragment only + EOpDPdyFine, // Fragment only + EOpFwidthFine, // Fragment only + EOpDPdxCoarse, // Fragment only + EOpDPdyCoarse, // Fragment only + EOpFwidthCoarse, // Fragment only + + EOpInterpolateAtCentroid, // Fragment only + EOpInterpolateAtSample, // Fragment only + EOpInterpolateAtOffset, // Fragment only + EOpInterpolateAtVertex, + + EOpMatrixTimesMatrix, + EOpOuterProduct, + EOpDeterminant, + EOpMatrixInverse, + EOpTranspose, + + EOpFtransform, + + EOpNoise, + + EOpEmitVertex, // geometry only + EOpEndPrimitive, // geometry only + EOpEmitStreamVertex, // geometry only + EOpEndStreamPrimitive, // geometry only + + EOpBarrier, + EOpMemoryBarrier, + EOpMemoryBarrierAtomicCounter, + EOpMemoryBarrierBuffer, + EOpMemoryBarrierImage, + EOpMemoryBarrierShared, // compute only + EOpGroupMemoryBarrier, // compute only + + EOpBallot, + EOpReadInvocation, + EOpReadFirstInvocation, + + EOpAnyInvocation, + EOpAllInvocations, + EOpAllInvocationsEqual, + + EOpSubgroupGuardStart, + EOpSubgroupBarrier, + EOpSubgroupMemoryBarrier, + EOpSubgroupMemoryBarrierBuffer, + EOpSubgroupMemoryBarrierImage, + EOpSubgroupMemoryBarrierShared, // compute only + EOpSubgroupElect, + EOpSubgroupAll, + EOpSubgroupAny, + EOpSubgroupAllEqual, + EOpSubgroupBroadcast, + EOpSubgroupBroadcastFirst, + EOpSubgroupBallot, + EOpSubgroupInverseBallot, + EOpSubgroupBallotBitExtract, + EOpSubgroupBallotBitCount, + EOpSubgroupBallotInclusiveBitCount, + EOpSubgroupBallotExclusiveBitCount, + EOpSubgroupBallotFindLSB, + EOpSubgroupBallotFindMSB, + EOpSubgroupShuffle, + EOpSubgroupShuffleXor, + EOpSubgroupShuffleUp, + EOpSubgroupShuffleDown, + EOpSubgroupAdd, + EOpSubgroupMul, + EOpSubgroupMin, + EOpSubgroupMax, + EOpSubgroupAnd, + EOpSubgroupOr, + EOpSubgroupXor, + EOpSubgroupInclusiveAdd, + EOpSubgroupInclusiveMul, + EOpSubgroupInclusiveMin, + EOpSubgroupInclusiveMax, + EOpSubgroupInclusiveAnd, + EOpSubgroupInclusiveOr, + EOpSubgroupInclusiveXor, + EOpSubgroupExclusiveAdd, + EOpSubgroupExclusiveMul, + EOpSubgroupExclusiveMin, + EOpSubgroupExclusiveMax, + EOpSubgroupExclusiveAnd, + EOpSubgroupExclusiveOr, + EOpSubgroupExclusiveXor, + EOpSubgroupClusteredAdd, + EOpSubgroupClusteredMul, + EOpSubgroupClusteredMin, + EOpSubgroupClusteredMax, + EOpSubgroupClusteredAnd, + EOpSubgroupClusteredOr, + EOpSubgroupClusteredXor, + EOpSubgroupQuadBroadcast, + EOpSubgroupQuadSwapHorizontal, + EOpSubgroupQuadSwapVertical, + EOpSubgroupQuadSwapDiagonal, + + EOpSubgroupPartition, + EOpSubgroupPartitionedAdd, + EOpSubgroupPartitionedMul, + EOpSubgroupPartitionedMin, + EOpSubgroupPartitionedMax, + EOpSubgroupPartitionedAnd, + EOpSubgroupPartitionedOr, + EOpSubgroupPartitionedXor, + EOpSubgroupPartitionedInclusiveAdd, + EOpSubgroupPartitionedInclusiveMul, + EOpSubgroupPartitionedInclusiveMin, + EOpSubgroupPartitionedInclusiveMax, + EOpSubgroupPartitionedInclusiveAnd, + EOpSubgroupPartitionedInclusiveOr, + EOpSubgroupPartitionedInclusiveXor, + 
EOpSubgroupPartitionedExclusiveAdd, + EOpSubgroupPartitionedExclusiveMul, + EOpSubgroupPartitionedExclusiveMin, + EOpSubgroupPartitionedExclusiveMax, + EOpSubgroupPartitionedExclusiveAnd, + EOpSubgroupPartitionedExclusiveOr, + EOpSubgroupPartitionedExclusiveXor, + + EOpSubgroupGuardStop, + + EOpMinInvocations, + EOpMaxInvocations, + EOpAddInvocations, + EOpMinInvocationsNonUniform, + EOpMaxInvocationsNonUniform, + EOpAddInvocationsNonUniform, + EOpMinInvocationsInclusiveScan, + EOpMaxInvocationsInclusiveScan, + EOpAddInvocationsInclusiveScan, + EOpMinInvocationsInclusiveScanNonUniform, + EOpMaxInvocationsInclusiveScanNonUniform, + EOpAddInvocationsInclusiveScanNonUniform, + EOpMinInvocationsExclusiveScan, + EOpMaxInvocationsExclusiveScan, + EOpAddInvocationsExclusiveScan, + EOpMinInvocationsExclusiveScanNonUniform, + EOpMaxInvocationsExclusiveScanNonUniform, + EOpAddInvocationsExclusiveScanNonUniform, + EOpSwizzleInvocations, + EOpSwizzleInvocationsMasked, + EOpWriteInvocation, + EOpMbcnt, + + EOpCubeFaceIndex, + EOpCubeFaceCoord, + EOpTime, + + EOpAtomicAdd, + EOpAtomicSubtract, + EOpAtomicMin, + EOpAtomicMax, + EOpAtomicAnd, + EOpAtomicOr, + EOpAtomicXor, + EOpAtomicExchange, + EOpAtomicCompSwap, + EOpAtomicLoad, + EOpAtomicStore, + + EOpAtomicCounterIncrement, // results in pre-increment value + EOpAtomicCounterDecrement, // results in post-decrement value + EOpAtomicCounter, + EOpAtomicCounterAdd, + EOpAtomicCounterSubtract, + EOpAtomicCounterMin, + EOpAtomicCounterMax, + EOpAtomicCounterAnd, + EOpAtomicCounterOr, + EOpAtomicCounterXor, + EOpAtomicCounterExchange, + EOpAtomicCounterCompSwap, + + EOpAny, + EOpAll, + + EOpCooperativeMatrixLoad, + EOpCooperativeMatrixStore, + EOpCooperativeMatrixMulAdd, + + EOpBeginInvocationInterlock, // Fragment only + EOpEndInvocationInterlock, // Fragment only + + EOpIsHelperInvocation, + + EOpDebugPrintf, + + // + // Branch + // + + EOpKill, // Fragment only + EOpTerminateInvocation, // Fragment only + EOpDemote, // Fragment only + EOpTerminateRayKHR, // Any-hit only + EOpIgnoreIntersectionKHR, // Any-hit only + EOpReturn, + EOpBreak, + EOpContinue, + EOpCase, + EOpDefault, + + // + // Constructors + // + + EOpConstructGuardStart, + EOpConstructInt, // these first scalar forms also identify what implicit conversion is needed + EOpConstructUint, + EOpConstructInt8, + EOpConstructUint8, + EOpConstructInt16, + EOpConstructUint16, + EOpConstructInt64, + EOpConstructUint64, + EOpConstructBool, + EOpConstructFloat, + EOpConstructDouble, + // Keep vector and matrix constructors in a consistent relative order for + // TParseContext::constructBuiltIn, which converts between 8/16/32 bit + // vector constructors + EOpConstructVec2, + EOpConstructVec3, + EOpConstructVec4, + EOpConstructMat2x2, + EOpConstructMat2x3, + EOpConstructMat2x4, + EOpConstructMat3x2, + EOpConstructMat3x3, + EOpConstructMat3x4, + EOpConstructMat4x2, + EOpConstructMat4x3, + EOpConstructMat4x4, + EOpConstructDVec2, + EOpConstructDVec3, + EOpConstructDVec4, + EOpConstructBVec2, + EOpConstructBVec3, + EOpConstructBVec4, + EOpConstructI8Vec2, + EOpConstructI8Vec3, + EOpConstructI8Vec4, + EOpConstructU8Vec2, + EOpConstructU8Vec3, + EOpConstructU8Vec4, + EOpConstructI16Vec2, + EOpConstructI16Vec3, + EOpConstructI16Vec4, + EOpConstructU16Vec2, + EOpConstructU16Vec3, + EOpConstructU16Vec4, + EOpConstructIVec2, + EOpConstructIVec3, + EOpConstructIVec4, + EOpConstructUVec2, + EOpConstructUVec3, + EOpConstructUVec4, + EOpConstructI64Vec2, + EOpConstructI64Vec3, + EOpConstructI64Vec4, + 
EOpConstructU64Vec2, + EOpConstructU64Vec3, + EOpConstructU64Vec4, + EOpConstructDMat2x2, + EOpConstructDMat2x3, + EOpConstructDMat2x4, + EOpConstructDMat3x2, + EOpConstructDMat3x3, + EOpConstructDMat3x4, + EOpConstructDMat4x2, + EOpConstructDMat4x3, + EOpConstructDMat4x4, + EOpConstructIMat2x2, + EOpConstructIMat2x3, + EOpConstructIMat2x4, + EOpConstructIMat3x2, + EOpConstructIMat3x3, + EOpConstructIMat3x4, + EOpConstructIMat4x2, + EOpConstructIMat4x3, + EOpConstructIMat4x4, + EOpConstructUMat2x2, + EOpConstructUMat2x3, + EOpConstructUMat2x4, + EOpConstructUMat3x2, + EOpConstructUMat3x3, + EOpConstructUMat3x4, + EOpConstructUMat4x2, + EOpConstructUMat4x3, + EOpConstructUMat4x4, + EOpConstructBMat2x2, + EOpConstructBMat2x3, + EOpConstructBMat2x4, + EOpConstructBMat3x2, + EOpConstructBMat3x3, + EOpConstructBMat3x4, + EOpConstructBMat4x2, + EOpConstructBMat4x3, + EOpConstructBMat4x4, + EOpConstructFloat16, + EOpConstructF16Vec2, + EOpConstructF16Vec3, + EOpConstructF16Vec4, + EOpConstructF16Mat2x2, + EOpConstructF16Mat2x3, + EOpConstructF16Mat2x4, + EOpConstructF16Mat3x2, + EOpConstructF16Mat3x3, + EOpConstructF16Mat3x4, + EOpConstructF16Mat4x2, + EOpConstructF16Mat4x3, + EOpConstructF16Mat4x4, + EOpConstructStruct, + EOpConstructTextureSampler, + EOpConstructNonuniform, // expected to be transformed away, not present in final AST + EOpConstructReference, + EOpConstructCooperativeMatrix, + EOpConstructAccStruct, + EOpConstructGuardEnd, + + // + // moves + // + + EOpAssign, + EOpAddAssign, + EOpSubAssign, + EOpMulAssign, + EOpVectorTimesMatrixAssign, + EOpVectorTimesScalarAssign, + EOpMatrixTimesScalarAssign, + EOpMatrixTimesMatrixAssign, + EOpDivAssign, + EOpModAssign, + EOpAndAssign, + EOpInclusiveOrAssign, + EOpExclusiveOrAssign, + EOpLeftShiftAssign, + EOpRightShiftAssign, + + // + // Array operators + // + + // Can apply to arrays, vectors, or matrices. + // Can be decomposed to a constant at compile time, but this does not always happen, + // due to link-time effects. So, consumer can expect either a link-time sized or + // run-time sized array. 
+ EOpArrayLength, + + // + // Image operations + // + + EOpImageGuardBegin, + + EOpImageQuerySize, + EOpImageQuerySamples, + EOpImageLoad, + EOpImageStore, + EOpImageLoadLod, + EOpImageStoreLod, + EOpImageAtomicAdd, + EOpImageAtomicMin, + EOpImageAtomicMax, + EOpImageAtomicAnd, + EOpImageAtomicOr, + EOpImageAtomicXor, + EOpImageAtomicExchange, + EOpImageAtomicCompSwap, + EOpImageAtomicLoad, + EOpImageAtomicStore, + + EOpSubpassLoad, + EOpSubpassLoadMS, + EOpSparseImageLoad, + EOpSparseImageLoadLod, + + EOpImageGuardEnd, + + // + // Texture operations + // + + EOpTextureGuardBegin, + + EOpTextureQuerySize, + EOpTextureQueryLod, + EOpTextureQueryLevels, + EOpTextureQuerySamples, + + EOpSamplingGuardBegin, + + EOpTexture, + EOpTextureProj, + EOpTextureLod, + EOpTextureOffset, + EOpTextureFetch, + EOpTextureFetchOffset, + EOpTextureProjOffset, + EOpTextureLodOffset, + EOpTextureProjLod, + EOpTextureProjLodOffset, + EOpTextureGrad, + EOpTextureGradOffset, + EOpTextureProjGrad, + EOpTextureProjGradOffset, + EOpTextureGather, + EOpTextureGatherOffset, + EOpTextureGatherOffsets, + EOpTextureClamp, + EOpTextureOffsetClamp, + EOpTextureGradClamp, + EOpTextureGradOffsetClamp, + EOpTextureGatherLod, + EOpTextureGatherLodOffset, + EOpTextureGatherLodOffsets, + EOpFragmentMaskFetch, + EOpFragmentFetch, + + EOpSparseTextureGuardBegin, + + EOpSparseTexture, + EOpSparseTextureLod, + EOpSparseTextureOffset, + EOpSparseTextureFetch, + EOpSparseTextureFetchOffset, + EOpSparseTextureLodOffset, + EOpSparseTextureGrad, + EOpSparseTextureGradOffset, + EOpSparseTextureGather, + EOpSparseTextureGatherOffset, + EOpSparseTextureGatherOffsets, + EOpSparseTexelsResident, + EOpSparseTextureClamp, + EOpSparseTextureOffsetClamp, + EOpSparseTextureGradClamp, + EOpSparseTextureGradOffsetClamp, + EOpSparseTextureGatherLod, + EOpSparseTextureGatherLodOffset, + EOpSparseTextureGatherLodOffsets, + + EOpSparseTextureGuardEnd, + + EOpImageFootprintGuardBegin, + EOpImageSampleFootprintNV, + EOpImageSampleFootprintClampNV, + EOpImageSampleFootprintLodNV, + EOpImageSampleFootprintGradNV, + EOpImageSampleFootprintGradClampNV, + EOpImageFootprintGuardEnd, + EOpSamplingGuardEnd, + EOpTextureGuardEnd, + + // + // Integer operations + // + + EOpAddCarry, + EOpSubBorrow, + EOpUMulExtended, + EOpIMulExtended, + EOpBitfieldExtract, + EOpBitfieldInsert, + EOpBitFieldReverse, + EOpBitCount, + EOpFindLSB, + EOpFindMSB, + + EOpCountLeadingZeros, + EOpCountTrailingZeros, + EOpAbsDifference, + EOpAddSaturate, + EOpSubSaturate, + EOpAverage, + EOpAverageRounded, + EOpMul32x16, + + EOpTraceNV, + EOpTraceRayMotionNV, + EOpTraceKHR, + EOpReportIntersection, + EOpIgnoreIntersectionNV, + EOpTerminateRayNV, + EOpExecuteCallableNV, + EOpExecuteCallableKHR, + EOpWritePackedPrimitiveIndices4x8NV, + EOpEmitMeshTasksEXT, + EOpSetMeshOutputsEXT, + + // + // GL_EXT_ray_query operations + // + + EOpRayQueryInitialize, + EOpRayQueryTerminate, + EOpRayQueryGenerateIntersection, + EOpRayQueryConfirmIntersection, + EOpRayQueryProceed, + EOpRayQueryGetIntersectionType, + EOpRayQueryGetRayTMin, + EOpRayQueryGetRayFlags, + EOpRayQueryGetIntersectionT, + EOpRayQueryGetIntersectionInstanceCustomIndex, + EOpRayQueryGetIntersectionInstanceId, + EOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffset, + EOpRayQueryGetIntersectionGeometryIndex, + EOpRayQueryGetIntersectionPrimitiveIndex, + EOpRayQueryGetIntersectionBarycentrics, + EOpRayQueryGetIntersectionFrontFace, + EOpRayQueryGetIntersectionCandidateAABBOpaque, + EOpRayQueryGetIntersectionObjectRayDirection, + 
EOpRayQueryGetIntersectionObjectRayOrigin, + EOpRayQueryGetWorldRayDirection, + EOpRayQueryGetWorldRayOrigin, + EOpRayQueryGetIntersectionObjectToWorld, + EOpRayQueryGetIntersectionWorldToObject, + + // + // HLSL operations + // + + EOpClip, // discard if input value < 0 + EOpIsFinite, + EOpLog10, // base 10 log + EOpRcp, // 1/x + EOpSaturate, // clamp from 0 to 1 + EOpSinCos, // sin and cos in out parameters + EOpGenMul, // mul(x,y) on any of mat/vec/scalars + EOpDst, // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w + EOpInterlockedAdd, // atomic ops, but uses [optional] out arg instead of return + EOpInterlockedAnd, // ... + EOpInterlockedCompareExchange, // ... + EOpInterlockedCompareStore, // ... + EOpInterlockedExchange, // ... + EOpInterlockedMax, // ... + EOpInterlockedMin, // ... + EOpInterlockedOr, // ... + EOpInterlockedXor, // ... + EOpAllMemoryBarrierWithGroupSync, // memory barriers without non-hlsl AST equivalents + EOpDeviceMemoryBarrier, // ... + EOpDeviceMemoryBarrierWithGroupSync, // ... + EOpWorkgroupMemoryBarrier, // ... + EOpWorkgroupMemoryBarrierWithGroupSync, // ... + EOpEvaluateAttributeSnapped, // InterpolateAtOffset with int position on 16x16 grid + EOpF32tof16, // HLSL conversion: half of a PackHalf2x16 + EOpF16tof32, // HLSL conversion: half of an UnpackHalf2x16 + EOpLit, // HLSL lighting coefficient vector + EOpTextureBias, // HLSL texture bias: will be lowered to EOpTexture + EOpAsDouble, // slightly different from EOpUint64BitsToDouble + EOpD3DCOLORtoUBYTE4, // convert and swizzle 4-component color to UBYTE4 range + + EOpMethodSample, // Texture object methods. These are translated to existing + EOpMethodSampleBias, // AST methods, and exist to represent HLSL semantics until that + EOpMethodSampleCmp, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodSampleCmpLevelZero, // ... + EOpMethodSampleGrad, // ... + EOpMethodSampleLevel, // ... + EOpMethodLoad, // ... + EOpMethodGetDimensions, // ... + EOpMethodGetSamplePosition, // ... + EOpMethodGather, // ... + EOpMethodCalculateLevelOfDetail, // ... + EOpMethodCalculateLevelOfDetailUnclamped, // ... + + // Load already defined above for textures + EOpMethodLoad2, // Structure buffer object methods. These are translated to existing + EOpMethodLoad3, // AST methods, and exist to represent HLSL semantics until that + EOpMethodLoad4, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodStore, // ... + EOpMethodStore2, // ... + EOpMethodStore3, // ... + EOpMethodStore4, // ... + EOpMethodIncrementCounter, // ... + EOpMethodDecrementCounter, // ... + // EOpMethodAppend is defined for geo shaders below + EOpMethodConsume, + + // SM5 texture methods + EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about + EOpMethodGatherGreen, // translation to existing AST opcodes. They exist temporarily + EOpMethodGatherBlue, // because HLSL arguments are slightly different. + EOpMethodGatherAlpha, // ... + EOpMethodGatherCmp, // ... + EOpMethodGatherCmpRed, // ... + EOpMethodGatherCmpGreen, // ... + EOpMethodGatherCmpBlue, // ... + EOpMethodGatherCmpAlpha, // ... + + // geometry methods + EOpMethodAppend, // Geometry shader methods + EOpMethodRestartStrip, // ... + + // matrix + EOpMatrixSwizzle, // select multiple matrix components (non-column) + + // SM6 wave ops + EOpWaveGetLaneCount, // Will decompose to gl_SubgroupSize. + EOpWaveGetLaneIndex, // Will decompose to gl_SubgroupInvocationID. 
+ EOpWaveActiveCountBits, // Will decompose to subgroupBallotBitCount(subgroupBallot()). + EOpWavePrefixCountBits, // Will decompose to subgroupBallotInclusiveBitCount(subgroupBallot()). + + // Shader Clock Ops + EOpReadClockSubgroupKHR, + EOpReadClockDeviceKHR, +}; + +class TIntermTraverser; +class TIntermOperator; +class TIntermAggregate; +class TIntermUnary; +class TIntermBinary; +class TIntermConstantUnion; +class TIntermSelection; +class TIntermSwitch; +class TIntermBranch; +class TIntermTyped; +class TIntermMethod; +class TIntermSymbol; +class TIntermLoop; + +} // end namespace glslang + +// +// Base class for the tree nodes +// +// (Put outside the glslang namespace, as it's used as part of the external interface.) +// +class TIntermNode { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + + TIntermNode() { loc.init(); } + virtual const glslang::TSourceLoc& getLoc() const { return loc; } + virtual void setLoc(const glslang::TSourceLoc& l) { loc = l; } + virtual void traverse(glslang::TIntermTraverser*) = 0; + virtual glslang::TIntermTyped* getAsTyped() { return 0; } + virtual glslang::TIntermOperator* getAsOperator() { return 0; } + virtual glslang::TIntermConstantUnion* getAsConstantUnion() { return 0; } + virtual glslang::TIntermAggregate* getAsAggregate() { return 0; } + virtual glslang::TIntermUnary* getAsUnaryNode() { return 0; } + virtual glslang::TIntermBinary* getAsBinaryNode() { return 0; } + virtual glslang::TIntermSelection* getAsSelectionNode() { return 0; } + virtual glslang::TIntermSwitch* getAsSwitchNode() { return 0; } + virtual glslang::TIntermMethod* getAsMethodNode() { return 0; } + virtual glslang::TIntermSymbol* getAsSymbolNode() { return 0; } + virtual glslang::TIntermBranch* getAsBranchNode() { return 0; } + virtual glslang::TIntermLoop* getAsLoopNode() { return 0; } + + virtual const glslang::TIntermTyped* getAsTyped() const { return 0; } + virtual const glslang::TIntermOperator* getAsOperator() const { return 0; } + virtual const glslang::TIntermConstantUnion* getAsConstantUnion() const { return 0; } + virtual const glslang::TIntermAggregate* getAsAggregate() const { return 0; } + virtual const glslang::TIntermUnary* getAsUnaryNode() const { return 0; } + virtual const glslang::TIntermBinary* getAsBinaryNode() const { return 0; } + virtual const glslang::TIntermSelection* getAsSelectionNode() const { return 0; } + virtual const glslang::TIntermSwitch* getAsSwitchNode() const { return 0; } + virtual const glslang::TIntermMethod* getAsMethodNode() const { return 0; } + virtual const glslang::TIntermSymbol* getAsSymbolNode() const { return 0; } + virtual const glslang::TIntermBranch* getAsBranchNode() const { return 0; } + virtual const glslang::TIntermLoop* getAsLoopNode() const { return 0; } + virtual ~TIntermNode() { } + +protected: + TIntermNode(const TIntermNode&); + TIntermNode& operator=(const TIntermNode&); + glslang::TSourceLoc loc; +}; + +namespace glslang { + +// +// This is just to help yacc. +// +struct TIntermNodePair { + TIntermNode* node1; + TIntermNode* node2; +}; + +// +// Intermediate class for nodes that have a type. 
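The getAs*() virtuals on TIntermNode above are glslang's substitute for dynamic_cast: each returns the node itself when the concrete kind matches and 0 otherwise, so consumers probe and branch. A small sketch of that style; kindOf is a hypothetical helper, and the relative include path mirrors this header's own convention, assuming the surrounding glslang tree is on the include path.

#include "../Include/intermediate.h"

static const char* kindOf(TIntermNode* node)
{
    // Probe from the leaf kinds down to the operator categories; the first
    // non-null getAs*() identifies the node without RTTI.
    if (node->getAsSymbolNode())    return "symbol";
    if (node->getAsConstantUnion()) return "constant";
    if (node->getAsBinaryNode())    return "binary operator";
    if (node->getAsUnaryNode())     return "unary operator";
    if (node->getAsAggregate())     return "aggregate";
    if (node->getAsLoopNode())      return "loop";
    if (node->getAsBranchNode())    return "branch";
    return "other";
}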
+// +class TIntermTyped : public TIntermNode { +public: + TIntermTyped(const TType& t) { type.shallowCopy(t); } + TIntermTyped(TBasicType basicType) { TType bt(basicType); type.shallowCopy(bt); } + virtual TIntermTyped* getAsTyped() { return this; } + virtual const TIntermTyped* getAsTyped() const { return this; } + virtual void setType(const TType& t) { type.shallowCopy(t); } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { return type; } + + virtual TBasicType getBasicType() const { return type.getBasicType(); } + virtual TQualifier& getQualifier() { return type.getQualifier(); } + virtual const TQualifier& getQualifier() const { return type.getQualifier(); } + virtual TArraySizes* getArraySizes() { return type.getArraySizes(); } + virtual const TArraySizes* getArraySizes() const { return type.getArraySizes(); } + virtual void propagatePrecision(TPrecisionQualifier); + virtual int getVectorSize() const { return type.getVectorSize(); } + virtual int getMatrixCols() const { return type.getMatrixCols(); } + virtual int getMatrixRows() const { return type.getMatrixRows(); } + virtual bool isMatrix() const { return type.isMatrix(); } + virtual bool isArray() const { return type.isArray(); } + virtual bool isVector() const { return type.isVector(); } + virtual bool isScalar() const { return type.isScalar(); } + virtual bool isStruct() const { return type.isStruct(); } + virtual bool isFloatingDomain() const { return type.isFloatingDomain(); } + virtual bool isIntegerDomain() const { return type.isIntegerDomain(); } + bool isAtomic() const { return type.isAtomic(); } + bool isReference() const { return type.isReference(); } + TString getCompleteString(bool enhanced = false) const { return type.getCompleteString(enhanced); } + +protected: + TIntermTyped& operator=(const TIntermTyped&); + TType type; +}; + +// +// Handle for, do-while, and while loops. 
+// +class TIntermLoop : public TIntermNode { +public: + TIntermLoop(TIntermNode* aBody, TIntermTyped* aTest, TIntermTyped* aTerminal, bool testFirst) : + body(aBody), + test(aTest), + terminal(aTerminal), + first(testFirst), + unroll(false), + dontUnroll(false), + dependency(0), + minIterations(0), + maxIterations(iterationsInfinite), + iterationMultiple(1), + peelCount(0), + partialCount(0) + { } + + virtual TIntermLoop* getAsLoopNode() { return this; } + virtual const TIntermLoop* getAsLoopNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TIntermNode* getBody() const { return body; } + TIntermTyped* getTest() const { return test; } + TIntermTyped* getTerminal() const { return terminal; } + bool testFirst() const { return first; } + + void setUnroll() { unroll = true; } + void setDontUnroll() { + dontUnroll = true; + peelCount = 0; + partialCount = 0; + } + bool getUnroll() const { return unroll; } + bool getDontUnroll() const { return dontUnroll; } + + static const unsigned int dependencyInfinite = 0xFFFFFFFF; + static const unsigned int iterationsInfinite = 0xFFFFFFFF; + void setLoopDependency(int d) { dependency = d; } + int getLoopDependency() const { return dependency; } + + void setMinIterations(unsigned int v) { minIterations = v; } + unsigned int getMinIterations() const { return minIterations; } + void setMaxIterations(unsigned int v) { maxIterations = v; } + unsigned int getMaxIterations() const { return maxIterations; } + void setIterationMultiple(unsigned int v) { iterationMultiple = v; } + unsigned int getIterationMultiple() const { return iterationMultiple; } + void setPeelCount(unsigned int v) { + peelCount = v; + dontUnroll = false; + } + unsigned int getPeelCount() const { return peelCount; } + void setPartialCount(unsigned int v) { + partialCount = v; + dontUnroll = false; + } + unsigned int getPartialCount() const { return partialCount; } + +protected: + TIntermNode* body; // code to loop over + TIntermTyped* test; // exit condition associated with loop, could be 0 for 'for' loops + TIntermTyped* terminal; // exists for for-loops + bool first; // true for while and for, not for do-while + bool unroll; // true if unroll requested + bool dontUnroll; // true if request to not unroll + unsigned int dependency; // loop dependency hint; 0 means not set or unknown + unsigned int minIterations; // as per the SPIR-V specification + unsigned int maxIterations; // as per the SPIR-V specification + unsigned int iterationMultiple; // as per the SPIR-V specification + unsigned int peelCount; // as per the SPIR-V specification + unsigned int partialCount; // as per the SPIR-V specification +}; + +// +// Handle case, break, continue, return, and kill. +// +class TIntermBranch : public TIntermNode { +public: + TIntermBranch(TOperator op, TIntermTyped* e) : + flowOp(op), + expression(e) { } + virtual TIntermBranch* getAsBranchNode() { return this; } + virtual const TIntermBranch* getAsBranchNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TOperator getFlowOp() const { return flowOp; } + TIntermTyped* getExpression() const { return expression; } + void setExpression(TIntermTyped* pExpression) { expression = pExpression; } + void updatePrecision(TPrecisionQualifier parentPrecision); +protected: + TOperator flowOp; + TIntermTyped* expression; +}; + +// +// Represent method names before seeing their calling signature +// or resolving them to operations. Just an expression as the base object +// and a textural name. 
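The unroll, dependency, and iteration members of TIntermLoop above are all annotated "as per the SPIR-V specification": they exist to populate SPIR-V's LoopControl operand when the tree is lowered. A condensed, hypothetical sketch of that mapping; the literal mask values are the published LoopControl bits from spirv.hpp, and the real translation lives in glslang's SPIR-V back end, not in this header.

unsigned int loopControlMask(const glslang::TIntermLoop& loop)
{
    unsigned int mask = 0;                                       // LoopControlMaskNone
    if (loop.getUnroll())                mask |= 0x00000001;     // Unroll
    if (loop.getDontUnroll())            mask |= 0x00000002;     // DontUnroll
    if ((unsigned int)loop.getLoopDependency() == glslang::TIntermLoop::dependencyInfinite)
        mask |= 0x00000004;                                      // DependencyInfinite
    else if (loop.getLoopDependency() > 0)
        mask |= 0x00000008;                                      // DependencyLength, plus a literal operand
    if (loop.getMinIterations() > 0)     mask |= 0x00000010;     // MinIterations
    if (loop.getMaxIterations() != glslang::TIntermLoop::iterationsInfinite)
        mask |= 0x00000020;                                      // MaxIterations
    if (loop.getIterationMultiple() > 1) mask |= 0x00000040;     // IterationMultiple
    if (loop.getPeelCount() > 0)         mask |= 0x00000080;     // PeelCount
    if (loop.getPartialCount() > 0)      mask |= 0x00000100;     // PartialCount
    return mask;
}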
+// +class TIntermMethod : public TIntermTyped { +public: + TIntermMethod(TIntermTyped* o, const TType& t, const TString& m) : TIntermTyped(t), object(o), method(m) { } + virtual TIntermMethod* getAsMethodNode() { return this; } + virtual const TIntermMethod* getAsMethodNode() const { return this; } + virtual const TString& getMethodName() const { return method; } + virtual TIntermTyped* getObject() const { return object; } + virtual void traverse(TIntermTraverser*); +protected: + TIntermTyped* object; + TString method; +}; + +// +// Nodes that correspond to symbols or constants in the source code. +// +class TIntermSymbol : public TIntermTyped { +public: + // if symbol is initialized as symbol(sym), the memory comes from the pool allocator of sym. If sym comes from + // per process threadPoolAllocator, then it causes increased memory usage per compile + // it is essential to use "symbol = sym" to assign to symbol + TIntermSymbol(long long i, const TString& n, const TType& t) + : TIntermTyped(t), id(i), +#ifndef GLSLANG_WEB + flattenSubset(-1), +#endif + constSubtree(nullptr) + { name = n; } + virtual long long getId() const { return id; } + virtual void changeId(long long i) { id = i; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual TIntermSymbol* getAsSymbolNode() { return this; } + virtual const TIntermSymbol* getAsSymbolNode() const { return this; } + void setConstArray(const TConstUnionArray& c) { constArray = c; } + const TConstUnionArray& getConstArray() const { return constArray; } + void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + TIntermTyped* getConstSubtree() const { return constSubtree; } +#ifndef GLSLANG_WEB + void setFlattenSubset(int subset) { flattenSubset = subset; } + virtual const TString& getAccessName() const; + + int getFlattenSubset() const { return flattenSubset; } // -1 means full object +#endif + + // This is meant for cases where a node has already been constructed, and + // later on, it becomes necessary to switch to a different symbol. 
+ virtual void switchId(long long newId) { id = newId; } + +protected: + long long id; // the unique id of the symbol this node represents +#ifndef GLSLANG_WEB + int flattenSubset; // how deeply the flattened object rooted at id has been dereferenced +#endif + TString name; // the name of the symbol this node represents + TConstUnionArray constArray; // if the symbol is a front-end compile-time constant, this is its value + TIntermTyped* constSubtree; +}; + +class TIntermConstantUnion : public TIntermTyped { +public: + TIntermConstantUnion(const TConstUnionArray& ua, const TType& t) : TIntermTyped(t), constArray(ua), literal(false) { } + const TConstUnionArray& getConstArray() const { return constArray; } + virtual TIntermConstantUnion* getAsConstantUnion() { return this; } + virtual const TIntermConstantUnion* getAsConstantUnion() const { return this; } + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* fold(TOperator, const TIntermTyped*) const; + virtual TIntermTyped* fold(TOperator, const TType&) const; + void setLiteral() { literal = true; } + void setExpression() { literal = false; } + bool isLiteral() const { return literal; } + +protected: + TIntermConstantUnion& operator=(const TIntermConstantUnion&); + + const TConstUnionArray constArray; + bool literal; // true if node represents a literal in the source code +}; + +// Represent the independent aspects of a texturing TOperator +struct TCrackedTextureOp { + bool query; + bool proj; + bool lod; + bool fetch; + bool offset; + bool offsets; + bool gather; + bool grad; + bool subpass; + bool lodClamp; + bool fragMask; +}; + +// +// Intermediate class for node types that hold operators. +// +class TIntermOperator : public TIntermTyped { +public: + virtual TIntermOperator* getAsOperator() { return this; } + virtual const TIntermOperator* getAsOperator() const { return this; } + TOperator getOp() const { return op; } + void setOp(TOperator newOp) { op = newOp; } + bool modifiesState() const; + bool isConstructor() const; + bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; } + bool isSampling() const { return op > EOpSamplingGuardBegin && op < EOpSamplingGuardEnd; } +#ifdef GLSLANG_WEB + bool isImage() const { return false; } + bool isSparseTexture() const { return false; } + bool isImageFootprint() const { return false; } + bool isSparseImage() const { return false; } + bool isSubgroup() const { return false; } +#else + bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; } + bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; } + bool isImageFootprint() const { return op > EOpImageFootprintGuardBegin && op < EOpImageFootprintGuardEnd; } + bool isSparseImage() const { return op == EOpSparseImageLoad; } + bool isSubgroup() const { return op > EOpSubgroupGuardStart && op < EOpSubgroupGuardStop; } +#endif + + void setOperationPrecision(TPrecisionQualifier p) { operationPrecision = p; } + TPrecisionQualifier getOperationPrecision() const { return operationPrecision != EpqNone ? + operationPrecision : + type.getQualifier().precision; } + TString getCompleteString() const + { + TString cs = type.getCompleteString(); + if (getOperationPrecision() != type.getQualifier().precision) { + cs += ", operation at "; + cs += GetPrecisionQualifierString(getOperationPrecision()); + } + + return cs; + } + + // Crack the op into the individual dimensions of texturing operation. 
+ void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const + { + cracked.query = false; + cracked.proj = false; + cracked.lod = false; + cracked.fetch = false; + cracked.offset = false; + cracked.offsets = false; + cracked.gather = false; + cracked.grad = false; + cracked.subpass = false; + cracked.lodClamp = false; + cracked.fragMask = false; + + switch (op) { + case EOpImageQuerySize: + case EOpImageQuerySamples: + case EOpTextureQuerySize: + case EOpTextureQueryLod: + case EOpTextureQueryLevels: + case EOpTextureQuerySamples: + case EOpSparseTexelsResident: + cracked.query = true; + break; + case EOpTexture: + case EOpSparseTexture: + break; + case EOpTextureProj: + cracked.proj = true; + break; + case EOpTextureLod: + case EOpSparseTextureLod: + cracked.lod = true; + break; + case EOpTextureOffset: + case EOpSparseTextureOffset: + cracked.offset = true; + break; + case EOpTextureFetch: + case EOpSparseTextureFetch: + cracked.fetch = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureFetchOffset: + case EOpSparseTextureFetchOffset: + cracked.fetch = true; + cracked.offset = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureProjOffset: + cracked.offset = true; + cracked.proj = true; + break; + case EOpTextureLodOffset: + case EOpSparseTextureLodOffset: + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureProjLod: + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureProjLodOffset: + cracked.offset = true; + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureGrad: + case EOpSparseTextureGrad: + cracked.grad = true; + break; + case EOpTextureGradOffset: + case EOpSparseTextureGradOffset: + cracked.grad = true; + cracked.offset = true; + break; + case EOpTextureProjGrad: + cracked.grad = true; + cracked.proj = true; + break; + case EOpTextureProjGradOffset: + cracked.grad = true; + cracked.offset = true; + cracked.proj = true; + break; +#ifndef GLSLANG_WEB + case EOpTextureClamp: + case EOpSparseTextureClamp: + cracked.lodClamp = true; + break; + case EOpTextureOffsetClamp: + case EOpSparseTextureOffsetClamp: + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGradClamp: + case EOpSparseTextureGradClamp: + cracked.grad = true; + cracked.lodClamp = true; + break; + case EOpTextureGradOffsetClamp: + case EOpSparseTextureGradOffsetClamp: + cracked.grad = true; + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGather: + case EOpSparseTextureGather: + cracked.gather = true; + break; + case EOpTextureGatherOffset: + case EOpSparseTextureGatherOffset: + cracked.gather = true; + cracked.offset = true; + break; + case EOpTextureGatherOffsets: + case EOpSparseTextureGatherOffsets: + cracked.gather = true; + cracked.offsets = true; + break; + case EOpTextureGatherLod: + case EOpSparseTextureGatherLod: + cracked.gather = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffset: + case EOpSparseTextureGatherLodOffset: + cracked.gather = true; + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffsets: + case EOpSparseTextureGatherLodOffsets: + cracked.gather = true; + cracked.offsets = true; + cracked.lod = true; + break; + case EOpImageLoadLod: + case EOpImageStoreLod: + case EOpSparseImageLoadLod: + cracked.lod = true; + break; + 
case EOpFragmentMaskFetch:
+                cracked.subpass = sampler.dim == EsdSubpass;
+                cracked.fragMask = true;
+                break;
+            case EOpFragmentFetch:
+                cracked.subpass = sampler.dim == EsdSubpass;
+                cracked.fragMask = true;
+                break;
+            case EOpImageSampleFootprintNV:
+                break;
+            case EOpImageSampleFootprintClampNV:
+                cracked.lodClamp = true;
+                break;
+            case EOpImageSampleFootprintLodNV:
+                cracked.lod = true;
+                break;
+            case EOpImageSampleFootprintGradNV:
+                cracked.grad = true;
+                break;
+            case EOpImageSampleFootprintGradClampNV:
+                cracked.lodClamp = true;
+                cracked.grad = true;
+                break;
+            case EOpSubpassLoad:
+            case EOpSubpassLoadMS:
+                cracked.subpass = true;
+                break;
+#endif
+            default:
+                break;
+        }
+    }
+
+protected:
+    TIntermOperator(TOperator o) : TIntermTyped(EbtFloat), op(o), operationPrecision(EpqNone) {}
+    TIntermOperator(TOperator o, TType& t) : TIntermTyped(t), op(o), operationPrecision(EpqNone) {}
+    TOperator op;
+    // The result precision is in the inherited TType, and is usually meant to be both
+    // the operation precision and the result precision. However, some more complex things,
+    // like built-in function calls, distinguish between the two, in which case non-EqpNone
+    // 'operationPrecision' overrides the result precision as far as operation precision
+    // is concerned.
+    TPrecisionQualifier operationPrecision;
+};
+
+//
+// Nodes for all the basic binary math operators.
+//
+class TIntermBinary : public TIntermOperator {
+public:
+    TIntermBinary(TOperator o) : TIntermOperator(o) {}
+    virtual void traverse(TIntermTraverser*);
+    virtual void setLeft(TIntermTyped* n) { left = n; }
+    virtual void setRight(TIntermTyped* n) { right = n; }
+    virtual TIntermTyped* getLeft() const { return left; }
+    virtual TIntermTyped* getRight() const { return right; }
+    virtual TIntermBinary* getAsBinaryNode() { return this; }
+    virtual const TIntermBinary* getAsBinaryNode() const { return this; }
+    virtual void updatePrecision();
+protected:
+    TIntermTyped* left;
+    TIntermTyped* right;
+};
+
+//
+// Nodes for unary math operators.
+//
+class TIntermUnary : public TIntermOperator {
+public:
+    TIntermUnary(TOperator o, TType& t) : TIntermOperator(o, t), operand(0) {}
+    TIntermUnary(TOperator o) : TIntermOperator(o), operand(0) {}
+    virtual void traverse(TIntermTraverser*);
+    virtual void setOperand(TIntermTyped* o) { operand = o; }
+    virtual TIntermTyped* getOperand() { return operand; }
+    virtual const TIntermTyped* getOperand() const { return operand; }
+    virtual TIntermUnary* getAsUnaryNode() { return this; }
+    virtual const TIntermUnary* getAsUnaryNode() const { return this; }
+    virtual void updatePrecision();
+#ifndef GLSLANG_WEB
+    void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; }
+    const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; }
+#endif
+protected:
+    TIntermTyped* operand;
+#ifndef GLSLANG_WEB
+    TSpirvInstruction spirvInst;
+#endif
+};
+
+typedef TVector<TIntermNode*> TIntermSequence;
+typedef TVector<TStorageQualifier> TQualifierList;
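[Editorial aside: crackTexture() above is the intended way to decompose a fused texturing opcode into orthogonal flags; consumers then branch on the flags instead of on the opcode itself. A hedged sketch of such a consumer (the operand counting is illustrative, not glslang's actual operand layout):]

// Illustrative only: estimate extra operands implied by the cracked flags.
int countExtraTextureArguments(const glslang::TIntermOperator& node,
                               const glslang::TSampler& sampler)
{
    glslang::TCrackedTextureOp cracked;
    node.crackTexture(sampler, cracked);

    int extra = 0;
    if (cracked.lod)      ++extra;     // explicit LOD operand
    if (cracked.grad)     extra += 2;  // dPdx and dPdy
    if (cracked.offset)   ++extra;     // single texel offset
    if (cracked.offsets)  ++extra;     // array of four offsets
    if (cracked.lodClamp) ++extra;     // LOD clamp value
    return extra;
}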
+//
+// Nodes that operate on an arbitrary sized set of children.
+//
+class TIntermAggregate : public TIntermOperator {
+public:
+    TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(nullptr) { }
+    TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(nullptr) { }
+    ~TIntermAggregate() { delete pragmaTable; }
+    virtual TIntermAggregate* getAsAggregate() { return this; }
+    virtual const TIntermAggregate* getAsAggregate() const { return this; }
+    virtual void updatePrecision();
+    virtual void setOperator(TOperator o) { op = o; }
+    virtual TIntermSequence& getSequence() { return sequence; }
+    virtual const TIntermSequence& getSequence() const { return sequence; }
+    virtual void setName(const TString& n) { name = n; }
+    virtual const TString& getName() const { return name; }
+    virtual void traverse(TIntermTraverser*);
+    virtual void setUserDefined() { userDefined = true; }
+    virtual bool isUserDefined() { return userDefined; }
+    virtual TQualifierList& getQualifierList() { return qualifier; }
+    virtual const TQualifierList& getQualifierList() const { return qualifier; }
+    void setOptimize(bool o) { optimize = o; }
+    void setDebug(bool d) { debug = d; }
+    bool getOptimize() const { return optimize; }
+    bool getDebug() const { return debug; }
+    void setPragmaTable(const TPragmaTable& pTable);
+    const TPragmaTable& getPragmaTable() const { return *pragmaTable; }
+#ifndef GLSLANG_WEB
+    void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; }
+    const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; }
+#endif
+protected:
+    TIntermAggregate(const TIntermAggregate&); // disallow copy constructor
+    TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator
+    TIntermSequence sequence;
+    TQualifierList qualifier;
+    TString name;
+    bool userDefined; // used for user defined function names
+    bool optimize;
+    bool debug;
+    TPragmaTable* pragmaTable;
+#ifndef GLSLANG_WEB
+    TSpirvInstruction spirvInst;
+#endif
+};
+
+//
+// For if tests.
+// +class TIntermSelection : public TIntermTyped { +public: + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB) : + TIntermTyped(EbtVoid), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB, const TType& type) : + TIntermTyped(type), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* getCondition() const { return condition; } + virtual void setCondition(TIntermTyped* c) { condition = c; } + virtual TIntermNode* getTrueBlock() const { return trueBlock; } + virtual void setTrueBlock(TIntermTyped* tb) { trueBlock = tb; } + virtual TIntermNode* getFalseBlock() const { return falseBlock; } + virtual void setFalseBlock(TIntermTyped* fb) { falseBlock = fb; } + virtual TIntermSelection* getAsSelectionNode() { return this; } + virtual const TIntermSelection* getAsSelectionNode() const { return this; } + + void setNoShortCircuit() { shortCircuit = false; } + bool getShortCircuit() const { return shortCircuit; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermNode* trueBlock; + TIntermNode* falseBlock; + bool shortCircuit; // normally all if-then-else and all GLSL ?: short-circuit, but HLSL ?: does not + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +// +// For switch statements. Designed use is that a switch will have sequence of nodes +// that are either case/default nodes or a *single* node that represents all the code +// in between (if any) consecutive case/defaults. So, a traversal need only deal with +// 0 or 1 nodes per case/default statement. +// +class TIntermSwitch : public TIntermNode { +public: + TIntermSwitch(TIntermTyped* cond, TIntermAggregate* b) : condition(cond), body(b), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermNode* getCondition() const { return condition; } + virtual TIntermAggregate* getBody() const { return body; } + virtual TIntermSwitch* getAsSwitchNode() { return this; } + virtual const TIntermSwitch* getAsSwitchNode() const { return this; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermAggregate* body; + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +enum TVisit +{ + EvPreVisit, + EvInVisit, + EvPostVisit +}; + +// +// For traversing the tree. User should derive from this, +// put their traversal specific data in it, and then pass +// it to a Traverse method. +// +// When using this, just fill in the methods for nodes you want visited. +// Return false from a pre-visit to skip visiting that node's subtree. +// +// Explicitly set postVisit to true if you want post visiting, otherwise, +// filled in methods will only be called at pre-visit time (before processing +// the subtree). Similarly for inVisit for in-order visiting of nodes with +// multiple children. 
+//
+// If you only want post-visits, explicitly turn off preVisit (and inVisit)
+// and turn on postVisit.
+//
+// In general, for the visit*() methods, return true from interior nodes
+// to have the traversal continue on to children.
+//
+// If you process children yourself, or don't want them processed, return false.
+//
+class TIntermTraverser {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator())
+    TIntermTraverser(bool preVisit = true, bool inVisit = false, bool postVisit = false, bool rightToLeft = false) :
+        preVisit(preVisit),
+        inVisit(inVisit),
+        postVisit(postVisit),
+        rightToLeft(rightToLeft),
+        depth(0),
+        maxDepth(0) { }
+    virtual ~TIntermTraverser() { }
+
+    virtual void visitSymbol(TIntermSymbol*)               { }
+    virtual void visitConstantUnion(TIntermConstantUnion*) { }
+    virtual bool visitBinary(TVisit, TIntermBinary*)       { return true; }
+    virtual bool visitUnary(TVisit, TIntermUnary*)         { return true; }
+    virtual bool visitSelection(TVisit, TIntermSelection*) { return true; }
+    virtual bool visitAggregate(TVisit, TIntermAggregate*) { return true; }
+    virtual bool visitLoop(TVisit, TIntermLoop*)           { return true; }
+    virtual bool visitBranch(TVisit, TIntermBranch*)       { return true; }
+    virtual bool visitSwitch(TVisit, TIntermSwitch*)       { return true; }
+
+    int getMaxDepth() const { return maxDepth; }
+
+    void incrementDepth(TIntermNode *current)
+    {
+        depth++;
+        maxDepth = (std::max)(maxDepth, depth);
+        path.push_back(current);
+    }
+
+    void decrementDepth()
+    {
+        depth--;
+        path.pop_back();
+    }
+
+    TIntermNode *getParentNode()
+    {
+        return path.size() == 0 ? NULL : path.back();
+    }
+
+    const bool preVisit;
+    const bool inVisit;
+    const bool postVisit;
+    const bool rightToLeft;
+
+protected:
+    TIntermTraverser& operator=(TIntermTraverser&);
+
+    int depth;
+    int maxDepth;
+
+    // All the nodes from root to the current node's parent during traversing.
+    TVector<TIntermNode*> path;
+};
+
+// KHR_vulkan_glsl says "Two arrays sized with specialization constants are the same type only if
+// sized with the same symbol, involving no operations"
+inline bool SameSpecializationConstants(TIntermTyped* node1, TIntermTyped* node2)
+{
+    return node1->getAsSymbolNode() && node2->getAsSymbolNode() &&
+           node1->getAsSymbolNode()->getId() == node2->getAsSymbolNode()->getId();
+}
+
+} // end namespace glslang
+
+#endif // __INTERMEDIATE_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Initialize.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Initialize.h
new file mode 100644
index 0000000..ac8ec33
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Initialize.h
@@ -0,0 +1,112 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2013-2016 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd.
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _INITIALIZE_INCLUDED_ +#define _INITIALIZE_INCLUDED_ + +#include "../Include/ResourceLimits.h" +#include "../Include/Common.h" +#include "../Include/ShHandle.h" +#include "SymbolTable.h" +#include "Versions.h" + +namespace glslang { + +// +// This is made to hold parseable strings for almost all the built-in +// functions and variables for one specific combination of version +// and profile. (Some still need to be added programmatically.) +// This is a base class for language-specific derivations, which +// can be used for language independent builtins. +// +// The strings are organized by +// commonBuiltins: intersection of all stages' built-ins, processed just once +// stageBuiltins[]: anything a stage needs that's not in commonBuiltins +// +class TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltInParseables(); + virtual ~TBuiltInParseables(); + virtual void initialize(int version, EProfile, const SpvVersion& spvVersion) = 0; + virtual void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage) = 0; + virtual const TString& getCommonString() const { return commonBuiltins; } + virtual const TString& getStageString(EShLanguage language) const { return stageBuiltins[language]; } + + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable) = 0; + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources) = 0; + +protected: + TString commonBuiltins; + TString stageBuiltins[EShLangCount]; +}; + +// +// This is a GLSL specific derivation of TBuiltInParseables. To present a stable +// interface and match other similar code, it is called TBuiltIns, rather +// than TBuiltInParseablesGlsl. 
+// +class TBuiltIns : public TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltIns(); + virtual ~TBuiltIns(); + void initialize(int version, EProfile, const SpvVersion& spvVersion); + void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage); + + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable); + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources); + +protected: + void addTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion); + void relateTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage, TSymbolTable&); + void add2ndGenerationSamplingImaging(int version, EProfile profile, const SpvVersion& spvVersion); + void addSubpassSampling(TSampler, const TString& typeName, int version, EProfile profile); + void addQueryFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addImageFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addSamplingFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addGatherFunctions(TSampler, const TString& typeName, int version, EProfile profile); + + // Helpers for making textual representations of the permutations + // of texturing/imaging functions. + const char* postfixes[5]; + const char* prefixes[EbtNumTypes]; + int dimMap[EsdNumDims]; +}; + +} // end namespace glslang + +#endif // _INITIALIZE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/LiveTraverser.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/LiveTraverser.h new file mode 100644 index 0000000..9b39b59 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/LiveTraverser.h @@ -0,0 +1,168 @@ +// +// Copyright (C) 2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#pragma once
+
+#include "../Include/Common.h"
+#include "reflection.h"
+#include "localintermediate.h"
+
+#include "gl_types.h"
+
+#include <list>
+#include <unordered_set>
+
+namespace glslang {
+
+//
+// The traverser: mostly pass through, except
+//  - processing function-call nodes to push live functions onto the stack of functions to process
+//  - processing selection nodes to trim semantically dead code
+//
+// This is in the glslang namespace directly so it can be a friend of TReflection.
+// This can be derived from to implement reflection database traversers or
+// binding mappers: anything that wants to traverse the live subset of the tree.
+//
+
+class TLiveTraverser : public TIntermTraverser {
+public:
+    TLiveTraverser(const TIntermediate& i, bool traverseAll = false,
+                   bool preVisit = true, bool inVisit = false, bool postVisit = false) :
+        TIntermTraverser(preVisit, inVisit, postVisit),
+        intermediate(i), traverseAll(traverseAll)
+    { }
+
+    //
+    // Given a function name, find its subroot in the tree, and push it onto the stack of
+    // functions left to process.
+    //
+    void pushFunction(const TString& name)
+    {
+        TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence();
+        for (unsigned int f = 0; f < globals.size(); ++f) {
+            TIntermAggregate* candidate = globals[f]->getAsAggregate();
+            if (candidate && candidate->getOp() == EOpFunction && candidate->getName() == name) {
+                destinations.push_back(candidate);
+                break;
+            }
+        }
+    }
+
+    void pushGlobalReference(const TString& name)
+    {
+        TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence();
+        for (unsigned int f = 0; f < globals.size(); ++f) {
+            TIntermAggregate* candidate = globals[f]->getAsAggregate();
+            if (candidate && candidate->getOp() == EOpSequence &&
+                candidate->getSequence().size() == 1 &&
+                candidate->getSequence()[0]->getAsBinaryNode()) {
+                TIntermBinary* binary = candidate->getSequence()[0]->getAsBinaryNode();
+                TIntermSymbol* symbol = binary->getLeft()->getAsSymbolNode();
+                if (symbol && symbol->getQualifier().storage == EvqGlobal &&
+                    symbol->getName() == name) {
+                    destinations.push_back(candidate);
+                    break;
+                }
+            }
+        }
+    }
+
+    typedef std::list<TIntermAggregate*> TDestinationStack;
+    TDestinationStack destinations;
+
+protected:
+    // To catch which function calls are not dead, and hence which functions must be visited.
+    virtual bool visitAggregate(TVisit, TIntermAggregate* node)
+    {
+        if (!traverseAll)
+            if (node->getOp() == EOpFunctionCall)
+                addFunctionCall(node);
+
+        return true; // traverse this subtree
+    }
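[Editorial aside: here is how callers are expected to drive the worklist declared above: seed destinations with the entry point, then pop and traverse until empty, letting visitAggregate() enqueue newly discovered callees. A sketch under that assumption; the traverser subclass name is hypothetical:]

// Hypothetical driver for a TLiveTraverser-derived visitor.
void walkLiveCode(MyLiveTraverser& it, const glslang::TString& entryPointName)
{
    it.pushFunction(entryPointName);          // seed the worklist
    while (! it.destinations.empty()) {
        glslang::TIntermAggregate* destination = it.destinations.back();
        it.destinations.pop_back();           // take one live function...
        destination->traverse(&it);           // ...and walk it; callees get pushed
    }
}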
+    // To prune semantically dead paths.
+    virtual bool visitSelection(TVisit /* visit */, TIntermSelection* node)
+    {
+        if (traverseAll)
+            return true; // traverse all code
+
+        TIntermConstantUnion* constant = node->getCondition()->getAsConstantUnion();
+        if (constant) {
+            // cull the path that is dead
+            if (constant->getConstArray()[0].getBConst() == true && node->getTrueBlock())
+                node->getTrueBlock()->traverse(this);
+            if (constant->getConstArray()[0].getBConst() == false && node->getFalseBlock())
+                node->getFalseBlock()->traverse(this);
+
+            return false; // don't traverse any more, we did it all above
+        } else
+            return true; // traverse the whole subtree
+    }
+
+    // Track live functions as well as uniforms, so that we don't visit dead functions
+    // and only visit each function once.
+    void addFunctionCall(TIntermAggregate* call)
+    {
+        // just use the map to ensure we process each function at most once
+        if (liveFunctions.find(call->getName()) == liveFunctions.end()) {
+            liveFunctions.insert(call->getName());
+            pushFunction(call->getName());
+        }
+    }
+
+    void addGlobalReference(const TString& name)
+    {
+        // just use the map to ensure we process each global at most once
+        if (liveGlobals.find(name) == liveGlobals.end()) {
+            liveGlobals.insert(name);
+            pushGlobalReference(name);
+        }
+    }
+
+    const TIntermediate& intermediate;
+    typedef std::unordered_set<TString> TLiveFunctions;
+    TLiveFunctions liveFunctions;
+    typedef std::unordered_set<TString> TLiveGlobals;
+    TLiveGlobals liveGlobals;
+    bool traverseAll;
+
+private:
+    // prevent copy & copy construct
+    TLiveTraverser(TLiveTraverser&);
+    TLiveTraverser& operator=(TLiveTraverser&);
+};
+
+} // namespace glslang
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/ParseHelper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/ParseHelper.h
new file mode 100644
index 0000000..885fd90
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/ParseHelper.h
@@ -0,0 +1,588 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2013 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+//
+// This header defines a two-level parse-helper hierarchy, derived from
+// TParseVersions:
+//  - TParseContextBase:  sharable across multiple parsers
+//  - TParseContext:      GLSL specific helper
+//
+
+#ifndef _PARSER_HELPER_INCLUDED_
+#define _PARSER_HELPER_INCLUDED_
+
+#include <array>
+#include <functional>
+
+#include "parseVersions.h"
+#include "../Include/ShHandle.h"
+#include "SymbolTable.h"
+#include "localintermediate.h"
+#include "Scan.h"
+#include "attribute.h"
+
+namespace glslang {
+
+struct TPragma {
+    TPragma(bool o, bool d) : optimize(o), debug(d) { }
+    bool optimize;
+    bool debug;
+    TPragmaTable pragmaTable;
+};
+
+class TScanContext;
+class TPpContext;
+
+typedef std::set<long long> TIdSetType;
+typedef std::map<const TTypeList*, std::map<size_t, const TTypeList*>> TStructRecord;
+
+//
+// Sharable code (as well as what's in TParseVersions) across
+// parse helpers.
+//
+class TParseContextBase : public TParseVersions {
+public:
+    TParseContextBase(TSymbolTable& symbolTable, TIntermediate& interm, bool parsingBuiltins, int version,
+                      EProfile profile, const SpvVersion& spvVersion, EShLanguage language,
+                      TInfoSink& infoSink, bool forwardCompatible, EShMessages messages,
+                      const TString* entryPoint = nullptr)
+          : TParseVersions(interm, version, profile, spvVersion, language, infoSink, forwardCompatible, messages),
+            scopeMangler("::"),
+            symbolTable(symbolTable),
+            statementNestingLevel(0), loopNestingLevel(0), structNestingLevel(0), blockNestingLevel(0), controlFlowNestingLevel(0),
+            currentFunctionType(nullptr),
+            postEntryPointReturn(false),
+            contextPragma(true, false),
+            beginInvocationInterlockCount(0), endInvocationInterlockCount(0),
+            parsingBuiltins(parsingBuiltins), scanContext(nullptr), ppContext(nullptr),
+            limits(resources.limits),
+            globalUniformBlock(nullptr),
+            globalUniformBinding(TQualifier::layoutBindingEnd),
+            globalUniformSet(TQualifier::layoutSetEnd),
+            atomicCounterBlockSet(TQualifier::layoutSetEnd)
+    {
+        if (entryPoint != nullptr)
+            sourceEntryPointName = *entryPoint;
+    }
+    virtual ~TParseContextBase() { }
+
+#if !defined(GLSLANG_WEB) || defined(GLSLANG_WEB_DEVEL)
+    virtual void C_DECL   error(const TSourceLoc&, const char* szReason, const char* szToken,
+                                const char* szExtraInfoFormat, ...);
+    virtual void C_DECL    warn(const TSourceLoc&, const char* szReason, const char* szToken,
+                                const char* szExtraInfoFormat, ...);
+    virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken,
+                                const char* szExtraInfoFormat, ...);
+    virtual void C_DECL  ppWarn(const TSourceLoc&, const char* szReason, const char* szToken,
+                                const char* szExtraInfoFormat, ...);
+#endif
+
+    virtual void setLimits(const TBuiltInResource&) = 0;
+
+    void checkIndex(const TSourceLoc&, const TType&, int& index);
+
+    EShLanguage getLanguage() const { return language; }
+    void setScanContext(TScanContext* c) { scanContext = c; }
+    TScanContext* getScanContext() const { return scanContext; }
+    void setPpContext(TPpContext* c) { ppContext = c; }
+    TPpContext* getPpContext() const { return ppContext; }
+    virtual void setLineCallback(const std::function<void(int, int, bool, int, const char*)>& func) { lineCallback = func; }
+    virtual void setExtensionCallback(const std::function<void(int, const char*, const char*)>& func) { extensionCallback = func; }
+    virtual void setVersionCallback(const std::function<void(int, int, const char*)>& func) { versionCallback = func; }
+    virtual void setPragmaCallback(const std::function<void(int, const TVector<TString>&)>& func) { pragmaCallback = func; }
+    virtual void setErrorCallback(const std::function<void(int, const char*)>& func) { errorCallback = func; }
+
+    virtual void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) = 0;
+    virtual bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) = 0;
+    virtual bool lineDirectiveShouldSetNextLine() const = 0;
+    virtual void handlePragma(const TSourceLoc&, const TVector<TString>&) = 0;
+
+    virtual bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) = 0;
+
+    virtual void notifyVersion(int line, int version, const char* type_string)
+    {
+        if (versionCallback)
+            versionCallback(line, version, type_string);
+    }
+    virtual void notifyErrorDirective(int line, const char* error_message)
+    {
+        if (errorCallback)
+            errorCallback(line, error_message);
+    }
+    virtual void notifyLineDirective(int curLineNo, int newLineNo, bool hasSource, int sourceNum, const char* sourceName)
+    {
+        if (lineCallback)
+            lineCallback(curLineNo, newLineNo, hasSource, sourceNum, sourceName);
+    }
+    virtual void notifyExtensionDirective(int line, const char* extension, const char* behavior)
+    {
+        if (extensionCallback)
+            extensionCallback(line, extension, behavior);
+    }
+
+    // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL)
+    virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr);
+
+    // Manage global buffer (used for backing atomic counters in GLSL when using relaxed Vulkan semantics)
+    virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr);
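[Editorial aside: a quick sketch of how a client is expected to use the setter/notify pairs above; the lambda signatures follow the std::function types of the callbacks, and the logging is purely illustrative:]

#include <cstdio>

// parseContext may be any concrete TParseContextBase.
void watchDirectives(glslang::TParseContextBase& parseContext)
{
    // Invoked by notifyExtensionDirective() for each #extension line.
    parseContext.setExtensionCallback([](int line, const char* extension, const char* behavior) {
        std::printf("line %d: #extension %s : %s\n", line, extension, behavior);
    });
    // Invoked by notifyVersion() when #version is seen (profile may be null).
    parseContext.setVersionCallback([](int line, int version, const char* profile) {
        std::printf("line %d: #version %d %s\n", line, version, profile ? profile : "");
    });
}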
+    // Potentially rename shader entry point function
+    void renameShaderFunction(TString*& name) const
+    {
+        // Replace the entry point name given in the shader with the real entry point name,
+        // if there is a substitution.
+        if (name != nullptr && *name == sourceEntryPointName && intermediate.getEntryPointName().size() > 0)
+            name = NewPoolTString(intermediate.getEntryPointName().c_str());
+    }
+
+    virtual bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*);
+    virtual void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*);
+
+    const char* const scopeMangler;
+
+    // Basic parsing state, easily accessible to the grammar
+
+    TSymbolTable& symbolTable;        // symbol table that goes with the current language, version, and profile
+    int statementNestingLevel;        // 0 if outside all flow control or compound statements
+    int loopNestingLevel;             // 0 if outside all loops
+    int structNestingLevel;           // 0 if outside structures
+    int blockNestingLevel;            // 0 if outside blocks
+    int controlFlowNestingLevel;      // 0 if outside all flow control
+    const TType* currentFunctionType; // the return type of the function that's currently being parsed
+    bool functionReturnsValue;        // true if a non-void function has a return
+    // if inside a function, true if the function is the entry point and this is after a return statement
+    bool postEntryPointReturn;
+    // case, node, case, case, node, ...; ensure only one node between cases; stack of them for nesting
+    TList<TIntermSequence*> switchSequenceStack;
+    // the statementNestingLevel the current switch statement is at, which must match the level of its case statements
+    TList<int> switchLevel;
+    struct TPragma contextPragma;
+    int beginInvocationInterlockCount;
+    int endInvocationInterlockCount;
+
+protected:
+    TParseContextBase(TParseContextBase&);
+    TParseContextBase& operator=(TParseContextBase&);
+
+    const bool parsingBuiltins;       // true if parsing built-in symbols/functions
+    TVector<TSymbol*> linkageSymbols; // will be transferred to 'linkage', after all editing is done, order preserving
+    TScanContext* scanContext;
+    TPpContext* ppContext;
+    TBuiltInResource resources;
+    TLimits& limits;
+    TString sourceEntryPointName;
+
+    // These, if set, will be called when a line, pragma ... is preprocessed.
+    // They will be called with any parameters to the original directive.
+    std::function<void(int, int, bool, int, const char*)> lineCallback;
+    std::function<void(int, const TVector<TString>&)> pragmaCallback;
+    std::function<void(int, int, const char*)> versionCallback;
+    std::function<void(int, const char*, const char*)> extensionCallback;
+    std::function<void(int, const char*)> errorCallback;
+
+    // see implementation for detail
+    const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&,
+        std::function<bool(const TType& from, const TType& to, TOperator op, int arg)>,
+        std::function<bool(const TType& from, const TType& to1, const TType& to2)>,
+        /* output */ bool& tie);
+
+    virtual void parseSwizzleSelector(const TSourceLoc&, const TString&, int size,
+                                      TSwizzleSelectors<TVectorSelector>&);
+
+    // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL)
+    TVariable* globalUniformBlock;     // the actual block, inserted into the symbol table
+    unsigned int globalUniformBinding; // the block's binding number
+    unsigned int globalUniformSet;     // the block's set number
+    int firstNewMember;                // the index of the first member not yet inserted into the symbol table
+    // override this to set the language-specific name
+    virtual const char* getGlobalUniformBlockName() const { return ""; }
+    virtual void setUniformBlockDefaults(TType&) const { }
+    virtual void finalizeGlobalUniformBlockLayout(TVariable&) {}
+
+    // Manage the atomic counter block (used for atomic_uints with Vulkan-Relaxed)
+    TMap<int, TVariable*> atomicCounterBuffers;
+    unsigned int atomicCounterBlockSet;
+    TMap<int, int> atomicCounterBlockFirstNewMember;
+    // override this to set the language-specific name
+    virtual const char* getAtomicCounterBlockName() const { return ""; }
+    virtual void setAtomicCounterBlockDefaults(TType&) const {}
+    virtual void setInvariant(const TSourceLoc&, const char*) {}
+    virtual void finalizeAtomicCounterBlockLayout(TVariable&) {}
+    bool isAtomicCounterBlock(const TSymbol& symbol) {
+        const TVariable* var = symbol.getAsVariable();
+        if (!var)
+            return false;
+        const auto& at = atomicCounterBuffers.find(var->getType().getQualifier().layoutBinding);
+        return (at != atomicCounterBuffers.end() && (*at).second->getType() == var->getType());
+    }
+
+    virtual void outputMessage(const TSourceLoc&, const char* szReason, const char* szToken,
+                               const char* szExtraInfoFormat, TPrefixType prefix,
+                               va_list args);
+    virtual void trackLinkage(TSymbol& symbol);
+    virtual void makeEditable(TSymbol*&);
+    virtual TVariable* getEditableVariable(const char* name);
+    virtual void finish();
+};
+
+//
+// Manage the state for when to respect precision qualifiers and when to warn about
+// the defaults being different than might be expected.
+//
+class TPrecisionManager {
+public:
+    TPrecisionManager() : obey(false), warn(false), explicitIntDefault(false), explicitFloatDefault(false){ }
+    virtual ~TPrecisionManager() {}
+
+    void respectPrecisionQualifiers() { obey = true; }
+    bool respectingPrecisionQualifiers() const { return obey; }
+    bool shouldWarnAboutDefaults() const { return warn; }
+    void defaultWarningGiven() { warn = false; }
+    void warnAboutDefaults() { warn = true; }
+    void explicitIntDefaultSeen()
+    {
+        explicitIntDefault = true;
+        if (explicitFloatDefault)
+            warn = false;
+    }
+    void explicitFloatDefaultSeen()
+    {
+        explicitFloatDefault = true;
+        if (explicitIntDefault)
+            warn = false;
+    }
+
+protected:
+    bool obey;                  // respect precision qualifiers
+    bool warn;                  // need to give a warning about the defaults
+    bool explicitIntDefault;    // user set the default for int/uint
+    bool explicitFloatDefault;  // user set the default for float
+};
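[Editorial aside: the precision-warning state machine above is small but easy to misread; a sketch of the intended driver-side call sequence (the calling code is assumed, not glslang's):]

// Sketch: feeding TPrecisionManager while handling statements like
// "precision highp int;" / "precision mediump float;".
void onDefaultPrecisionStatement(glslang::TPrecisionManager& pm, bool isFloatType)
{
    if (isFloatType)
        pm.explicitFloatDefaultSeen();  // clears any pending warning once int was also seen
    else
        pm.explicitIntDefaultSeen();    // clears any pending warning once float was also seen

    if (pm.shouldWarnAboutDefaults()) {
        // emit the "defaults may differ from what you expect" warning once...
        pm.defaultWarningGiven();       // ...then disarm it
    }
}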
+//
+// GLSL-specific parse helper.  Should have GLSL in the name, but that's
+// too big of a change for comparing branches at the moment, and perhaps
+// impacts downstream consumers as well.
+//
+class TParseContext : public TParseContextBase {
+public:
+    TParseContext(TSymbolTable&, TIntermediate&, bool parsingBuiltins, int version, EProfile, const SpvVersion& spvVersion, EShLanguage, TInfoSink&,
+                  bool forwardCompatible = false, EShMessages messages = EShMsgDefault,
+                  const TString* entryPoint = nullptr);
+    virtual ~TParseContext();
+
+    bool obeyPrecisionQualifiers() const { return precisionManager.respectingPrecisionQualifiers(); }
+    void setPrecisionDefaults();
+
+    void setLimits(const TBuiltInResource&) override;
+    bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) override;
+    void parserError(const char* s);     // for bison's yyerror
+
+    virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override;
+    virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override;
+
+    void reservedErrorCheck(const TSourceLoc&, const TString&);
+    void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) override;
+    bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) override;
+    bool lineDirectiveShouldSetNextLine() const override;
+    bool builtInName(const TString&);
+
+    void handlePragma(const TSourceLoc&, const TVector<TString>&) override;
+    TIntermTyped* handleVariable(const TSourceLoc&, TSymbol* symbol, const TString* string);
+    TIntermTyped* handleBracketDereference(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index);
+    void handleIndexLimits(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index);
+
+#ifndef GLSLANG_WEB
+    void makeEditable(TSymbol*&) override;
+    void ioArrayCheck(const TSourceLoc&, const TType&, const TString& identifier);
+#endif
+    bool isIoResizeArray(const TType&) const;
+    void fixIoArraySize(const TSourceLoc&, TType&);
+    void handleIoResizeArrayAccess(const TSourceLoc&, TIntermTyped* base);
+    void checkIoArraysConsistency(const TSourceLoc&, bool tailOnly = false);
+    int getIoArrayImplicitSize(const TQualifier&, TString* featureString = nullptr) const;
+    void checkIoArrayConsistency(const TSourceLoc&, int requiredSize, const char* feature, TType&, const TString&);
+
+    TIntermTyped* handleBinaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* left, TIntermTyped* right);
+    TIntermTyped* handleUnaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* childNode);
+    TIntermTyped* handleDotDereference(const TSourceLoc&, TIntermTyped* base, const TString& field);
+    TIntermTyped* handleDotSwizzle(const TSourceLoc&, TIntermTyped* base, const TString& field);
+    void blockMemberExtensionCheck(const TSourceLoc&, const TIntermTyped* base, int member, const TString& memberName);
+    TFunction* handleFunctionDeclarator(const TSourceLoc&, TFunction& function, bool prototype);
+    TIntermAggregate* handleFunctionDefinition(const TSourceLoc&, TFunction&);
+    TIntermTyped* handleFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*);
+    TIntermTyped* handleBuiltInFunctionCall(TSourceLoc, TIntermNode* arguments, const TFunction& function);
+    void computeBuiltinPrecisions(TIntermTyped&, const TFunction&);
+    TIntermNode* handleReturnValue(const TSourceLoc&, TIntermTyped*);
+    void checkLocation(const TSourceLoc&, TOperator);
+    TIntermTyped* handleLengthMethod(const TSourceLoc&, TFunction*, TIntermNode*);
+    void addInputArgumentConversions(const TFunction&, TIntermNode*&) const;
+    TIntermTyped* addOutputArgumentConversions(const
TFunction&, TIntermAggregate&) const; + TIntermTyped* addAssign(const TSourceLoc&, TOperator op, TIntermTyped* left, TIntermTyped* right); + void builtInOpCheck(const TSourceLoc&, const TFunction&, TIntermOperator&); + void nonOpBuiltInCheck(const TSourceLoc&, const TFunction&, TIntermAggregate&); + void userFunctionCallCheck(const TSourceLoc&, TIntermAggregate&); + void samplerConstructorLocationCheck(const TSourceLoc&, const char* token, TIntermNode*); + TFunction* handleConstructorCall(const TSourceLoc&, const TPublicType&); + void handlePrecisionQualifier(const TSourceLoc&, TQualifier&, TPrecisionQualifier); + void checkPrecisionQualifier(const TSourceLoc&, TPrecisionQualifier); + void memorySemanticsCheck(const TSourceLoc&, const TFunction&, const TIntermOperator& callNode); + + TIntermTyped* vkRelaxedRemapFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + // returns true if the variable was remapped to something else + bool vkRelaxedRemapUniformVariable(const TSourceLoc&, TString&, const TPublicType&, TArraySizes*, TIntermTyped*, TType&); + + void assignError(const TSourceLoc&, const char* op, TString left, TString right); + void unaryOpError(const TSourceLoc&, const char* op, TString operand); + void binaryOpError(const TSourceLoc&, const char* op, TString left, TString right); + void variableCheck(TIntermTyped*& nodePtr); + bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void constantValueCheck(TIntermTyped* node, const char* token); + void integerCheck(const TIntermTyped* node, const char* token); + void globalCheck(const TSourceLoc&, const char* token); + bool constructorError(const TSourceLoc&, TIntermNode*, TFunction&, TOperator, TType&); + bool constructorTextureSamplerError(const TSourceLoc&, const TFunction&); + void arraySizeCheck(const TSourceLoc&, TIntermTyped* expr, TArraySize&, const char *sizeType); + bool arrayQualifierError(const TSourceLoc&, const TQualifier&); + bool arrayError(const TSourceLoc&, const TType&); + void arraySizeRequiredCheck(const TSourceLoc&, const TArraySizes&); + void structArrayCheck(const TSourceLoc&, const TType& structure); + void arraySizesCheck(const TSourceLoc&, const TQualifier&, TArraySizes*, const TIntermTyped* initializer, bool lastMember); + void arrayOfArrayVersionCheck(const TSourceLoc&, const TArraySizes*); + bool voidErrorCheck(const TSourceLoc&, const TString&, TBasicType); + void boolCheck(const TSourceLoc&, const TIntermTyped*); + void boolCheck(const TSourceLoc&, const TPublicType&); + void samplerCheck(const TSourceLoc&, const TType&, const TString& identifier, TIntermTyped* initializer); + void atomicUintCheck(const TSourceLoc&, const TType&, const TString& identifier); + void accStructCheck(const TSourceLoc & loc, const TType & type, const TString & identifier); + void transparentOpaqueCheck(const TSourceLoc&, const TType&, const TString& identifier); + void memberQualifierCheck(glslang::TPublicType&); + void globalQualifierFixCheck(const TSourceLoc&, TQualifier&, bool isMemberCheck = false); + void globalQualifierTypeCheck(const TSourceLoc&, const TQualifier&, const TPublicType&); + bool structQualifierErrorCheck(const TSourceLoc&, const TPublicType& pType); + void mergeQualifiers(const TSourceLoc&, TQualifier& dst, const TQualifier& src, bool force); + void setDefaultPrecision(const TSourceLoc&, TPublicType&, TPrecisionQualifier); + int computeSamplerTypeIndex(TSampler&); + TPrecisionQualifier 
getDefaultPrecision(TPublicType&); + void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&); + void parameterTypeCheck(const TSourceLoc&, TStorageQualifier qualifier, const TType& type); + bool containsFieldWithBasicType(const TType& type ,TBasicType basicType); + TSymbol* redeclareBuiltinVariable(const TSourceLoc&, const TString&, const TQualifier&, const TShaderQualifiers&); + void redeclareBuiltinBlock(const TSourceLoc&, TTypeList& typeList, const TString& blockName, const TString* instanceName, TArraySizes* arraySizes); + void paramCheckFixStorage(const TSourceLoc&, const TStorageQualifier&, TType& type); + void paramCheckFix(const TSourceLoc&, const TQualifier&, TType& type); + void nestedBlockCheck(const TSourceLoc&); + void nestedStructCheck(const TSourceLoc&); + void arrayObjectCheck(const TSourceLoc&, const TType&, const char* op); + void opaqueCheck(const TSourceLoc&, const TType&, const char* op); + void referenceCheck(const TSourceLoc&, const TType&, const char* op); + void storage16BitAssignmentCheck(const TSourceLoc&, const TType&, const char* op); + void specializationCheck(const TSourceLoc&, const TType&, const char* op); + void structTypeCheck(const TSourceLoc&, TPublicType&); + void inductiveLoopCheck(const TSourceLoc&, TIntermNode* init, TIntermLoop* loop); + void arrayLimitCheck(const TSourceLoc&, const TString&, int size); + void limitCheck(const TSourceLoc&, int value, const char* limit, const char* feature); + + void inductiveLoopBodyCheck(TIntermNode*, long long loopIndexId, TSymbolTable&); + void constantIndexExpressionCheck(TIntermNode*); + + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&); + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&, const TIntermTyped*); + void mergeObjectLayoutQualifiers(TQualifier& dest, const TQualifier& src, bool inheritOnly); + void layoutObjectCheck(const TSourceLoc&, const TSymbol&); + void layoutMemberLocationArrayCheck(const TSourceLoc&, bool memberWithLocation, TArraySizes* arraySizes); + void layoutTypeCheck(const TSourceLoc&, const TType&); + void layoutQualifierCheck(const TSourceLoc&, const TQualifier&); + void checkNoShaderLayouts(const TSourceLoc&, const TShaderQualifiers&); + void fixOffset(const TSourceLoc&, TSymbol&); + + const TFunction* findFunction(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExact(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction120(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction400(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExplicitTypes(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + void declareTypeDefaults(const TSourceLoc&, const TPublicType&); + TIntermNode* declareVariable(const TSourceLoc&, TString& identifier, const TPublicType&, TArraySizes* typeArray = 0, TIntermTyped* initializer = 0); + TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&); + TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&); + TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset); + void inheritMemoryQualifiers(const TQualifier& from, TQualifier& to); + void declareBlock(const TSourceLoc&, TTypeList& typeList, const TString* instanceName = 0, TArraySizes* arraySizes = 0); + void blockStorageRemap(const TSourceLoc&, const TString*, TQualifier&); + void 
blockStageIoCheck(const TSourceLoc&, const TQualifier&); + void blockQualifierCheck(const TSourceLoc&, const TQualifier&, bool instanceName); + void fixBlockLocations(const TSourceLoc&, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); + void fixXfbOffsets(TQualifier&, TTypeList&); + void fixBlockUniformOffsets(TQualifier&, TTypeList&); + void fixBlockUniformLayoutMatrix(TQualifier&, TTypeList*, TTypeList*); + void fixBlockUniformLayoutPacking(TQualifier&, TTypeList*, TTypeList*); + void addQualifierToExisting(const TSourceLoc&, TQualifier, const TString& identifier); + void addQualifierToExisting(const TSourceLoc&, TQualifier, TIdentifierList&); + void invariantCheck(const TSourceLoc&, const TQualifier&); + void updateStandaloneQualifierDefaults(const TSourceLoc&, const TPublicType&); + void wrapupSwitchSubsequence(TIntermAggregate* statements, TIntermNode* branchNode); + TIntermNode* addSwitch(const TSourceLoc&, TIntermTyped* expression, TIntermAggregate* body); + const TTypeList* recordStructCopy(TStructRecord&, const TType*, const TType*); + +#ifndef GLSLANG_WEB + TAttributeType attributeFromName(const TString& name) const; + TAttributes* makeAttributes(const TString& identifier) const; + TAttributes* makeAttributes(const TString& identifier, TIntermNode* node) const; + TAttributes* mergeAttributes(TAttributes*, TAttributes*) const; + + // Determine selection control from attributes + void handleSelectionAttributes(const TAttributes& attributes, TIntermNode*); + void handleSwitchAttributes(const TAttributes& attributes, TIntermNode*); + // Determine loop control from attributes + void handleLoopAttributes(const TAttributes& attributes, TIntermNode*); + // Function attributes + void handleFunctionAttributes(const TSourceLoc&, const TAttributes&); + + // GL_EXT_spirv_intrinsics + TSpirvRequirement* makeSpirvRequirement(const TSourceLoc& loc, const TString& name, + const TIntermAggregate* extensions, const TIntermAggregate* capabilities); + TSpirvRequirement* mergeSpirvRequirements(const TSourceLoc& loc, TSpirvRequirement* spirvReq1, + TSpirvRequirement* spirvReq2); + TSpirvTypeParameters* makeSpirvTypeParameters(const TSourceLoc& loc, const TIntermConstantUnion* constant); + TSpirvTypeParameters* mergeSpirvTypeParameters(TSpirvTypeParameters* spirvTypeParams1, + TSpirvTypeParameters* spirvTypeParams2); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, const TString& value); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, int value); + TSpirvInstruction* mergeSpirvInstruction(const TSourceLoc& loc, TSpirvInstruction* spirvInst1, + TSpirvInstruction* spirvInst2); +#endif + + void checkAndResizeMeshViewDim(const TSourceLoc&, TType&, bool isBlockMember); + +protected: + void nonInitConstCheck(const TSourceLoc&, TString& identifier, TType& type); + void inheritGlobalDefaults(TQualifier& dst) const; + TVariable* makeInternalVariable(const char* name, const TType&) const; + TVariable* declareNonArray(const TSourceLoc&, const TString& identifier, const TType&); + void declareArray(const TSourceLoc&, const TString& identifier, const TType&, TSymbol*&); + void checkRuntimeSizable(const TSourceLoc&, const TIntermTyped&); + bool isRuntimeLength(const TIntermTyped&) const; + TIntermNode* executeInitializer(const TSourceLoc&, TIntermTyped* initializer, TVariable* variable); + TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer); +#ifndef GLSLANG_WEB + void 
finish() override; +#endif + + virtual const char* getGlobalUniformBlockName() const override; + virtual void finalizeGlobalUniformBlockLayout(TVariable&) override; + virtual void setUniformBlockDefaults(TType& block) const override; + + virtual const char* getAtomicCounterBlockName() const override; + virtual void finalizeAtomicCounterBlockLayout(TVariable&) override; + virtual void setAtomicCounterBlockDefaults(TType& block) const override; + virtual void setInvariant(const TSourceLoc& loc, const char* builtin) override; + +public: + // + // Generally, bison productions, the scanner, and the PP need read/write access to these; just give them direct access + // + + // Current state of parsing + bool inMain; // if inside a function, true if the function is main + const TString* blockName; + TQualifier currentBlockQualifier; + TPrecisionQualifier defaultPrecision[EbtNumTypes]; + TBuiltInResource resources; + TLimits& limits; + +protected: + TParseContext(TParseContext&); + TParseContext& operator=(TParseContext&); + + static const int maxSamplerIndex = EsdNumDims * (EbtNumTypes * (2 * 2 * 2 * 2 * 2)); // see computeSamplerTypeIndex() + TPrecisionQualifier defaultSamplerPrecision[maxSamplerIndex]; + TPrecisionManager precisionManager; + TQualifier globalBufferDefaults; + TQualifier globalUniformDefaults; + TQualifier globalInputDefaults; + TQualifier globalOutputDefaults; + TQualifier globalSharedDefaults; + TString currentCaller; // name of last function body entered (not valid when at global scope) +#ifndef GLSLANG_WEB + int* atomicUintOffsets; // to become an array of the right size to hold an offset per binding point + bool anyIndexLimits; + TIdSetType inductiveLoopIds; + TVector needsIndexLimitationChecking; + TStructRecord matrixFixRecord; + TStructRecord packingFixRecord; + + // + // Geometry shader input arrays: + // - array sizing is based on input primitive and/or explicit size + // + // Tessellation control output arrays: + // - array sizing is based on output layout(vertices=...) 
and/or explicit size + // + // Both: + // - array sizing is retroactive + // - built-in block redeclarations interact with this + // + // Design: + // - use a per-context "resize-list", a list of symbols whose array sizes + // can be fixed + // + // - the resize-list starts empty at beginning of user-shader compilation, it does + // not have built-ins in it + // + // - on built-in array use: copyUp() symbol and add it to the resize-list + // + // - on user array declaration: add it to the resize-list + // + // - on block redeclaration: copyUp() symbol and add it to the resize-list + // * note, that appropriately gives an error if redeclaring a block that + // was already used and hence already copied-up + // + // - on seeing a layout declaration that sizes the array, fix everything in the + // resize-list, giving errors for mismatch + // + // - on seeing an array size declaration, give errors on mismatch between it and previous + // array-sizing declarations + // + TVector ioArraySymbolResizeList; +#endif +}; + +} // end namespace glslang + +#endif // _PARSER_HELPER_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/RemoveTree.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/RemoveTree.h new file mode 100644 index 0000000..1ed0156 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/RemoveTree.h @@ -0,0 +1,41 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// + +#pragma once + +namespace glslang { + +void RemoveAllTreeNodes(TIntermNode*); + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Scan.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Scan.h new file mode 100644 index 0000000..24b75cf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Scan.h @@ -0,0 +1,276 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +#ifndef _GLSLANG_SCAN_INCLUDED_ +#define _GLSLANG_SCAN_INCLUDED_ + +#include "Versions.h" + +namespace glslang { + +// Use a global end-of-input character, so no translation is needed across +// layers of encapsulation. Characters are all 8 bit, and positive, so there is +// no aliasing of character 255 onto -1, for example. +const int EndOfInput = -1; + +// +// A character scanner that seamlessly, on read-only strings, reads across an +// array of strings without assuming null termination. +// +class TInputScanner { +public: + TInputScanner(int n, const char* const s[], size_t L[], const char* const* names = nullptr, + int b = 0, int f = 0, bool single = false) : + numSources(n), + // up to this point, common usage is "char*", but now we need positive 8-bit characters + sources(reinterpret_cast(s)), + lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f), singleLogical(single), + endOfFileReached(false) + { + loc = new TSourceLoc[numSources]; + for (int i = 0; i < numSources; ++i) { + loc[i].init(i - stringBias); + } + if (names != nullptr) { + for (int i = 0; i < numSources; ++i) + loc[i].name = names[i] != nullptr ? 
NewPoolTString(names[i]) : nullptr; + } + loc[currentSource].line = 1; + logicalSourceLoc.init(1); + logicalSourceLoc.name = loc[0].name; + } + + virtual ~TInputScanner() + { + delete [] loc; + } + + // retrieve the next character and advance one character + int get() + { + int ret = peek(); + if (ret == EndOfInput) + return ret; + ++loc[currentSource].column; + ++logicalSourceLoc.column; + if (ret == '\n') { + ++loc[currentSource].line; + ++logicalSourceLoc.line; + logicalSourceLoc.column = 0; + loc[currentSource].column = 0; + } + advance(); + + return ret; + } + + // retrieve the next character, no advance + int peek() + { + if (currentSource >= numSources) { + endOfFileReached = true; + return EndOfInput; + } + // Make sure we do not read off the end of a string. + // N.B. Sources can have a length of 0. + int sourceToRead = currentSource; + size_t charToRead = currentChar; + while(charToRead >= lengths[sourceToRead]) { + charToRead = 0; + sourceToRead += 1; + if (sourceToRead >= numSources) { + return EndOfInput; + } + } + + // Here, we care about making negative valued characters positive + return sources[sourceToRead][charToRead]; + } + + // go back one character + void unget() + { + // Do not roll back once we've reached the end of the file. + if (endOfFileReached) + return; + + if (currentChar > 0) { + --currentChar; + --loc[currentSource].column; + --logicalSourceLoc.column; + if (loc[currentSource].column < 0) { + // We've moved back past a new line. Find the + // previous newline (or start of the file) to compute + // the column count on the now current line. + size_t chIndex = currentChar; + while (chIndex > 0) { + if (sources[currentSource][chIndex] == '\n') { + break; + } + --chIndex; + } + logicalSourceLoc.column = (int)(currentChar - chIndex); + loc[currentSource].column = (int)(currentChar - chIndex); + } + } else { + do { + --currentSource; + } while (currentSource > 0 && lengths[currentSource] == 0); + if (lengths[currentSource] == 0) { + // set to 0 if we've backed up to the start of an empty string + currentChar = 0; + } else + currentChar = lengths[currentSource] - 1; + } + if (peek() == '\n') { + --loc[currentSource].line; + --logicalSourceLoc.line; + } + } + + // for #line override + void setLine(int newLine) + { + logicalSourceLoc.line = newLine; + loc[getLastValidSourceIndex()].line = newLine; + } + + // for #line override in filename based parsing + void setFile(const char* filename) + { + TString* fn_tstr = NewPoolTString(filename); + logicalSourceLoc.name = fn_tstr; + loc[getLastValidSourceIndex()].name = fn_tstr; + } + + void setFile(const char* filename, int i) + { + TString* fn_tstr = NewPoolTString(filename); + if (i == getLastValidSourceIndex()) { + logicalSourceLoc.name = fn_tstr; + } + loc[i].name = fn_tstr; + } + + void setString(int newString) + { + logicalSourceLoc.string = newString; + loc[getLastValidSourceIndex()].string = newString; + logicalSourceLoc.name = nullptr; + loc[getLastValidSourceIndex()].name = nullptr; + } + + // for #include content indentation + void setColumn(int col) + { + logicalSourceLoc.column = col; + loc[getLastValidSourceIndex()].column = col; + } + + void setEndOfInput() + { + endOfFileReached = true; + currentSource = numSources; + } + + bool atEndOfInput() const { return endOfFileReached; } + + const TSourceLoc& getSourceLoc() const + { + if (singleLogical) { + return logicalSourceLoc; + } else { + return loc[std::max(0, std::min(currentSource, numSources - finale - 1))]; + } + } + // Returns the index (starting from 0) 
of the most recent valid source string we are reading from. + int getLastValidSourceIndex() const { return std::min(currentSource, numSources - 1); } + + void consumeWhiteSpace(bool& foundNonSpaceTab); + bool consumeComment(); + void consumeWhitespaceComment(bool& foundNonSpaceTab); + bool scanVersion(int& version, EProfile& profile, bool& notFirstToken); + +protected: + + // advance one character + void advance() + { + ++currentChar; + if (currentChar >= lengths[currentSource]) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + while (currentSource < numSources && lengths[currentSource] == 0) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + } + currentChar = 0; + } + } + + int numSources; // number of strings in source + const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput + const size_t *lengths; // length of each string + int currentSource; + size_t currentChar; + + // This is for reporting what string/line an error occurred on, and can be overridden by #line. + // It remembers the last state of each source string as it is left for the next one, so unget() + // can restore that state. + TSourceLoc* loc; // an array + + int stringBias; // the first string that is the user's string number 0 + int finale; // number of internal strings after user's last string + + TSourceLoc logicalSourceLoc; + bool singleLogical; // treats the strings as a single logical string. + // locations will be reported from the first string. + + // Set to true once peek() returns EndOfFile, so that we won't roll back + // once we've reached EndOfFile. + bool endOfFileReached; +}; + +} // end namespace glslang + +#endif // _GLSLANG_SCAN_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/ScanContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/ScanContext.h new file mode 100644 index 0000000..74b2b3c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/ScanContext.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// This holds context specific to the GLSL scanner, which +// sits between the preprocessor scanner and parser. +// + +#pragma once + +#include "ParseHelper.h" + +namespace glslang { + +class TPpContext; +class TPpToken; +class TParserToken; + +class TScanContext { +public: + explicit TScanContext(TParseContextBase& pc) : + parseContext(pc), + afterType(false), afterStruct(false), + field(false), afterBuffer(false) { } + virtual ~TScanContext() { } + + static void fillInKeywordMap(); + static void deleteKeywordMap(); + + int tokenize(TPpContext*, TParserToken&); + +protected: + TScanContext(TScanContext&); + TScanContext& operator=(TScanContext&); + + int tokenizeIdentifier(); + int identifierOrType(); + int reservedWord(); + int identifierOrReserved(bool reserved); + int es30ReservedFromGLSL(int version); + int nonreservedKeyword(int esVersion, int nonEsVersion); + int precisionKeyword(); + int matNxM(); + int dMat(); + int firstGenerationImage(bool inEs310); + int secondGenerationImage(); + + TParseContextBase& parseContext; + bool afterType; // true if we've recognized a type, so can only be looking for an identifier + bool afterStruct; // true if we've recognized the STRUCT keyword, so can only be looking for an identifier + bool field; // true if we're on a field, right after a '.' + bool afterBuffer; // true if we've recognized the BUFFER keyword + TSourceLoc loc; + TParserToken* parserToken; + TPpToken* ppToken; + + const char* tokenText; + int keyword; +}; + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/SymbolTable.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/SymbolTable.h new file mode 100644 index 0000000..0d45e48 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/SymbolTable.h @@ -0,0 +1,956 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SYMBOL_TABLE_INCLUDED_ +#define _SYMBOL_TABLE_INCLUDED_ + +// +// Symbol table for parsing. Has these design characteristics: +// +// * Same symbol table can be used to compile many shaders, to preserve +// effort of creating and loading with the large numbers of built-in +// symbols. +// +// --> This requires a copy mechanism, so initial pools used to create +// the shared information can be popped. Done through "clone" +// methods. +// +// * Name mangling will be used to give each function a unique name +// so that symbol table lookups are never ambiguous. This allows +// a simpler symbol table structure. +// +// * Pushing and popping of scope, so symbol table will really be a stack +// of symbol tables. Searched from the top, with new inserts going into +// the top. +// +// * Constants: Compile time constant symbols will keep their values +// in the symbol table. The parser can substitute constants at parse +// time, including doing constant folding and constant propagation. +// +// * No temporaries: Temporaries made from operations (+, --, .xy, etc.) +// are tracked in the intermediate representation, not the symbol table. +// + +#include "../Include/Common.h" +#include "../Include/intermediate.h" +#include "../Include/InfoSink.h" + +namespace glslang { + +// +// Symbol base class. (Can build functions or variables out of these...) 
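+//
+// Illustration (a sketch, not upstream glslang code): the "name mangling"
+// mentioned above keys each overload by its parameter types. Assuming
+// glslang's usual type codes ('f1' for float, 'vf4' for vec4, ';' as a
+// separator), two overloads of max() land in the table under distinct keys:
+//
+//     float max(float, float)  ->  "max(f1;f1;"
+//     vec4  max(vec4,  vec4)   ->  "max(vf4;vf4;"
+//
+// so a symbol-table lookup by mangled name is never ambiguous.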
+//
+
+class TVariable;
+class TFunction;
+class TAnonMember;
+
+typedef TVector<const char*> TExtensionList;
+
+class TSymbol {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+    explicit TSymbol(const TString *n) : name(n), uniqueId(0), extensions(0), writable(true) { }
+    virtual TSymbol* clone() const = 0;
+    virtual ~TSymbol() { }  // rely on all symbol owned memory coming from the pool
+
+    virtual const TString& getName() const { return *name; }
+    virtual void changeName(const TString* newName) { name = newName; }
+    virtual void addPrefix(const char* prefix)
+    {
+        TString newName(prefix);
+        newName.append(*name);
+        changeName(NewPoolTString(newName.c_str()));
+    }
+    virtual const TString& getMangledName() const { return getName(); }
+    virtual TFunction* getAsFunction() { return 0; }
+    virtual const TFunction* getAsFunction() const { return 0; }
+    virtual TVariable* getAsVariable() { return 0; }
+    virtual const TVariable* getAsVariable() const { return 0; }
+    virtual const TAnonMember* getAsAnonMember() const { return 0; }
+    virtual const TType& getType() const = 0;
+    virtual TType& getWritableType() = 0;
+    virtual void setUniqueId(long long id) { uniqueId = id; }
+    virtual long long getUniqueId() const { return uniqueId; }
+    virtual void setExtensions(int numExts, const char* const exts[])
+    {
+        assert(extensions == 0);
+        assert(numExts > 0);
+        extensions = NewPoolObject(extensions);
+        for (int e = 0; e < numExts; ++e)
+            extensions->push_back(exts[e]);
+    }
+    virtual int getNumExtensions() const { return extensions == nullptr ? 0 : (int)extensions->size(); }
+    virtual const char** getExtensions() const { return extensions->data(); }
+
+#if !defined(GLSLANG_WEB)
+    virtual void dump(TInfoSink& infoSink, bool complete = false) const = 0;
+    void dumpExtensions(TInfoSink& infoSink) const;
+#endif
+
+    virtual bool isReadOnly() const { return ! writable; }
+    virtual void makeReadOnly() { writable = false; }
+
+protected:
+    explicit TSymbol(const TSymbol&);
+    TSymbol& operator=(const TSymbol&);
+
+    const TString *name;
+    long long uniqueId;  // For cross-scope comparing during code generation
+
+    // For tracking what extensions must be present
+    // (don't use if correct version/profile is present).
+    TExtensionList* extensions;  // an array of pointers to existing constant char strings
+
+    //
+    // N.B.: Non-const functions that will be generally used should assert on this,
+    // to avoid overwriting shared symbol-table information.
+    //
+    bool writable;
+};
+
+//
+// Variable class, meaning a symbol that's not a function.
+//
+// There could be a separate class hierarchy for Constant variables;
+// Only one of int, bool, or float, (or none) is correct for
+// any particular use, but it's easy to do this way, and doesn't
+// seem worth having separate classes, and "getConst" can't simply return
+// different values for different types polymorphically, so this is
+// just simple and pragmatic.
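+//
+// For instance (an illustrative sketch of the intent, not normative): with
+//
+//     const vec2 v = vec2(1.0, 2.0);            // folded values -> constArray
+//     layout(constant_id = 3) const int n = 4;  // spec constant  -> constSubtree
+//
+// the parser can substitute v's components immediately, while n keeps its
+// expression tree so it can be specialized later.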
+// +class TVariable : public TSymbol { +public: + TVariable(const TString *name, const TType& t, bool uT = false ) + : TSymbol(name), + userType(uT), + constSubtree(nullptr), + memberExtensions(nullptr), + anonId(-1) + { type.shallowCopy(t); } + virtual TVariable* clone() const; + virtual ~TVariable() { } + + virtual TVariable* getAsVariable() { return this; } + virtual const TVariable* getAsVariable() const { return this; } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { assert(writable); return type; } + virtual bool isUserType() const { return userType; } + virtual const TConstUnionArray& getConstArray() const { return constArray; } + virtual TConstUnionArray& getWritableConstArray() { assert(writable); return constArray; } + virtual void setConstArray(const TConstUnionArray& array) { constArray = array; } + virtual void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + virtual TIntermTyped* getConstSubtree() const { return constSubtree; } + virtual void setAnonId(int i) { anonId = i; } + virtual int getAnonId() const { return anonId; } + + virtual void setMemberExtensions(int member, int numExts, const char* const exts[]) + { + assert(type.isStruct()); + assert(numExts > 0); + if (memberExtensions == nullptr) { + memberExtensions = NewPoolObject(memberExtensions); + memberExtensions->resize(type.getStruct()->size()); + } + for (int e = 0; e < numExts; ++e) + (*memberExtensions)[member].push_back(exts[e]); + } + virtual bool hasMemberExtensions() const { return memberExtensions != nullptr; } + virtual int getNumMemberExtensions(int member) const + { + return memberExtensions == nullptr ? 0 : (int)(*memberExtensions)[member].size(); + } + virtual const char** getMemberExtensions(int member) const { return (*memberExtensions)[member].data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + +protected: + explicit TVariable(const TVariable&); + TVariable& operator=(const TVariable&); + + TType type; + bool userType; + + // we are assuming that Pool Allocator will free the memory allocated to unionArray + // when this object is destroyed + + TConstUnionArray constArray; // for compile-time constant value + TIntermTyped* constSubtree; // for specialization constant computation + TVector* memberExtensions; // per-member extension list, allocated only when needed + int anonId; // the ID used for anonymous blocks: TODO: see if uniqueId could serve a dual purpose +}; + +// +// The function sub-class of symbols and the parser will need to +// share this definition of a function parameter. +// +struct TParameter { + TString *name; + TType* type; + TIntermTyped* defaultValue; + TParameter& copyParam(const TParameter& param) + { + if (param.name) + name = NewPoolTString(param.name->c_str()); + else + name = 0; + type = param.type->clone(); + defaultValue = param.defaultValue; + return *this; + } + TBuiltInVariable getDeclaredBuiltIn() const { return type->getQualifier().declaredBuiltIn; } +}; + +// +// The function sub-class of a symbol. 
+//
+class TFunction : public TSymbol {
+public:
+    explicit TFunction(TOperator o) :
+        TSymbol(0),
+        op(o),
+        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) { }
+    TFunction(const TString *name, const TType& retType, TOperator tOp = EOpNull) :
+        TSymbol(name),
+        mangledName(*name + '('),
+        op(tOp),
+        defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0)
+    {
+        returnType.shallowCopy(retType);
+        declaredBuiltIn = retType.getQualifier().builtIn;
+    }
+    virtual TFunction* clone() const override;
+    virtual ~TFunction();
+
+    virtual TFunction* getAsFunction() override { return this; }
+    virtual const TFunction* getAsFunction() const override { return this; }
+
+    // Install 'p' as the (non-'this') last parameter.
+    // Non-'this' parameters are reflected in both the list of parameters and the
+    // mangled name.
+    virtual void addParameter(TParameter& p)
+    {
+        assert(writable);
+        parameters.push_back(p);
+        p.type->appendMangledName(mangledName);
+
+        if (p.defaultValue != nullptr)
+            defaultParamCount++;
+    }
+
+    // Install 'this' as the first parameter.
+    // 'this' is reflected in the list of parameters, but not the mangled name.
+    virtual void addThisParameter(TType& type, const char* name)
+    {
+        TParameter p = { NewPoolTString(name), new TType, nullptr };
+        p.type->shallowCopy(type);
+        parameters.insert(parameters.begin(), p);
+    }
+
+    virtual void addPrefix(const char* prefix) override
+    {
+        TSymbol::addPrefix(prefix);
+        mangledName.insert(0, prefix);
+    }
+
+    virtual void removePrefix(const TString& prefix)
+    {
+        assert(mangledName.compare(0, prefix.size(), prefix) == 0);
+        mangledName.erase(0, prefix.size());
+    }
+
+    virtual const TString& getMangledName() const override { return mangledName; }
+    virtual const TType& getType() const override { return returnType; }
+    virtual TBuiltInVariable getDeclaredBuiltInType() const { return declaredBuiltIn; }
+    virtual TType& getWritableType() override { return returnType; }
+    virtual void relateToOperator(TOperator o) { assert(writable); op = o; }
+    virtual TOperator getBuiltInOp() const { return op; }
+    virtual void setDefined() { assert(writable); defined = true; }
+    virtual bool isDefined() const { return defined; }
+    virtual void setPrototyped() { assert(writable); prototyped = true; }
+    virtual bool isPrototyped() const { return prototyped; }
+    virtual void setImplicitThis() { assert(writable); implicitThis = true; }
+    virtual bool hasImplicitThis() const { return implicitThis; }
+    virtual void setIllegalImplicitThis() { assert(writable); illegalImplicitThis = true; }
+    virtual bool hasIllegalImplicitThis() const { return illegalImplicitThis; }
+
+    // Return total number of parameters
+    virtual int getParamCount() const { return static_cast<int>(parameters.size()); }
+    // Return number of parameters with default values.
+    virtual int getDefaultParamCount() const { return defaultParamCount; }
+    // Return number of fixed parameters (without default values)
+    virtual int getFixedParamCount() const { return getParamCount() - getDefaultParamCount(); }
+
+    virtual TParameter& operator[](int i) { assert(writable); return parameters[i]; }
+    virtual const TParameter& operator[](int i) const { return parameters[i]; }
+
+#ifndef GLSLANG_WEB
+    virtual void setSpirvInstruction(const TSpirvInstruction& inst)
+    {
+        relateToOperator(EOpSpirvInst);
+        spirvInst = inst;
+    }
+    virtual const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; }
+#endif
+
+#if !defined(GLSLANG_WEB)
+    virtual void dump(TInfoSink& infoSink, bool complete = false) const override;
+#endif
+
+protected:
+    explicit TFunction(const TFunction&);
+    TFunction& operator=(const TFunction&);
+
+    typedef TVector<TParameter> TParamList;
+    TParamList parameters;
+    TType returnType;
+    TBuiltInVariable declaredBuiltIn;
+
+    TString mangledName;
+    TOperator op;
+    bool defined;
+    bool prototyped;
+    bool implicitThis;         // True if this function is allowed to see all members of 'this'
+    bool illegalImplicitThis;  // True if this function is not supposed to have access to dynamic members of 'this',
+                               // even if it finds member variables in the symbol table.
+                               // This is important for a static member function that has member variables in scope,
+                               // but is not allowed to use them, or see hidden symbols instead.
+    int defaultParamCount;
+
+#ifndef GLSLANG_WEB
+    TSpirvInstruction spirvInst;  // SPIR-V instruction qualifiers
+#endif
+};
+
+//
+// Members of anonymous blocks are a kind of TSymbol. They are not hidden in
+// the symbol table behind a container; rather they are visible and point to
+// their anonymous container. (The anonymous container is found through the
+// member, not the other way around.)
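+//
+// For example (sketch): the block below has no instance name, so its members
+// are inserted as TAnonMembers that all point back at one container variable:
+//
+//     uniform Settings { vec4 color; float scale; };  // members visible at global scope
+//
+// Looking up "color" finds the TAnonMember; the block itself is then reached
+// through getAnonContainer()/getMemberNumber().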
+// +class TAnonMember : public TSymbol { +public: + TAnonMember(const TString* n, unsigned int m, TVariable& a, int an) : TSymbol(n), anonContainer(a), memberNumber(m), anonId(an) { } + virtual TAnonMember* clone() const override; + virtual ~TAnonMember() { } + + virtual const TAnonMember* getAsAnonMember() const override { return this; } + virtual const TVariable& getAnonContainer() const { return anonContainer; } + virtual unsigned int getMemberNumber() const { return memberNumber; } + + virtual const TType& getType() const override + { + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual TType& getWritableType() override + { + assert(writable); + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual void setExtensions(int numExts, const char* const exts[]) override + { + anonContainer.setMemberExtensions(memberNumber, numExts, exts); + } + virtual int getNumExtensions() const override { return anonContainer.getNumMemberExtensions(memberNumber); } + virtual const char** getExtensions() const override { return anonContainer.getMemberExtensions(memberNumber); } + + virtual int getAnonId() const { return anonId; } +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TAnonMember(const TAnonMember&); + TAnonMember& operator=(const TAnonMember&); + + TVariable& anonContainer; + unsigned int memberNumber; + int anonId; +}; + +class TSymbolTableLevel { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TSymbolTableLevel() : defaultPrecision(0), anonId(0), thisLevel(false) { } + ~TSymbolTableLevel(); + + bool insert(const TString& name, TSymbol* symbol) { + return level.insert(tLevelPair(name, symbol)).second; + } + + bool insert(TSymbol& symbol, bool separateNameSpaces, const TString& forcedKeyName = TString()) + { + // + // returning true means symbol was added to the table with no semantic errors + // + const TString& name = symbol.getName(); + if (forcedKeyName.length()) { + return level.insert(tLevelPair(forcedKeyName, &symbol)).second; + } + else if (name == "") { + symbol.getAsVariable()->setAnonId(anonId++); + // An empty name means an anonymous container, exposing its members to the external scope. + // Give it a name and insert its members in the symbol table, pointing to the container. + char buf[20]; + snprintf(buf, 20, "%s%d", AnonymousPrefix, symbol.getAsVariable()->getAnonId()); + symbol.changeName(NewPoolTString(buf)); + + return insertAnonymousMembers(symbol, 0); + } else { + // Check for redefinition errors: + // - STL itself will tell us if there is a direct name collision, with name mangling, at this level + // - additionally, check for function-redefining-variable name collisions + const TString& insertName = symbol.getMangledName(); + if (symbol.getAsFunction()) { + // make sure there isn't a variable of this name + if (! separateNameSpaces && level.find(name) != level.end()) + return false; + + // insert, and whatever happens is okay + level.insert(tLevelPair(insertName, &symbol)); + + return true; + } else + return level.insert(tLevelPair(insertName, &symbol)).second; + } + } + + // Add more members to an already inserted aggregate object + bool amend(TSymbol& symbol, int firstNewMember) + { + // See insert() for comments on basic explanation of insert. + // This operates similarly, but more simply. 
+ // Only supporting amend of anonymous blocks so far. + if (IsAnonymous(symbol.getName())) + return insertAnonymousMembers(symbol, firstNewMember); + else + return false; + } + + bool insertAnonymousMembers(TSymbol& symbol, int firstMember) + { + const TTypeList& types = *symbol.getAsVariable()->getType().getStruct(); + for (unsigned int m = firstMember; m < types.size(); ++m) { + TAnonMember* member = new TAnonMember(&types[m].type->getFieldName(), m, *symbol.getAsVariable(), symbol.getAsVariable()->getAnonId()); + if (! level.insert(tLevelPair(member->getMangledName(), member)).second) + return false; + } + + return true; + } + + void retargetSymbol(const TString& from, const TString& to) { + tLevel::const_iterator fromIt = level.find(from); + tLevel::const_iterator toIt = level.find(to); + if (fromIt == level.end() || toIt == level.end()) + return; + delete fromIt->second; + level[from] = toIt->second; + retargetedSymbols.push_back({from, to}); + } + + TSymbol* find(const TString& name) const + { + tLevel::const_iterator it = level.find(name); + if (it == level.end()) + return 0; + else + return (*it).second; + } + + void findFunctionNameList(const TString& name, TVector& list) + { + size_t parenAt = name.find_first_of('('); + TString base(name, 0, parenAt + 1); + + tLevel::const_iterator begin = level.lower_bound(base); + base[parenAt] = ')'; // assume ')' is lexically after '(' + tLevel::const_iterator end = level.upper_bound(base); + for (tLevel::const_iterator it = begin; it != end; ++it) + list.push_back(it->second->getAsFunction()); + } + + // See if there is already a function in the table having the given non-function-style name. + bool hasFunctionName(const TString& name) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt != candidateName.npos && candidateName.compare(0, parenAt, name) == 0) + + return true; + } + + return false; + } + + // See if there is a variable at this level having the given non-function-style name. + // Return true if name is found, and set variable to true if the name was a variable. + bool findFunctionVariableName(const TString& name, bool& variable) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt == candidateName.npos) { + // not a mangled name + if (candidateName == name) { + // found a variable name match + variable = true; + return true; + } + } else { + // a mangled name + if (candidateName.compare(0, parenAt, name) == 0) { + // found a function name match + variable = false; + return true; + } + } + } + + return false; + } + + // Use this to do a lazy 'push' of precision defaults the first time + // a precision statement is seen in a new scope. Leave it at 0 for + // when no push was needed. Thus, it is not the current defaults, + // it is what to restore the defaults to when popping a level. 
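+    //
+    // A walk-through of the intent (assumed from the comment above): entering
+    // a scope copies nothing. The first 'precision mediump float;' seen in the
+    // new scope triggers setPreviousDefaultPrecisions() with the old defaults;
+    // popping the scope hands them back through getPreviousDefaultPrecisions().
+    // Scopes that never restate a precision pay no cost.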
void setPreviousDefaultPrecisions(const TPrecisionQualifier *p)
+    {
+        // can call multiple times at one scope, will only latch on first call,
+        // as we're tracking the previous scope's values, not the current values
+        if (defaultPrecision != 0)
+            return;
+
+        defaultPrecision = new TPrecisionQualifier[EbtNumTypes];
+        for (int t = 0; t < EbtNumTypes; ++t)
+            defaultPrecision[t] = p[t];
+    }
+
+    void getPreviousDefaultPrecisions(TPrecisionQualifier *p)
+    {
+        // can be called for table level pops that didn't set the
+        // defaults
+        if (defaultPrecision == 0 || p == 0)
+            return;
+
+        for (int t = 0; t < EbtNumTypes; ++t)
+            p[t] = defaultPrecision[t];
+    }
+
+    void relateToOperator(const char* name, TOperator op);
+    void setFunctionExtensions(const char* name, int num, const char* const extensions[]);
+#if !defined(GLSLANG_WEB)
+    void dump(TInfoSink& infoSink, bool complete = false) const;
+#endif
+    TSymbolTableLevel* clone() const;
+    void readOnly();
+
+    void setThisLevel() { thisLevel = true; }
+    bool isThisLevel() const { return thisLevel; }
+
+protected:
+    explicit TSymbolTableLevel(TSymbolTableLevel&);
+    TSymbolTableLevel& operator=(TSymbolTableLevel&);
+
+    typedef std::map<TString, TSymbol*, std::less<TString>, pool_allocator<std::pair<const TString, TSymbol*> > > tLevel;
+    typedef const tLevel::value_type tLevelPair;
+    typedef std::pair<tLevel::iterator, bool> tInsertResult;
+
+    tLevel level;  // named mappings
+    TPrecisionQualifier *defaultPrecision;
+    // pair<FromName, ToName>
+    TVector<std::pair<TString, TString>> retargetedSymbols;
+    int anonId;
+    bool thisLevel;  // True if this level of the symbol table is a structure scope containing member functions
+                     // that are supposed to see anonymous access to member variables.
+};
+
+class TSymbolTable {
+public:
+    TSymbolTable() : uniqueId(0), noBuiltInRedeclarations(false), separateNameSpaces(false), adoptedLevels(0)
+    {
+        //
+        // This symbol table cannot be used until push() is called.
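+        //
+        // A minimal lifecycle sketch (illustrative only; assumes the thread's
+        // pool allocator is already set up, as glslang requires):
+        //
+        //     TSymbolTable table;
+        //     table.push();              // e.g. a built-in level
+        //     table.push();              // ... then user globals
+        //     table.insert(*someSymbol); // false would mean a name collision
+        //     TPrecisionQualifier prec[EbtNumTypes];
+        //     table.pop(prec);           // restores saved precision defaults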
+        //
+    }
+    ~TSymbolTable()
+    {
+        // this can be called explicitly; safest to code it so it can be called multiple times
+
+        // don't deallocate levels passed in from elsewhere
+        while (table.size() > adoptedLevels)
+            pop(0);
+    }
+
+    void adoptLevels(TSymbolTable& symTable)
+    {
+        for (unsigned int level = 0; level < symTable.table.size(); ++level) {
+            table.push_back(symTable.table[level]);
+            ++adoptedLevels;
+        }
+        uniqueId = symTable.uniqueId;
+        noBuiltInRedeclarations = symTable.noBuiltInRedeclarations;
+        separateNameSpaces = symTable.separateNameSpaces;
+    }
+
+    //
+    // While level adopting is generic, the methods below enact the following
+    // convention for levels:
+    //   0: common built-ins shared across all stages, all compiles, only one copy for all symbol tables
+    //   1: per-stage built-ins, shared across all compiles, but a different copy per stage
+    //   2: built-ins specific to a compile, like resources that are context-dependent, or redeclared built-ins
+    //   3: user-shader globals
+    //
+protected:
+    static const uint32_t LevelFlagBitOffset = 56;
+    static const int globalLevel = 3;
+    static bool isSharedLevel(int level)  { return level <= 1; }            // exclude all per-compile levels
+    static bool isBuiltInLevel(int level) { return level <= 2; }            // exclude user globals
+    static bool isGlobalLevel(int level)  { return level <= globalLevel; }  // include user globals
+public:
+    bool isEmpty() { return table.size() == 0; }
+    bool atBuiltInLevel() { return isBuiltInLevel(currentLevel()); }
+    bool atGlobalLevel()  { return isGlobalLevel(currentLevel()); }
+    static bool isBuiltInSymbol(long long uniqueId) {
+        int level = static_cast<int>(uniqueId >> LevelFlagBitOffset);
+        return isBuiltInLevel(level);
+    }
+    static constexpr uint64_t uniqueIdMask = (1LL << LevelFlagBitOffset) - 1;
+    static const uint32_t MaxLevelInUniqueID = 127;
+    void setNoBuiltInRedeclarations() { noBuiltInRedeclarations = true; }
+    void setSeparateNameSpaces() { separateNameSpaces = true; }
+
+    void push()
+    {
+        table.push_back(new TSymbolTableLevel);
+        updateUniqueIdLevelFlag();
+    }
+
+    // Make a new symbol-table level to represent the scope introduced by a structure
+    // containing member functions, such that the member functions can find anonymous
+    // references to member variables.
+    //
+    // 'thisSymbol' should have a name of "" to trigger anonymous structure-member
+    // symbol finds.
+    void pushThis(TSymbol& thisSymbol)
+    {
+        assert(thisSymbol.getName().size() == 0);
+        table.push_back(new TSymbolTableLevel);
+        updateUniqueIdLevelFlag();
+        table.back()->setThisLevel();
+        insert(thisSymbol);
+    }
+
+    void pop(TPrecisionQualifier *p)
+    {
+        table[currentLevel()]->getPreviousDefaultPrecisions(p);
+        delete table.back();
+        table.pop_back();
+        updateUniqueIdLevelFlag();
+    }
+
+    //
+    // Insert a visible symbol into the symbol table so it can
+    // be found later by name.
+    //
+    // Returns false if there was a name collision.
+    //
+    bool insert(TSymbol& symbol)
+    {
+        symbol.setUniqueId(++uniqueId);
+
+        // make sure there isn't a function of this variable name
+        if (! separateNameSpaces && ! symbol.getAsFunction() && table[currentLevel()]->hasFunctionName(symbol.getName()))
+            return false;
+
+        // check for not overloading or redefining a built-in function
+        if (noBuiltInRedeclarations) {
+            if (atGlobalLevel() && currentLevel() > 0) {
+                if (table[0]->hasFunctionName(symbol.getName()))
+                    return false;
+                if (currentLevel() > 1 && table[1]->hasFunctionName(symbol.getName()))
+                    return false;
+            }
+        }
+
+        return table[currentLevel()]->insert(symbol, separateNameSpaces);
+    }
+
+    // Add more members to an already inserted aggregate object
+    bool amend(TSymbol& symbol, int firstNewMember)
+    {
+        // See insert() for comments on basic explanation of insert.
+        // This operates similarly, but more simply.
+        return table[currentLevel()]->amend(symbol, firstNewMember);
+    }
+
+    // Update the level info in symbol's unique ID to current level
+    void amendSymbolIdLevel(TSymbol& symbol)
+    {
+        // clamp level to avoid overflow
+        uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ? MaxLevelInUniqueID : currentLevel();
+        uint64_t symbolId = symbol.getUniqueId();
+        symbolId &= uniqueIdMask;
+        symbolId |= (level << LevelFlagBitOffset);
+        symbol.setUniqueId(symbolId);
+    }
+    //
+    // To allocate an internal temporary, which will need to be uniquely
+    // identified by the consumer of the AST, but never need to be
+    // found by doing a symbol table search by name, hence allowed an
+    // arbitrary name in the symbol with no worry of collision.
+    //
+    void makeInternalVariable(TSymbol& symbol)
+    {
+        symbol.setUniqueId(++uniqueId);
+    }
+
+    //
+    // Copy a variable or anonymous member's structure from a shared level so that
+    // it can be added (soon after return) to the symbol table where it can be
+    // modified without impacting other users of the shared table.
+    //
+    TSymbol* copyUpDeferredInsert(TSymbol* shared)
+    {
+        if (shared->getAsVariable()) {
+            TSymbol* copy = shared->clone();
+            copy->setUniqueId(shared->getUniqueId());
+            return copy;
+        } else {
+            const TAnonMember* anon = shared->getAsAnonMember();
+            assert(anon);
+            TVariable* container = anon->getAnonContainer().clone();
+            container->changeName(NewPoolTString(""));
+            container->setUniqueId(anon->getAnonContainer().getUniqueId());
+            return container;
+        }
+    }
+
+    TSymbol* copyUp(TSymbol* shared)
+    {
+        TSymbol* copy = copyUpDeferredInsert(shared);
+        table[globalLevel]->insert(*copy, separateNameSpaces);
+        if (shared->getAsVariable())
+            return copy;
+        else {
+            // return the copy of the anonymous member
+            return table[globalLevel]->find(shared->getName());
+        }
+    }
+
+    // Normal find of a symbol, that can optionally say whether the symbol was found
+    // at a built-in level or the current top-scope level.
+    TSymbol* find(const TString& name, bool* builtIn = 0, bool* currentScope = 0, int* thisDepthP = 0)
+    {
+        int level = currentLevel();
+        TSymbol* symbol;
+        int thisDepth = 0;
+        do {
+            if (table[level]->isThisLevel())
+                ++thisDepth;
+            symbol = table[level]->find(name);
+            --level;
+        } while (symbol == nullptr && level >= 0);
+        level++;
+        if (builtIn)
+            *builtIn = isBuiltInLevel(level);
+        if (currentScope)
+            *currentScope = isGlobalLevel(currentLevel()) || level == currentLevel(); // consider shared levels as "current scope" WRT user globals
+        if (thisDepthP != nullptr) {
+            if (!
table[level]->isThisLevel()) + thisDepth = 0; + *thisDepthP = thisDepth; + } + + return symbol; + } + + void retargetSymbol(const TString& from, const TString& to) { + int level = currentLevel(); + table[level]->retargetSymbol(from, to); + } + + + // Find of a symbol that returns how many layers deep of nested + // structures-with-member-functions ('this' scopes) deep the symbol was + // found in. + TSymbol* find(const TString& name, int& thisDepth) + { + int level = currentLevel(); + TSymbol* symbol; + thisDepth = 0; + do { + if (table[level]->isThisLevel()) + ++thisDepth; + symbol = table[level]->find(name); + --level; + } while (symbol == 0 && level >= 0); + + if (! table[level + 1]->isThisLevel()) + thisDepth = 0; + + return symbol; + } + + bool isFunctionNameVariable(const TString& name) const + { + if (separateNameSpaces) + return false; + + int level = currentLevel(); + do { + bool variable; + bool found = table[level]->findFunctionVariableName(name, variable); + if (found) + return variable; + --level; + } while (level >= 0); + + return false; + } + + void findFunctionNameList(const TString& name, TVector& list, bool& builtIn) + { + // For user levels, return the set found in the first scope with a match + builtIn = false; + int level = currentLevel(); + do { + table[level]->findFunctionNameList(name, list); + --level; + } while (list.empty() && level >= globalLevel); + + if (! list.empty()) + return; + + // Gather across all built-in levels; they don't hide each other + builtIn = true; + do { + table[level]->findFunctionNameList(name, list); + --level; + } while (level >= 0); + } + + void relateToOperator(const char* name, TOperator op) + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->relateToOperator(name, op); + } + + void setFunctionExtensions(const char* name, int num, const char* const extensions[]) + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->setFunctionExtensions(name, num, extensions); + } + + void setVariableExtensions(const char* name, int numExts, const char* const extensions[]) + { + TSymbol* symbol = find(TString(name)); + if (symbol == nullptr) + return; + + symbol->setExtensions(numExts, extensions); + } + + void setVariableExtensions(const char* blockName, const char* name, int numExts, const char* const extensions[]) + { + TSymbol* symbol = find(TString(blockName)); + if (symbol == nullptr) + return; + TVariable* variable = symbol->getAsVariable(); + assert(variable != nullptr); + + const TTypeList& structure = *variable->getAsVariable()->getType().getStruct(); + for (int member = 0; member < (int)structure.size(); ++member) { + if (structure[member].type->getFieldName().compare(name) == 0) { + variable->setMemberExtensions(member, numExts, extensions); + return; + } + } + } + + long long getMaxSymbolId() { return uniqueId; } +#if !defined(GLSLANG_WEB) + void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + void copyTable(const TSymbolTable& copyOf); + + void setPreviousDefaultPrecisions(TPrecisionQualifier *p) { table[currentLevel()]->setPreviousDefaultPrecisions(p); } + + void readOnly() + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->readOnly(); + } + + // Add current level in the high-bits of unique id + void updateUniqueIdLevelFlag() { + // clamp level to avoid overflow + uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ? 
MaxLevelInUniqueID : currentLevel();
+        uniqueId &= uniqueIdMask;
+        uniqueId |= (level << LevelFlagBitOffset);
+    }
+
+    void overwriteUniqueId(long long id)
+    {
+        uniqueId = id;
+        updateUniqueIdLevelFlag();
+    }
+
+protected:
+    TSymbolTable(TSymbolTable&);
+    TSymbolTable& operator=(TSymbolTableLevel&);
+
+    int currentLevel() const { return static_cast<int>(table.size()) - 1; }
+    std::vector<TSymbolTableLevel*> table;
+    long long uniqueId;  // for unique identification in code generation
+    bool noBuiltInRedeclarations;
+    bool separateNameSpaces;
+    unsigned int adoptedLevels;
+};
+
+} // end namespace glslang
+
+#endif // _SYMBOL_TABLE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Versions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Versions.h
new file mode 100644
index 0000000..f06abdd
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/Versions.h
@@ -0,0 +1,359 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2013 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+// Copyright (C) 2015-2018 Google, Inc.
+// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _VERSIONS_INCLUDED_
+#define _VERSIONS_INCLUDED_
+
+#define LAST_ELEMENT_MARKER(x) x
+
+//
+// Help manage multiple profiles, versions, extensions etc.
+//
+
+//
+// Profiles are set up for masking operations, so queries can be done on multiple
+// profiles at the same time.
+//
+// Don't maintain an ordinal set of enums (0,1,2,3...) to avoid all possible
+// defects from mixing the two different forms.
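+//
+// Because each profile is a separate bit, one masking test can cover several
+// profiles at once, e.g. (illustrative only):
+//
+//     if (profile & (ECoreProfile | ECompatibilityProfile))
+//         ...;  // accept either desktop profile in a single query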
+// +typedef enum : unsigned { + EBadProfile = 0, + ENoProfile = (1 << 0), // only for desktop, before profiles showed up + ECoreProfile = (1 << 1), + ECompatibilityProfile = (1 << 2), + EEsProfile = (1 << 3), + LAST_ELEMENT_MARKER(EProfileCount), +} EProfile; + +namespace glslang { + +// +// Map from profile enum to externally readable text name. +// +inline const char* ProfileName(EProfile profile) +{ + switch (profile) { + case ENoProfile: return "none"; + case ECoreProfile: return "core"; + case ECompatibilityProfile: return "compatibility"; + case EEsProfile: return "es"; + default: return "unknown profile"; + } +} + +// +// What source rules, validation rules, target language, etc. are needed or +// desired for SPIR-V? +// +// 0 means a target or rule set is not enabled (ignore rules from that entity). +// Non-0 means to apply semantic rules arising from that version of its rule set. +// The union of all requested rule sets will be applied. +// +struct SpvVersion { + SpvVersion() : spv(0), vulkanGlsl(0), vulkan(0), openGl(0), vulkanRelaxed(false) {} + unsigned int spv; // the version of SPIR-V to target, as defined by "word 1" of the SPIR-V binary header + int vulkanGlsl; // the version of GLSL semantics for Vulkan, from GL_KHR_vulkan_glsl, for "#define VULKAN XXX" + int vulkan; // the version of Vulkan, for which SPIR-V execution environment rules to use + int openGl; // the version of GLSL semantics for OpenGL, from GL_ARB_gl_spirv, for "#define GL_SPIRV XXX" + bool vulkanRelaxed; // relax changes to GLSL for Vulkan, allowing some GL-specific to be compiled to Vulkan SPIR-V target +}; + +// +// The behaviors from the GLSL "#extension extension_name : behavior" +// +typedef enum { + EBhMissing = 0, + EBhRequire, + EBhEnable, + EBhWarn, + EBhDisable, + EBhDisablePartial // use as initial state of an extension that is only partially implemented +} TExtensionBehavior; + +// +// Symbolic names for extensions. Strings may be directly used when calling the +// functions, but better to have the compiler do spelling checks. 
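+//
+// E.g. (sketch; the exact checking call varies by call site): passing the
+// symbolic constant E_GL_EXT_nonuniform_qualifier to an extension check lets
+// the compiler reject a misspelling, whereas the raw string
+// "GL_EXT_nonuniform_qualifer" would only fail at runtime.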
+// +const char* const E_GL_OES_texture_3D = "GL_OES_texture_3D"; +const char* const E_GL_OES_standard_derivatives = "GL_OES_standard_derivatives"; +const char* const E_GL_EXT_frag_depth = "GL_EXT_frag_depth"; +const char* const E_GL_OES_EGL_image_external = "GL_OES_EGL_image_external"; +const char* const E_GL_OES_EGL_image_external_essl3 = "GL_OES_EGL_image_external_essl3"; +const char* const E_GL_EXT_YUV_target = "GL_EXT_YUV_target"; +const char* const E_GL_EXT_shader_texture_lod = "GL_EXT_shader_texture_lod"; +const char* const E_GL_EXT_shadow_samplers = "GL_EXT_shadow_samplers"; + +const char* const E_GL_ARB_texture_rectangle = "GL_ARB_texture_rectangle"; +const char* const E_GL_3DL_array_objects = "GL_3DL_array_objects"; +const char* const E_GL_ARB_shading_language_420pack = "GL_ARB_shading_language_420pack"; +const char* const E_GL_ARB_texture_gather = "GL_ARB_texture_gather"; +const char* const E_GL_ARB_gpu_shader5 = "GL_ARB_gpu_shader5"; +const char* const E_GL_ARB_separate_shader_objects = "GL_ARB_separate_shader_objects"; +const char* const E_GL_ARB_compute_shader = "GL_ARB_compute_shader"; +const char* const E_GL_ARB_tessellation_shader = "GL_ARB_tessellation_shader"; +const char* const E_GL_ARB_enhanced_layouts = "GL_ARB_enhanced_layouts"; +const char* const E_GL_ARB_texture_cube_map_array = "GL_ARB_texture_cube_map_array"; +const char* const E_GL_ARB_texture_multisample = "GL_ARB_texture_multisample"; +const char* const E_GL_ARB_shader_texture_lod = "GL_ARB_shader_texture_lod"; +const char* const E_GL_ARB_explicit_attrib_location = "GL_ARB_explicit_attrib_location"; +const char* const E_GL_ARB_explicit_uniform_location = "GL_ARB_explicit_uniform_location"; +const char* const E_GL_ARB_shader_image_load_store = "GL_ARB_shader_image_load_store"; +const char* const E_GL_ARB_shader_atomic_counters = "GL_ARB_shader_atomic_counters"; +const char* const E_GL_ARB_shader_atomic_counter_ops = "GL_ARB_shader_atomic_counter_ops"; +const char* const E_GL_ARB_shader_draw_parameters = "GL_ARB_shader_draw_parameters"; +const char* const E_GL_ARB_shader_group_vote = "GL_ARB_shader_group_vote"; +const char* const E_GL_ARB_derivative_control = "GL_ARB_derivative_control"; +const char* const E_GL_ARB_shader_texture_image_samples = "GL_ARB_shader_texture_image_samples"; +const char* const E_GL_ARB_viewport_array = "GL_ARB_viewport_array"; +const char* const E_GL_ARB_gpu_shader_int64 = "GL_ARB_gpu_shader_int64"; +const char* const E_GL_ARB_gpu_shader_fp64 = "GL_ARB_gpu_shader_fp64"; +const char* const E_GL_ARB_shader_ballot = "GL_ARB_shader_ballot"; +const char* const E_GL_ARB_sparse_texture2 = "GL_ARB_sparse_texture2"; +const char* const E_GL_ARB_sparse_texture_clamp = "GL_ARB_sparse_texture_clamp"; +const char* const E_GL_ARB_shader_stencil_export = "GL_ARB_shader_stencil_export"; +// const char* const E_GL_ARB_cull_distance = "GL_ARB_cull_distance"; // present for 4.5, but need extension control over block members +const char* const E_GL_ARB_post_depth_coverage = "GL_ARB_post_depth_coverage"; +const char* const E_GL_ARB_shader_viewport_layer_array = "GL_ARB_shader_viewport_layer_array"; +const char* const E_GL_ARB_fragment_shader_interlock = "GL_ARB_fragment_shader_interlock"; +const char* const E_GL_ARB_shader_clock = "GL_ARB_shader_clock"; +const char* const E_GL_ARB_uniform_buffer_object = "GL_ARB_uniform_buffer_object"; +const char* const E_GL_ARB_sample_shading = "GL_ARB_sample_shading"; +const char* const E_GL_ARB_shader_bit_encoding = "GL_ARB_shader_bit_encoding"; +const char* const 
E_GL_ARB_shader_image_size = "GL_ARB_shader_image_size"; +const char* const E_GL_ARB_shader_storage_buffer_object = "GL_ARB_shader_storage_buffer_object"; +const char* const E_GL_ARB_shading_language_packing = "GL_ARB_shading_language_packing"; +const char* const E_GL_ARB_texture_query_lod = "GL_ARB_texture_query_lod"; +const char* const E_GL_ARB_vertex_attrib_64bit = "GL_ARB_vertex_attrib_64bit"; +const char* const E_GL_ARB_draw_instanced = "GL_ARB_draw_instanced"; +const char* const E_GL_ARB_fragment_coord_conventions = "GL_ARB_fragment_coord_conventions"; + +const char* const E_GL_KHR_shader_subgroup_basic = "GL_KHR_shader_subgroup_basic"; +const char* const E_GL_KHR_shader_subgroup_vote = "GL_KHR_shader_subgroup_vote"; +const char* const E_GL_KHR_shader_subgroup_arithmetic = "GL_KHR_shader_subgroup_arithmetic"; +const char* const E_GL_KHR_shader_subgroup_ballot = "GL_KHR_shader_subgroup_ballot"; +const char* const E_GL_KHR_shader_subgroup_shuffle = "GL_KHR_shader_subgroup_shuffle"; +const char* const E_GL_KHR_shader_subgroup_shuffle_relative = "GL_KHR_shader_subgroup_shuffle_relative"; +const char* const E_GL_KHR_shader_subgroup_clustered = "GL_KHR_shader_subgroup_clustered"; +const char* const E_GL_KHR_shader_subgroup_quad = "GL_KHR_shader_subgroup_quad"; +const char* const E_GL_KHR_memory_scope_semantics = "GL_KHR_memory_scope_semantics"; + +const char* const E_GL_EXT_shader_atomic_int64 = "GL_EXT_shader_atomic_int64"; + +const char* const E_GL_EXT_shader_non_constant_global_initializers = "GL_EXT_shader_non_constant_global_initializers"; +const char* const E_GL_EXT_shader_image_load_formatted = "GL_EXT_shader_image_load_formatted"; + +const char* const E_GL_EXT_shader_16bit_storage = "GL_EXT_shader_16bit_storage"; +const char* const E_GL_EXT_shader_8bit_storage = "GL_EXT_shader_8bit_storage"; + + +// EXT extensions +const char* const E_GL_EXT_device_group = "GL_EXT_device_group"; +const char* const E_GL_EXT_multiview = "GL_EXT_multiview"; +const char* const E_GL_EXT_post_depth_coverage = "GL_EXT_post_depth_coverage"; +const char* const E_GL_EXT_control_flow_attributes = "GL_EXT_control_flow_attributes"; +const char* const E_GL_EXT_nonuniform_qualifier = "GL_EXT_nonuniform_qualifier"; +const char* const E_GL_EXT_samplerless_texture_functions = "GL_EXT_samplerless_texture_functions"; +const char* const E_GL_EXT_scalar_block_layout = "GL_EXT_scalar_block_layout"; +const char* const E_GL_EXT_fragment_invocation_density = "GL_EXT_fragment_invocation_density"; +const char* const E_GL_EXT_buffer_reference = "GL_EXT_buffer_reference"; +const char* const E_GL_EXT_buffer_reference2 = "GL_EXT_buffer_reference2"; +const char* const E_GL_EXT_buffer_reference_uvec2 = "GL_EXT_buffer_reference_uvec2"; +const char* const E_GL_EXT_demote_to_helper_invocation = "GL_EXT_demote_to_helper_invocation"; +const char* const E_GL_EXT_shader_realtime_clock = "GL_EXT_shader_realtime_clock"; +const char* const E_GL_EXT_debug_printf = "GL_EXT_debug_printf"; +const char* const E_GL_EXT_ray_tracing = "GL_EXT_ray_tracing"; +const char* const E_GL_EXT_ray_query = "GL_EXT_ray_query"; +const char* const E_GL_EXT_ray_flags_primitive_culling = "GL_EXT_ray_flags_primitive_culling"; +const char* const E_GL_EXT_ray_cull_mask = "GL_EXT_ray_cull_mask"; +const char* const E_GL_EXT_blend_func_extended = "GL_EXT_blend_func_extended"; +const char* const E_GL_EXT_shader_implicit_conversions = "GL_EXT_shader_implicit_conversions"; +const char* const E_GL_EXT_fragment_shading_rate = "GL_EXT_fragment_shading_rate"; +const char* const 
E_GL_EXT_shader_image_int64 = "GL_EXT_shader_image_int64";
+const char* const E_GL_EXT_null_initializer = "GL_EXT_null_initializer";
+const char* const E_GL_EXT_shared_memory_block = "GL_EXT_shared_memory_block";
+const char* const E_GL_EXT_subgroup_uniform_control_flow = "GL_EXT_subgroup_uniform_control_flow";
+const char* const E_GL_EXT_spirv_intrinsics = "GL_EXT_spirv_intrinsics";
+const char* const E_GL_EXT_fragment_shader_barycentric = "GL_EXT_fragment_shader_barycentric";
+const char* const E_GL_EXT_mesh_shader = "GL_EXT_mesh_shader";
+const char* const E_GL_EXT_opacity_micromap = "GL_EXT_opacity_micromap";
+
+// Arrays of extensions for the above post_depth_coverage duplications
+
+const char* const post_depth_coverageEXTs[] = { E_GL_ARB_post_depth_coverage, E_GL_EXT_post_depth_coverage };
+const int Num_post_depth_coverageEXTs = sizeof(post_depth_coverageEXTs) / sizeof(post_depth_coverageEXTs[0]);
+
+// Array of extensions to cover both extensions providing ray tracing capabilities.
+const char* const ray_tracing_EXTs[] = { E_GL_EXT_ray_query, E_GL_EXT_ray_tracing };
+const int Num_ray_tracing_EXTs = sizeof(ray_tracing_EXTs) / sizeof(ray_tracing_EXTs[0]);
+
+// OVR extensions
+const char* const E_GL_OVR_multiview = "GL_OVR_multiview";
+const char* const E_GL_OVR_multiview2 = "GL_OVR_multiview2";
+
+const char* const OVR_multiview_EXTs[] = { E_GL_OVR_multiview, E_GL_OVR_multiview2 };
+const int Num_OVR_multiview_EXTs = sizeof(OVR_multiview_EXTs) / sizeof(OVR_multiview_EXTs[0]);
+
+// #line and #include
+const char* const E_GL_GOOGLE_cpp_style_line_directive = "GL_GOOGLE_cpp_style_line_directive";
+const char* const E_GL_GOOGLE_include_directive = "GL_GOOGLE_include_directive";
+
+const char* const E_GL_AMD_shader_ballot = "GL_AMD_shader_ballot";
+const char* const E_GL_AMD_shader_trinary_minmax = "GL_AMD_shader_trinary_minmax";
+const char* const E_GL_AMD_shader_explicit_vertex_parameter = "GL_AMD_shader_explicit_vertex_parameter";
+const char* const E_GL_AMD_gcn_shader = "GL_AMD_gcn_shader";
+const char* const E_GL_AMD_gpu_shader_half_float = "GL_AMD_gpu_shader_half_float";
+const char* const E_GL_AMD_texture_gather_bias_lod = "GL_AMD_texture_gather_bias_lod";
+const char* const E_GL_AMD_gpu_shader_int16 = "GL_AMD_gpu_shader_int16";
+const char* const E_GL_AMD_shader_image_load_store_lod = "GL_AMD_shader_image_load_store_lod";
+const char* const E_GL_AMD_shader_fragment_mask = "GL_AMD_shader_fragment_mask";
+const char* const E_GL_AMD_gpu_shader_half_float_fetch = "GL_AMD_gpu_shader_half_float_fetch";
+const char* const E_GL_AMD_shader_early_and_late_fragment_tests = "GL_AMD_shader_early_and_late_fragment_tests";
+
+const char* const E_GL_INTEL_shader_integer_functions2 = "GL_INTEL_shader_integer_functions2";
+
+const char* const E_GL_NV_sample_mask_override_coverage = "GL_NV_sample_mask_override_coverage";
+const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometry_shader_passthrough";
+const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2";
+const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering";
+const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes";
+const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64";
+const char* const E_GL_NV_conservative_raster_underestimation = "GL_NV_conservative_raster_underestimation";
+const char* const E_GL_NV_shader_noperspective_interpolation = "GL_NV_shader_noperspective_interpolation";
+const char* const
E_GL_NV_shader_subgroup_partitioned = "GL_NV_shader_subgroup_partitioned"; +const char* const E_GL_NV_shading_rate_image = "GL_NV_shading_rate_image"; +const char* const E_GL_NV_ray_tracing = "GL_NV_ray_tracing"; +const char* const E_GL_NV_ray_tracing_motion_blur = "GL_NV_ray_tracing_motion_blur"; +const char* const E_GL_NV_fragment_shader_barycentric = "GL_NV_fragment_shader_barycentric"; +const char* const E_GL_NV_compute_shader_derivatives = "GL_NV_compute_shader_derivatives"; +const char* const E_GL_NV_shader_texture_footprint = "GL_NV_shader_texture_footprint"; +const char* const E_GL_NV_mesh_shader = "GL_NV_mesh_shader"; + +// Arrays of extensions for the above viewportEXTs duplications + +const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_NV_viewport_array2 }; +const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]); + +const char* const E_GL_NV_cooperative_matrix = "GL_NV_cooperative_matrix"; +const char* const E_GL_NV_shader_sm_builtins = "GL_NV_shader_sm_builtins"; +const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer_cooperative_matrix"; + +// AEP +const char* const E_GL_ANDROID_extension_pack_es31a = "GL_ANDROID_extension_pack_es31a"; +const char* const E_GL_KHR_blend_equation_advanced = "GL_KHR_blend_equation_advanced"; +const char* const E_GL_OES_sample_variables = "GL_OES_sample_variables"; +const char* const E_GL_OES_shader_image_atomic = "GL_OES_shader_image_atomic"; +const char* const E_GL_OES_shader_multisample_interpolation = "GL_OES_shader_multisample_interpolation"; +const char* const E_GL_OES_texture_storage_multisample_2d_array = "GL_OES_texture_storage_multisample_2d_array"; +const char* const E_GL_EXT_geometry_shader = "GL_EXT_geometry_shader"; +const char* const E_GL_EXT_geometry_point_size = "GL_EXT_geometry_point_size"; +const char* const E_GL_EXT_gpu_shader5 = "GL_EXT_gpu_shader5"; +const char* const E_GL_EXT_primitive_bounding_box = "GL_EXT_primitive_bounding_box"; +const char* const E_GL_EXT_shader_io_blocks = "GL_EXT_shader_io_blocks"; +const char* const E_GL_EXT_tessellation_shader = "GL_EXT_tessellation_shader"; +const char* const E_GL_EXT_tessellation_point_size = "GL_EXT_tessellation_point_size"; +const char* const E_GL_EXT_texture_buffer = "GL_EXT_texture_buffer"; +const char* const E_GL_EXT_texture_cube_map_array = "GL_EXT_texture_cube_map_array"; +const char* const E_GL_EXT_shader_integer_mix = "GL_EXT_shader_integer_mix"; + +// OES matching AEP +const char* const E_GL_OES_geometry_shader = "GL_OES_geometry_shader"; +const char* const E_GL_OES_geometry_point_size = "GL_OES_geometry_point_size"; +const char* const E_GL_OES_gpu_shader5 = "GL_OES_gpu_shader5"; +const char* const E_GL_OES_primitive_bounding_box = "GL_OES_primitive_bounding_box"; +const char* const E_GL_OES_shader_io_blocks = "GL_OES_shader_io_blocks"; +const char* const E_GL_OES_tessellation_shader = "GL_OES_tessellation_shader"; +const char* const E_GL_OES_tessellation_point_size = "GL_OES_tessellation_point_size"; +const char* const E_GL_OES_texture_buffer = "GL_OES_texture_buffer"; +const char* const E_GL_OES_texture_cube_map_array = "GL_OES_texture_cube_map_array"; + +// EXT +const char* const E_GL_EXT_shader_explicit_arithmetic_types = "GL_EXT_shader_explicit_arithmetic_types"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int8 = "GL_EXT_shader_explicit_arithmetic_types_int8"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int16 = "GL_EXT_shader_explicit_arithmetic_types_int16"; +const 
char* const E_GL_EXT_shader_explicit_arithmetic_types_int32 = "GL_EXT_shader_explicit_arithmetic_types_int32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int64 = "GL_EXT_shader_explicit_arithmetic_types_int64"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float16 = "GL_EXT_shader_explicit_arithmetic_types_float16"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float32 = "GL_EXT_shader_explicit_arithmetic_types_float32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float64 = "GL_EXT_shader_explicit_arithmetic_types_float64"; + +const char* const E_GL_EXT_shader_subgroup_extended_types_int8 = "GL_EXT_shader_subgroup_extended_types_int8"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int16 = "GL_EXT_shader_subgroup_extended_types_int16"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int64 = "GL_EXT_shader_subgroup_extended_types_int64"; +const char* const E_GL_EXT_shader_subgroup_extended_types_float16 = "GL_EXT_shader_subgroup_extended_types_float16"; +const char* const E_GL_EXT_terminate_invocation = "GL_EXT_terminate_invocation"; + +const char* const E_GL_EXT_shader_atomic_float = "GL_EXT_shader_atomic_float"; +const char* const E_GL_EXT_shader_atomic_float2 = "GL_EXT_shader_atomic_float2"; + +// Arrays of extensions for the above AEP duplications + +const char* const AEP_geometry_shader[] = { E_GL_EXT_geometry_shader, E_GL_OES_geometry_shader }; +const int Num_AEP_geometry_shader = sizeof(AEP_geometry_shader)/sizeof(AEP_geometry_shader[0]); + +const char* const AEP_geometry_point_size[] = { E_GL_EXT_geometry_point_size, E_GL_OES_geometry_point_size }; +const int Num_AEP_geometry_point_size = sizeof(AEP_geometry_point_size)/sizeof(AEP_geometry_point_size[0]); + +const char* const AEP_gpu_shader5[] = { E_GL_EXT_gpu_shader5, E_GL_OES_gpu_shader5 }; +const int Num_AEP_gpu_shader5 = sizeof(AEP_gpu_shader5)/sizeof(AEP_gpu_shader5[0]); + +const char* const AEP_primitive_bounding_box[] = { E_GL_EXT_primitive_bounding_box, E_GL_OES_primitive_bounding_box }; +const int Num_AEP_primitive_bounding_box = sizeof(AEP_primitive_bounding_box)/sizeof(AEP_primitive_bounding_box[0]); + +const char* const AEP_shader_io_blocks[] = { E_GL_EXT_shader_io_blocks, E_GL_OES_shader_io_blocks }; +const int Num_AEP_shader_io_blocks = sizeof(AEP_shader_io_blocks)/sizeof(AEP_shader_io_blocks[0]); + +const char* const AEP_tessellation_shader[] = { E_GL_EXT_tessellation_shader, E_GL_OES_tessellation_shader }; +const int Num_AEP_tessellation_shader = sizeof(AEP_tessellation_shader)/sizeof(AEP_tessellation_shader[0]); + +const char* const AEP_tessellation_point_size[] = { E_GL_EXT_tessellation_point_size, E_GL_OES_tessellation_point_size }; +const int Num_AEP_tessellation_point_size = sizeof(AEP_tessellation_point_size)/sizeof(AEP_tessellation_point_size[0]); + +const char* const AEP_texture_buffer[] = { E_GL_EXT_texture_buffer, E_GL_OES_texture_buffer }; +const int Num_AEP_texture_buffer = sizeof(AEP_texture_buffer)/sizeof(AEP_texture_buffer[0]); + +const char* const AEP_texture_cube_map_array[] = { E_GL_EXT_texture_cube_map_array, E_GL_OES_texture_cube_map_array }; +const int Num_AEP_texture_cube_map_array = sizeof(AEP_texture_cube_map_array)/sizeof(AEP_texture_cube_map_array[0]); + +const char* const AEP_mesh_shader[] = { E_GL_NV_mesh_shader, E_GL_EXT_mesh_shader }; +const int Num_AEP_mesh_shader = sizeof(AEP_mesh_shader)/sizeof(AEP_mesh_shader[0]); + +} // end namespace glslang + +#endif // _VERSIONS_INCLUDED_ diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/attribute.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/attribute.h new file mode 100644 index 0000000..c5b2917 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/attribute.h @@ -0,0 +1,150 @@ +// +// Copyright (C) 2017 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+//
+
+#ifndef _ATTRIBUTE_INCLUDED_
+#define _ATTRIBUTE_INCLUDED_
+
+#include "../Include/Common.h"
+#include "../Include/ConstantUnion.h"
+
+namespace glslang {
+
+    enum TAttributeType {
+        EatNone,
+        EatAllow_uav_condition,
+        EatBranch,
+        EatCall,
+        EatDomain,
+        EatEarlyDepthStencil,
+        EatFastOpt,
+        EatFlatten,
+        EatForceCase,
+        EatInstance,
+        EatMaxTessFactor,
+        EatNumThreads,
+        EatMaxVertexCount,
+        EatOutputControlPoints,
+        EatOutputTopology,
+        EatPartitioning,
+        EatPatchConstantFunc,
+        EatPatchSize,
+        EatUnroll,
+        EatLoop,
+        EatBinding,
+        EatGlobalBinding,
+        EatLocation,
+        EatInputAttachment,
+        EatBuiltIn,
+        EatPushConstant,
+        EatConstantId,
+        EatDependencyInfinite,
+        EatDependencyLength,
+        EatMinIterations,
+        EatMaxIterations,
+        EatIterationMultiple,
+        EatPeelCount,
+        EatPartialCount,
+        EatFormatRgba32f,
+        EatFormatRgba16f,
+        EatFormatR32f,
+        EatFormatRgba8,
+        EatFormatRgba8Snorm,
+        EatFormatRg32f,
+        EatFormatRg16f,
+        EatFormatR11fG11fB10f,
+        EatFormatR16f,
+        EatFormatRgba16,
+        EatFormatRgb10A2,
+        EatFormatRg16,
+        EatFormatRg8,
+        EatFormatR16,
+        EatFormatR8,
+        EatFormatRgba16Snorm,
+        EatFormatRg16Snorm,
+        EatFormatRg8Snorm,
+        EatFormatR16Snorm,
+        EatFormatR8Snorm,
+        EatFormatRgba32i,
+        EatFormatRgba16i,
+        EatFormatRgba8i,
+        EatFormatR32i,
+        EatFormatRg32i,
+        EatFormatRg16i,
+        EatFormatRg8i,
+        EatFormatR16i,
+        EatFormatR8i,
+        EatFormatRgba32ui,
+        EatFormatRgba16ui,
+        EatFormatRgba8ui,
+        EatFormatR32ui,
+        EatFormatRgb10a2ui,
+        EatFormatRg32ui,
+        EatFormatRg16ui,
+        EatFormatRg8ui,
+        EatFormatR16ui,
+        EatFormatR8ui,
+        EatFormatUnknown,
+        EatNonWritable,
+        EatNonReadable,
+        EatSubgroupUniformControlFlow,
+    };
+
+    class TIntermAggregate;
+
+    struct TAttributeArgs {
+        TAttributeType name;
+        const TIntermAggregate* args;
+
+        // Obtain attribute as integer
+        // Return false if it cannot be obtained
+        bool getInt(int& value, int argNum = 0) const;
+
+        // Obtain attribute as string, with optional to-lower transform
+        // Return false if it cannot be obtained
+        bool getString(TString& value, int argNum = 0, bool convertToLower = true) const;
+
+        // How many arguments were provided to the attribute?
+        int size() const;
+
+    protected:
+        const TConstUnion* getConstUnion(TBasicType basicType, int argNum) const;
+    };
+
+    typedef TList<TAttributeArgs> TAttributes;
+
+} // end namespace glslang
+
+#endif // _ATTRIBUTE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/gl_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/gl_types.h
new file mode 100644
index 0000000..d6c9393
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/gl_types.h
@@ -0,0 +1,218 @@
+/*
+** Copyright (c) 2013 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#pragma once + +#define GL_FLOAT 0x1406 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 + +#define GL_DOUBLE 0x140A +#define GL_DOUBLE_VEC2 0x8FFC +#define GL_DOUBLE_VEC3 0x8FFD +#define GL_DOUBLE_VEC4 0x8FFE + +#define GL_INT 0x1404 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 + +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 + +#define GL_INT64_ARB 0x140E +#define GL_INT64_VEC2_ARB 0x8FE9 +#define GL_INT64_VEC3_ARB 0x8FEA +#define GL_INT64_VEC4_ARB 0x8FEB + +#define GL_UNSIGNED_INT64_ARB 0x140F +#define GL_UNSIGNED_INT64_VEC2_ARB 0x8FF5 +#define GL_UNSIGNED_INT64_VEC3_ARB 0x8FF6 +#define GL_UNSIGNED_INT64_VEC4_ARB 0x8FF7 +#define GL_UNSIGNED_INT16_VEC2_NV 0x8FF1 +#define GL_UNSIGNED_INT16_VEC3_NV 0x8FF2 +#define GL_UNSIGNED_INT16_VEC4_NV 0x8FF3 + +#define GL_INT16_NV 0x8FE4 +#define GL_INT16_VEC2_NV 0x8FE5 +#define GL_INT16_VEC3_NV 0x8FE6 +#define GL_INT16_VEC4_NV 0x8FE7 + +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 + +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A + +#define GL_DOUBLE_MAT2 0x8F46 +#define GL_DOUBLE_MAT3 0x8F47 +#define GL_DOUBLE_MAT4 0x8F48 +#define GL_DOUBLE_MAT2x3 0x8F49 +#define GL_DOUBLE_MAT2x4 0x8F4A +#define GL_DOUBLE_MAT3x2 0x8F4B +#define GL_DOUBLE_MAT3x4 0x8F4C +#define GL_DOUBLE_MAT4x2 0x8F4D +#define GL_DOUBLE_MAT4x3 0x8F4E + +// Those constants are borrowed from extension NV_gpu_shader5 +#define GL_FLOAT16_NV 0x8FF8 +#define GL_FLOAT16_VEC2_NV 0x8FF9 +#define GL_FLOAT16_VEC3_NV 0x8FFA +#define GL_FLOAT16_VEC4_NV 0x8FFB + +#define GL_FLOAT16_MAT2_AMD 0x91C5 +#define GL_FLOAT16_MAT3_AMD 0x91C6 +#define GL_FLOAT16_MAT4_AMD 0x91C7 +#define GL_FLOAT16_MAT2x3_AMD 0x91C8 +#define GL_FLOAT16_MAT2x4_AMD 0x91C9 +#define GL_FLOAT16_MAT3x2_AMD 0x91CA +#define GL_FLOAT16_MAT3x4_AMD 0x91CB +#define GL_FLOAT16_MAT4x2_AMD 0x91CC +#define GL_FLOAT16_MAT4x3_AMD 0x91CD + +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C +#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D +#define GL_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900C +#define 
GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW_ARB 0x900D + +#define GL_FLOAT16_SAMPLER_1D_AMD 0x91CE +#define GL_FLOAT16_SAMPLER_2D_AMD 0x91CF +#define GL_FLOAT16_SAMPLER_3D_AMD 0x91D0 +#define GL_FLOAT16_SAMPLER_CUBE_AMD 0x91D1 +#define GL_FLOAT16_SAMPLER_2D_RECT_AMD 0x91D2 +#define GL_FLOAT16_SAMPLER_1D_ARRAY_AMD 0x91D3 +#define GL_FLOAT16_SAMPLER_2D_ARRAY_AMD 0x91D4 +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_AMD 0x91D5 +#define GL_FLOAT16_SAMPLER_BUFFER_AMD 0x91D6 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_AMD 0x91D7 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_ARRAY_AMD 0x91D8 + +#define GL_FLOAT16_SAMPLER_1D_SHADOW_AMD 0x91D9 +#define GL_FLOAT16_SAMPLER_2D_SHADOW_AMD 0x91DA +#define GL_FLOAT16_SAMPLER_2D_RECT_SHADOW_AMD 0x91DB +#define GL_FLOAT16_SAMPLER_1D_ARRAY_SHADOW_AMD 0x91DC +#define GL_FLOAT16_SAMPLER_2D_ARRAY_SHADOW_AMD 0x91DD +#define GL_FLOAT16_SAMPLER_CUBE_SHADOW_AMD 0x91DE +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_SHADOW_AMD 0x91DF + +#define GL_FLOAT16_IMAGE_1D_AMD 0x91E0 +#define GL_FLOAT16_IMAGE_2D_AMD 0x91E1 +#define GL_FLOAT16_IMAGE_3D_AMD 0x91E2 +#define GL_FLOAT16_IMAGE_2D_RECT_AMD 0x91E3 +#define GL_FLOAT16_IMAGE_CUBE_AMD 0x91E4 +#define GL_FLOAT16_IMAGE_1D_ARRAY_AMD 0x91E5 +#define GL_FLOAT16_IMAGE_2D_ARRAY_AMD 0x91E6 +#define GL_FLOAT16_IMAGE_CUBE_MAP_ARRAY_AMD 0x91E7 +#define GL_FLOAT16_IMAGE_BUFFER_AMD 0x91E8 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_AMD 0x91E9 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_ARRAY_AMD 0x91EA + +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900E + +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A + +#define GL_IMAGE_1D 0x904C +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_2D_RECT 0x904F +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_BUFFER 0x9051 +#define GL_IMAGE_1D_ARRAY 0x9052 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 +#define GL_IMAGE_2D_MULTISAMPLE 0x9055 +#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056 +#define GL_INT_IMAGE_1D 0x9057 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_2D_RECT 0x905A +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_BUFFER 0x905C +#define GL_INT_IMAGE_1D_ARRAY 0x905D +#define GL_INT_IMAGE_2D_ARRAY 0x905E +#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F +#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060 +#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061 +#define GL_UNSIGNED_INT_IMAGE_1D 0x9062 +#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 
+#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
+#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
+#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
+#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
+
+#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/glslang_tab.cpp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/glslang_tab.cpp.h
new file mode 100644
index 0000000..18cef46
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/glslang_tab.cpp.h
@@ -0,0 +1,571 @@
+/* A Bison parser, made by GNU Bison 3.7.4. */
+
+/* Bison interface for Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation,
+   Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
+   especially those whose name start with YY_ or yy_.  They are
+   private implementation details that can be changed or removed.  */
+
+#ifndef YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED
+# define YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED
+/* Debug traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 1
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
+
+/* Token kinds.
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + CONST = 258, /* CONST */ + BOOL = 259, /* BOOL */ + INT = 260, /* INT */ + UINT = 261, /* UINT */ + FLOAT = 262, /* FLOAT */ + BVEC2 = 263, /* BVEC2 */ + BVEC3 = 264, /* BVEC3 */ + BVEC4 = 265, /* BVEC4 */ + IVEC2 = 266, /* IVEC2 */ + IVEC3 = 267, /* IVEC3 */ + IVEC4 = 268, /* IVEC4 */ + UVEC2 = 269, /* UVEC2 */ + UVEC3 = 270, /* UVEC3 */ + UVEC4 = 271, /* UVEC4 */ + VEC2 = 272, /* VEC2 */ + VEC3 = 273, /* VEC3 */ + VEC4 = 274, /* VEC4 */ + MAT2 = 275, /* MAT2 */ + MAT3 = 276, /* MAT3 */ + MAT4 = 277, /* MAT4 */ + MAT2X2 = 278, /* MAT2X2 */ + MAT2X3 = 279, /* MAT2X3 */ + MAT2X4 = 280, /* MAT2X4 */ + MAT3X2 = 281, /* MAT3X2 */ + MAT3X3 = 282, /* MAT3X3 */ + MAT3X4 = 283, /* MAT3X4 */ + MAT4X2 = 284, /* MAT4X2 */ + MAT4X3 = 285, /* MAT4X3 */ + MAT4X4 = 286, /* MAT4X4 */ + SAMPLER2D = 287, /* SAMPLER2D */ + SAMPLER3D = 288, /* SAMPLER3D */ + SAMPLERCUBE = 289, /* SAMPLERCUBE */ + SAMPLER2DSHADOW = 290, /* SAMPLER2DSHADOW */ + SAMPLERCUBESHADOW = 291, /* SAMPLERCUBESHADOW */ + SAMPLER2DARRAY = 292, /* SAMPLER2DARRAY */ + SAMPLER2DARRAYSHADOW = 293, /* SAMPLER2DARRAYSHADOW */ + ISAMPLER2D = 294, /* ISAMPLER2D */ + ISAMPLER3D = 295, /* ISAMPLER3D */ + ISAMPLERCUBE = 296, /* ISAMPLERCUBE */ + ISAMPLER2DARRAY = 297, /* ISAMPLER2DARRAY */ + USAMPLER2D = 298, /* USAMPLER2D */ + USAMPLER3D = 299, /* USAMPLER3D */ + USAMPLERCUBE = 300, /* USAMPLERCUBE */ + USAMPLER2DARRAY = 301, /* USAMPLER2DARRAY */ + SAMPLER = 302, /* SAMPLER */ + SAMPLERSHADOW = 303, /* SAMPLERSHADOW */ + TEXTURE2D = 304, /* TEXTURE2D */ + TEXTURE3D = 305, /* TEXTURE3D */ + TEXTURECUBE = 306, /* TEXTURECUBE */ + TEXTURE2DARRAY = 307, /* TEXTURE2DARRAY */ + ITEXTURE2D = 308, /* ITEXTURE2D */ + ITEXTURE3D = 309, /* ITEXTURE3D */ + ITEXTURECUBE = 310, /* ITEXTURECUBE */ + ITEXTURE2DARRAY = 311, /* ITEXTURE2DARRAY */ + UTEXTURE2D = 312, /* UTEXTURE2D */ + UTEXTURE3D = 313, /* UTEXTURE3D */ + UTEXTURECUBE = 314, /* UTEXTURECUBE */ + UTEXTURE2DARRAY = 315, /* UTEXTURE2DARRAY */ + ATTRIBUTE = 316, /* ATTRIBUTE */ + VARYING = 317, /* VARYING */ + FLOAT16_T = 318, /* FLOAT16_T */ + FLOAT32_T = 319, /* FLOAT32_T */ + DOUBLE = 320, /* DOUBLE */ + FLOAT64_T = 321, /* FLOAT64_T */ + INT64_T = 322, /* INT64_T */ + UINT64_T = 323, /* UINT64_T */ + INT32_T = 324, /* INT32_T */ + UINT32_T = 325, /* UINT32_T */ + INT16_T = 326, /* INT16_T */ + UINT16_T = 327, /* UINT16_T */ + INT8_T = 328, /* INT8_T */ + UINT8_T = 329, /* UINT8_T */ + I64VEC2 = 330, /* I64VEC2 */ + I64VEC3 = 331, /* I64VEC3 */ + I64VEC4 = 332, /* I64VEC4 */ + U64VEC2 = 333, /* U64VEC2 */ + U64VEC3 = 334, /* U64VEC3 */ + U64VEC4 = 335, /* U64VEC4 */ + I32VEC2 = 336, /* I32VEC2 */ + I32VEC3 = 337, /* I32VEC3 */ + I32VEC4 = 338, /* I32VEC4 */ + U32VEC2 = 339, /* U32VEC2 */ + U32VEC3 = 340, /* U32VEC3 */ + U32VEC4 = 341, /* U32VEC4 */ + I16VEC2 = 342, /* I16VEC2 */ + I16VEC3 = 343, /* I16VEC3 */ + I16VEC4 = 344, /* I16VEC4 */ + U16VEC2 = 345, /* U16VEC2 */ + U16VEC3 = 346, /* U16VEC3 */ + U16VEC4 = 347, /* U16VEC4 */ + I8VEC2 = 348, /* I8VEC2 */ + I8VEC3 = 349, /* I8VEC3 */ + I8VEC4 = 350, /* I8VEC4 */ + U8VEC2 = 351, /* U8VEC2 */ + U8VEC3 = 352, /* U8VEC3 */ + U8VEC4 = 353, /* U8VEC4 */ + DVEC2 = 354, /* DVEC2 */ + DVEC3 = 355, /* DVEC3 */ + DVEC4 = 356, /* DVEC4 */ + DMAT2 = 357, /* DMAT2 */ + DMAT3 = 358, /* DMAT3 */ + DMAT4 = 359, /* DMAT4 */ + F16VEC2 = 360, /* F16VEC2 */ + F16VEC3 = 361, /* 
F16VEC3 */ + F16VEC4 = 362, /* F16VEC4 */ + F16MAT2 = 363, /* F16MAT2 */ + F16MAT3 = 364, /* F16MAT3 */ + F16MAT4 = 365, /* F16MAT4 */ + F32VEC2 = 366, /* F32VEC2 */ + F32VEC3 = 367, /* F32VEC3 */ + F32VEC4 = 368, /* F32VEC4 */ + F32MAT2 = 369, /* F32MAT2 */ + F32MAT3 = 370, /* F32MAT3 */ + F32MAT4 = 371, /* F32MAT4 */ + F64VEC2 = 372, /* F64VEC2 */ + F64VEC3 = 373, /* F64VEC3 */ + F64VEC4 = 374, /* F64VEC4 */ + F64MAT2 = 375, /* F64MAT2 */ + F64MAT3 = 376, /* F64MAT3 */ + F64MAT4 = 377, /* F64MAT4 */ + DMAT2X2 = 378, /* DMAT2X2 */ + DMAT2X3 = 379, /* DMAT2X3 */ + DMAT2X4 = 380, /* DMAT2X4 */ + DMAT3X2 = 381, /* DMAT3X2 */ + DMAT3X3 = 382, /* DMAT3X3 */ + DMAT3X4 = 383, /* DMAT3X4 */ + DMAT4X2 = 384, /* DMAT4X2 */ + DMAT4X3 = 385, /* DMAT4X3 */ + DMAT4X4 = 386, /* DMAT4X4 */ + F16MAT2X2 = 387, /* F16MAT2X2 */ + F16MAT2X3 = 388, /* F16MAT2X3 */ + F16MAT2X4 = 389, /* F16MAT2X4 */ + F16MAT3X2 = 390, /* F16MAT3X2 */ + F16MAT3X3 = 391, /* F16MAT3X3 */ + F16MAT3X4 = 392, /* F16MAT3X4 */ + F16MAT4X2 = 393, /* F16MAT4X2 */ + F16MAT4X3 = 394, /* F16MAT4X3 */ + F16MAT4X4 = 395, /* F16MAT4X4 */ + F32MAT2X2 = 396, /* F32MAT2X2 */ + F32MAT2X3 = 397, /* F32MAT2X3 */ + F32MAT2X4 = 398, /* F32MAT2X4 */ + F32MAT3X2 = 399, /* F32MAT3X2 */ + F32MAT3X3 = 400, /* F32MAT3X3 */ + F32MAT3X4 = 401, /* F32MAT3X4 */ + F32MAT4X2 = 402, /* F32MAT4X2 */ + F32MAT4X3 = 403, /* F32MAT4X3 */ + F32MAT4X4 = 404, /* F32MAT4X4 */ + F64MAT2X2 = 405, /* F64MAT2X2 */ + F64MAT2X3 = 406, /* F64MAT2X3 */ + F64MAT2X4 = 407, /* F64MAT2X4 */ + F64MAT3X2 = 408, /* F64MAT3X2 */ + F64MAT3X3 = 409, /* F64MAT3X3 */ + F64MAT3X4 = 410, /* F64MAT3X4 */ + F64MAT4X2 = 411, /* F64MAT4X2 */ + F64MAT4X3 = 412, /* F64MAT4X3 */ + F64MAT4X4 = 413, /* F64MAT4X4 */ + ATOMIC_UINT = 414, /* ATOMIC_UINT */ + ACCSTRUCTNV = 415, /* ACCSTRUCTNV */ + ACCSTRUCTEXT = 416, /* ACCSTRUCTEXT */ + RAYQUERYEXT = 417, /* RAYQUERYEXT */ + FCOOPMATNV = 418, /* FCOOPMATNV */ + ICOOPMATNV = 419, /* ICOOPMATNV */ + UCOOPMATNV = 420, /* UCOOPMATNV */ + SAMPLERCUBEARRAY = 421, /* SAMPLERCUBEARRAY */ + SAMPLERCUBEARRAYSHADOW = 422, /* SAMPLERCUBEARRAYSHADOW */ + ISAMPLERCUBEARRAY = 423, /* ISAMPLERCUBEARRAY */ + USAMPLERCUBEARRAY = 424, /* USAMPLERCUBEARRAY */ + SAMPLER1D = 425, /* SAMPLER1D */ + SAMPLER1DARRAY = 426, /* SAMPLER1DARRAY */ + SAMPLER1DARRAYSHADOW = 427, /* SAMPLER1DARRAYSHADOW */ + ISAMPLER1D = 428, /* ISAMPLER1D */ + SAMPLER1DSHADOW = 429, /* SAMPLER1DSHADOW */ + SAMPLER2DRECT = 430, /* SAMPLER2DRECT */ + SAMPLER2DRECTSHADOW = 431, /* SAMPLER2DRECTSHADOW */ + ISAMPLER2DRECT = 432, /* ISAMPLER2DRECT */ + USAMPLER2DRECT = 433, /* USAMPLER2DRECT */ + SAMPLERBUFFER = 434, /* SAMPLERBUFFER */ + ISAMPLERBUFFER = 435, /* ISAMPLERBUFFER */ + USAMPLERBUFFER = 436, /* USAMPLERBUFFER */ + SAMPLER2DMS = 437, /* SAMPLER2DMS */ + ISAMPLER2DMS = 438, /* ISAMPLER2DMS */ + USAMPLER2DMS = 439, /* USAMPLER2DMS */ + SAMPLER2DMSARRAY = 440, /* SAMPLER2DMSARRAY */ + ISAMPLER2DMSARRAY = 441, /* ISAMPLER2DMSARRAY */ + USAMPLER2DMSARRAY = 442, /* USAMPLER2DMSARRAY */ + SAMPLEREXTERNALOES = 443, /* SAMPLEREXTERNALOES */ + SAMPLEREXTERNAL2DY2YEXT = 444, /* SAMPLEREXTERNAL2DY2YEXT */ + ISAMPLER1DARRAY = 445, /* ISAMPLER1DARRAY */ + USAMPLER1D = 446, /* USAMPLER1D */ + USAMPLER1DARRAY = 447, /* USAMPLER1DARRAY */ + F16SAMPLER1D = 448, /* F16SAMPLER1D */ + F16SAMPLER2D = 449, /* F16SAMPLER2D */ + F16SAMPLER3D = 450, /* F16SAMPLER3D */ + F16SAMPLER2DRECT = 451, /* F16SAMPLER2DRECT */ + F16SAMPLERCUBE = 452, /* F16SAMPLERCUBE */ + F16SAMPLER1DARRAY = 453, /* F16SAMPLER1DARRAY */ + 
F16SAMPLER2DARRAY = 454, /* F16SAMPLER2DARRAY */ + F16SAMPLERCUBEARRAY = 455, /* F16SAMPLERCUBEARRAY */ + F16SAMPLERBUFFER = 456, /* F16SAMPLERBUFFER */ + F16SAMPLER2DMS = 457, /* F16SAMPLER2DMS */ + F16SAMPLER2DMSARRAY = 458, /* F16SAMPLER2DMSARRAY */ + F16SAMPLER1DSHADOW = 459, /* F16SAMPLER1DSHADOW */ + F16SAMPLER2DSHADOW = 460, /* F16SAMPLER2DSHADOW */ + F16SAMPLER1DARRAYSHADOW = 461, /* F16SAMPLER1DARRAYSHADOW */ + F16SAMPLER2DARRAYSHADOW = 462, /* F16SAMPLER2DARRAYSHADOW */ + F16SAMPLER2DRECTSHADOW = 463, /* F16SAMPLER2DRECTSHADOW */ + F16SAMPLERCUBESHADOW = 464, /* F16SAMPLERCUBESHADOW */ + F16SAMPLERCUBEARRAYSHADOW = 465, /* F16SAMPLERCUBEARRAYSHADOW */ + IMAGE1D = 466, /* IMAGE1D */ + IIMAGE1D = 467, /* IIMAGE1D */ + UIMAGE1D = 468, /* UIMAGE1D */ + IMAGE2D = 469, /* IMAGE2D */ + IIMAGE2D = 470, /* IIMAGE2D */ + UIMAGE2D = 471, /* UIMAGE2D */ + IMAGE3D = 472, /* IMAGE3D */ + IIMAGE3D = 473, /* IIMAGE3D */ + UIMAGE3D = 474, /* UIMAGE3D */ + IMAGE2DRECT = 475, /* IMAGE2DRECT */ + IIMAGE2DRECT = 476, /* IIMAGE2DRECT */ + UIMAGE2DRECT = 477, /* UIMAGE2DRECT */ + IMAGECUBE = 478, /* IMAGECUBE */ + IIMAGECUBE = 479, /* IIMAGECUBE */ + UIMAGECUBE = 480, /* UIMAGECUBE */ + IMAGEBUFFER = 481, /* IMAGEBUFFER */ + IIMAGEBUFFER = 482, /* IIMAGEBUFFER */ + UIMAGEBUFFER = 483, /* UIMAGEBUFFER */ + IMAGE1DARRAY = 484, /* IMAGE1DARRAY */ + IIMAGE1DARRAY = 485, /* IIMAGE1DARRAY */ + UIMAGE1DARRAY = 486, /* UIMAGE1DARRAY */ + IMAGE2DARRAY = 487, /* IMAGE2DARRAY */ + IIMAGE2DARRAY = 488, /* IIMAGE2DARRAY */ + UIMAGE2DARRAY = 489, /* UIMAGE2DARRAY */ + IMAGECUBEARRAY = 490, /* IMAGECUBEARRAY */ + IIMAGECUBEARRAY = 491, /* IIMAGECUBEARRAY */ + UIMAGECUBEARRAY = 492, /* UIMAGECUBEARRAY */ + IMAGE2DMS = 493, /* IMAGE2DMS */ + IIMAGE2DMS = 494, /* IIMAGE2DMS */ + UIMAGE2DMS = 495, /* UIMAGE2DMS */ + IMAGE2DMSARRAY = 496, /* IMAGE2DMSARRAY */ + IIMAGE2DMSARRAY = 497, /* IIMAGE2DMSARRAY */ + UIMAGE2DMSARRAY = 498, /* UIMAGE2DMSARRAY */ + F16IMAGE1D = 499, /* F16IMAGE1D */ + F16IMAGE2D = 500, /* F16IMAGE2D */ + F16IMAGE3D = 501, /* F16IMAGE3D */ + F16IMAGE2DRECT = 502, /* F16IMAGE2DRECT */ + F16IMAGECUBE = 503, /* F16IMAGECUBE */ + F16IMAGE1DARRAY = 504, /* F16IMAGE1DARRAY */ + F16IMAGE2DARRAY = 505, /* F16IMAGE2DARRAY */ + F16IMAGECUBEARRAY = 506, /* F16IMAGECUBEARRAY */ + F16IMAGEBUFFER = 507, /* F16IMAGEBUFFER */ + F16IMAGE2DMS = 508, /* F16IMAGE2DMS */ + F16IMAGE2DMSARRAY = 509, /* F16IMAGE2DMSARRAY */ + I64IMAGE1D = 510, /* I64IMAGE1D */ + U64IMAGE1D = 511, /* U64IMAGE1D */ + I64IMAGE2D = 512, /* I64IMAGE2D */ + U64IMAGE2D = 513, /* U64IMAGE2D */ + I64IMAGE3D = 514, /* I64IMAGE3D */ + U64IMAGE3D = 515, /* U64IMAGE3D */ + I64IMAGE2DRECT = 516, /* I64IMAGE2DRECT */ + U64IMAGE2DRECT = 517, /* U64IMAGE2DRECT */ + I64IMAGECUBE = 518, /* I64IMAGECUBE */ + U64IMAGECUBE = 519, /* U64IMAGECUBE */ + I64IMAGEBUFFER = 520, /* I64IMAGEBUFFER */ + U64IMAGEBUFFER = 521, /* U64IMAGEBUFFER */ + I64IMAGE1DARRAY = 522, /* I64IMAGE1DARRAY */ + U64IMAGE1DARRAY = 523, /* U64IMAGE1DARRAY */ + I64IMAGE2DARRAY = 524, /* I64IMAGE2DARRAY */ + U64IMAGE2DARRAY = 525, /* U64IMAGE2DARRAY */ + I64IMAGECUBEARRAY = 526, /* I64IMAGECUBEARRAY */ + U64IMAGECUBEARRAY = 527, /* U64IMAGECUBEARRAY */ + I64IMAGE2DMS = 528, /* I64IMAGE2DMS */ + U64IMAGE2DMS = 529, /* U64IMAGE2DMS */ + I64IMAGE2DMSARRAY = 530, /* I64IMAGE2DMSARRAY */ + U64IMAGE2DMSARRAY = 531, /* U64IMAGE2DMSARRAY */ + TEXTURECUBEARRAY = 532, /* TEXTURECUBEARRAY */ + ITEXTURECUBEARRAY = 533, /* ITEXTURECUBEARRAY */ + UTEXTURECUBEARRAY = 534, /* UTEXTURECUBEARRAY */ + TEXTURE1D = 
535, /* TEXTURE1D */ + ITEXTURE1D = 536, /* ITEXTURE1D */ + UTEXTURE1D = 537, /* UTEXTURE1D */ + TEXTURE1DARRAY = 538, /* TEXTURE1DARRAY */ + ITEXTURE1DARRAY = 539, /* ITEXTURE1DARRAY */ + UTEXTURE1DARRAY = 540, /* UTEXTURE1DARRAY */ + TEXTURE2DRECT = 541, /* TEXTURE2DRECT */ + ITEXTURE2DRECT = 542, /* ITEXTURE2DRECT */ + UTEXTURE2DRECT = 543, /* UTEXTURE2DRECT */ + TEXTUREBUFFER = 544, /* TEXTUREBUFFER */ + ITEXTUREBUFFER = 545, /* ITEXTUREBUFFER */ + UTEXTUREBUFFER = 546, /* UTEXTUREBUFFER */ + TEXTURE2DMS = 547, /* TEXTURE2DMS */ + ITEXTURE2DMS = 548, /* ITEXTURE2DMS */ + UTEXTURE2DMS = 549, /* UTEXTURE2DMS */ + TEXTURE2DMSARRAY = 550, /* TEXTURE2DMSARRAY */ + ITEXTURE2DMSARRAY = 551, /* ITEXTURE2DMSARRAY */ + UTEXTURE2DMSARRAY = 552, /* UTEXTURE2DMSARRAY */ + F16TEXTURE1D = 553, /* F16TEXTURE1D */ + F16TEXTURE2D = 554, /* F16TEXTURE2D */ + F16TEXTURE3D = 555, /* F16TEXTURE3D */ + F16TEXTURE2DRECT = 556, /* F16TEXTURE2DRECT */ + F16TEXTURECUBE = 557, /* F16TEXTURECUBE */ + F16TEXTURE1DARRAY = 558, /* F16TEXTURE1DARRAY */ + F16TEXTURE2DARRAY = 559, /* F16TEXTURE2DARRAY */ + F16TEXTURECUBEARRAY = 560, /* F16TEXTURECUBEARRAY */ + F16TEXTUREBUFFER = 561, /* F16TEXTUREBUFFER */ + F16TEXTURE2DMS = 562, /* F16TEXTURE2DMS */ + F16TEXTURE2DMSARRAY = 563, /* F16TEXTURE2DMSARRAY */ + SUBPASSINPUT = 564, /* SUBPASSINPUT */ + SUBPASSINPUTMS = 565, /* SUBPASSINPUTMS */ + ISUBPASSINPUT = 566, /* ISUBPASSINPUT */ + ISUBPASSINPUTMS = 567, /* ISUBPASSINPUTMS */ + USUBPASSINPUT = 568, /* USUBPASSINPUT */ + USUBPASSINPUTMS = 569, /* USUBPASSINPUTMS */ + F16SUBPASSINPUT = 570, /* F16SUBPASSINPUT */ + F16SUBPASSINPUTMS = 571, /* F16SUBPASSINPUTMS */ + SPIRV_INSTRUCTION = 572, /* SPIRV_INSTRUCTION */ + SPIRV_EXECUTION_MODE = 573, /* SPIRV_EXECUTION_MODE */ + SPIRV_EXECUTION_MODE_ID = 574, /* SPIRV_EXECUTION_MODE_ID */ + SPIRV_DECORATE = 575, /* SPIRV_DECORATE */ + SPIRV_DECORATE_ID = 576, /* SPIRV_DECORATE_ID */ + SPIRV_DECORATE_STRING = 577, /* SPIRV_DECORATE_STRING */ + SPIRV_TYPE = 578, /* SPIRV_TYPE */ + SPIRV_STORAGE_CLASS = 579, /* SPIRV_STORAGE_CLASS */ + SPIRV_BY_REFERENCE = 580, /* SPIRV_BY_REFERENCE */ + SPIRV_LITERAL = 581, /* SPIRV_LITERAL */ + LEFT_OP = 582, /* LEFT_OP */ + RIGHT_OP = 583, /* RIGHT_OP */ + INC_OP = 584, /* INC_OP */ + DEC_OP = 585, /* DEC_OP */ + LE_OP = 586, /* LE_OP */ + GE_OP = 587, /* GE_OP */ + EQ_OP = 588, /* EQ_OP */ + NE_OP = 589, /* NE_OP */ + AND_OP = 590, /* AND_OP */ + OR_OP = 591, /* OR_OP */ + XOR_OP = 592, /* XOR_OP */ + MUL_ASSIGN = 593, /* MUL_ASSIGN */ + DIV_ASSIGN = 594, /* DIV_ASSIGN */ + ADD_ASSIGN = 595, /* ADD_ASSIGN */ + MOD_ASSIGN = 596, /* MOD_ASSIGN */ + LEFT_ASSIGN = 597, /* LEFT_ASSIGN */ + RIGHT_ASSIGN = 598, /* RIGHT_ASSIGN */ + AND_ASSIGN = 599, /* AND_ASSIGN */ + XOR_ASSIGN = 600, /* XOR_ASSIGN */ + OR_ASSIGN = 601, /* OR_ASSIGN */ + SUB_ASSIGN = 602, /* SUB_ASSIGN */ + STRING_LITERAL = 603, /* STRING_LITERAL */ + LEFT_PAREN = 604, /* LEFT_PAREN */ + RIGHT_PAREN = 605, /* RIGHT_PAREN */ + LEFT_BRACKET = 606, /* LEFT_BRACKET */ + RIGHT_BRACKET = 607, /* RIGHT_BRACKET */ + LEFT_BRACE = 608, /* LEFT_BRACE */ + RIGHT_BRACE = 609, /* RIGHT_BRACE */ + DOT = 610, /* DOT */ + COMMA = 611, /* COMMA */ + COLON = 612, /* COLON */ + EQUAL = 613, /* EQUAL */ + SEMICOLON = 614, /* SEMICOLON */ + BANG = 615, /* BANG */ + DASH = 616, /* DASH */ + TILDE = 617, /* TILDE */ + PLUS = 618, /* PLUS */ + STAR = 619, /* STAR */ + SLASH = 620, /* SLASH */ + PERCENT = 621, /* PERCENT */ + LEFT_ANGLE = 622, /* LEFT_ANGLE */ + RIGHT_ANGLE = 623, /* RIGHT_ANGLE */ + 
VERTICAL_BAR = 624, /* VERTICAL_BAR */ + CARET = 625, /* CARET */ + AMPERSAND = 626, /* AMPERSAND */ + QUESTION = 627, /* QUESTION */ + INVARIANT = 628, /* INVARIANT */ + HIGH_PRECISION = 629, /* HIGH_PRECISION */ + MEDIUM_PRECISION = 630, /* MEDIUM_PRECISION */ + LOW_PRECISION = 631, /* LOW_PRECISION */ + PRECISION = 632, /* PRECISION */ + PACKED = 633, /* PACKED */ + RESOURCE = 634, /* RESOURCE */ + SUPERP = 635, /* SUPERP */ + FLOATCONSTANT = 636, /* FLOATCONSTANT */ + INTCONSTANT = 637, /* INTCONSTANT */ + UINTCONSTANT = 638, /* UINTCONSTANT */ + BOOLCONSTANT = 639, /* BOOLCONSTANT */ + IDENTIFIER = 640, /* IDENTIFIER */ + TYPE_NAME = 641, /* TYPE_NAME */ + CENTROID = 642, /* CENTROID */ + IN = 643, /* IN */ + OUT = 644, /* OUT */ + INOUT = 645, /* INOUT */ + STRUCT = 646, /* STRUCT */ + VOID = 647, /* VOID */ + WHILE = 648, /* WHILE */ + BREAK = 649, /* BREAK */ + CONTINUE = 650, /* CONTINUE */ + DO = 651, /* DO */ + ELSE = 652, /* ELSE */ + FOR = 653, /* FOR */ + IF = 654, /* IF */ + DISCARD = 655, /* DISCARD */ + RETURN = 656, /* RETURN */ + SWITCH = 657, /* SWITCH */ + CASE = 658, /* CASE */ + DEFAULT = 659, /* DEFAULT */ + TERMINATE_INVOCATION = 660, /* TERMINATE_INVOCATION */ + TERMINATE_RAY = 661, /* TERMINATE_RAY */ + IGNORE_INTERSECTION = 662, /* IGNORE_INTERSECTION */ + UNIFORM = 663, /* UNIFORM */ + SHARED = 664, /* SHARED */ + BUFFER = 665, /* BUFFER */ + FLAT = 666, /* FLAT */ + SMOOTH = 667, /* SMOOTH */ + LAYOUT = 668, /* LAYOUT */ + DOUBLECONSTANT = 669, /* DOUBLECONSTANT */ + INT16CONSTANT = 670, /* INT16CONSTANT */ + UINT16CONSTANT = 671, /* UINT16CONSTANT */ + FLOAT16CONSTANT = 672, /* FLOAT16CONSTANT */ + INT32CONSTANT = 673, /* INT32CONSTANT */ + UINT32CONSTANT = 674, /* UINT32CONSTANT */ + INT64CONSTANT = 675, /* INT64CONSTANT */ + UINT64CONSTANT = 676, /* UINT64CONSTANT */ + SUBROUTINE = 677, /* SUBROUTINE */ + DEMOTE = 678, /* DEMOTE */ + PAYLOADNV = 679, /* PAYLOADNV */ + PAYLOADINNV = 680, /* PAYLOADINNV */ + HITATTRNV = 681, /* HITATTRNV */ + CALLDATANV = 682, /* CALLDATANV */ + CALLDATAINNV = 683, /* CALLDATAINNV */ + PAYLOADEXT = 684, /* PAYLOADEXT */ + PAYLOADINEXT = 685, /* PAYLOADINEXT */ + HITATTREXT = 686, /* HITATTREXT */ + CALLDATAEXT = 687, /* CALLDATAEXT */ + CALLDATAINEXT = 688, /* CALLDATAINEXT */ + PATCH = 689, /* PATCH */ + SAMPLE = 690, /* SAMPLE */ + NONUNIFORM = 691, /* NONUNIFORM */ + COHERENT = 692, /* COHERENT */ + VOLATILE = 693, /* VOLATILE */ + RESTRICT = 694, /* RESTRICT */ + READONLY = 695, /* READONLY */ + WRITEONLY = 696, /* WRITEONLY */ + DEVICECOHERENT = 697, /* DEVICECOHERENT */ + QUEUEFAMILYCOHERENT = 698, /* QUEUEFAMILYCOHERENT */ + WORKGROUPCOHERENT = 699, /* WORKGROUPCOHERENT */ + SUBGROUPCOHERENT = 700, /* SUBGROUPCOHERENT */ + NONPRIVATE = 701, /* NONPRIVATE */ + SHADERCALLCOHERENT = 702, /* SHADERCALLCOHERENT */ + NOPERSPECTIVE = 703, /* NOPERSPECTIVE */ + EXPLICITINTERPAMD = 704, /* EXPLICITINTERPAMD */ + PERVERTEXEXT = 705, /* PERVERTEXEXT */ + PERVERTEXNV = 706, /* PERVERTEXNV */ + PERPRIMITIVENV = 707, /* PERPRIMITIVENV */ + PERVIEWNV = 708, /* PERVIEWNV */ + PERTASKNV = 709, /* PERTASKNV */ + PERPRIMITIVEEXT = 710, /* PERPRIMITIVEEXT */ + TASKPAYLOADWORKGROUPEXT = 711, /* TASKPAYLOADWORKGROUPEXT */ + PRECISE = 712 /* PRECISE */ + }; + typedef enum yytokentype yytoken_kind_t; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED
+union YYSTYPE
+{
+#line 97 "MachineIndependent/glslang.y"
+
+    struct {
+        glslang::TSourceLoc loc;
+        union {
+            glslang::TString *string;
+            int i;
+            unsigned int u;
+            long long i64;
+            unsigned long long u64;
+            bool b;
+            double d;
+        };
+        glslang::TSymbol* symbol;
+    } lex;
+    struct {
+        glslang::TSourceLoc loc;
+        glslang::TOperator op;
+        union {
+            TIntermNode* intermNode;
+            glslang::TIntermNodePair nodePair;
+            glslang::TIntermTyped* intermTypedNode;
+            glslang::TAttributes* attributes;
+            glslang::TSpirvRequirement* spirvReq;
+            glslang::TSpirvInstruction* spirvInst;
+            glslang::TSpirvTypeParameters* spirvTypeParams;
+        };
+        union {
+            glslang::TPublicType type;
+            glslang::TFunction* function;
+            glslang::TParameter param;
+            glslang::TTypeLoc typeLine;
+            glslang::TTypeList* typeList;
+            glslang::TArraySizes* arraySizes;
+            glslang::TIdentifierList* identifierList;
+        };
+        glslang::TArraySizes* typeParameters;
+    } interm;
+
+#line 560 "MachineIndependent/glslang_tab.cpp.h"
+
+};
+typedef union YYSTYPE YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+
+int yyparse (glslang::TParseContext* pParseContext);
+
+#endif /* !YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED */
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/iomapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/iomapper.h
new file mode 100644
index 0000000..0003a74
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/iomapper.h
@@ -0,0 +1,361 @@
+//
+// Copyright (C) 2016 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#if !defined(GLSLANG_WEB)
+
+#ifndef _IOMAPPER_INCLUDED
+#define _IOMAPPER_INCLUDED
+
+#include <cstdint>
+#include "LiveTraverser.h"
+#include <unordered_map>
+#include <unordered_set>
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
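+//
+// In typical use the mapper runs after a successful program link; a minimal
+// sketch (TProgram calls assumed from the public glslang API, error handling
+// elided):
+//
+//     glslang::TGlslIoMapper ioMapper;
+//     glslang::TDefaultGlslIoResolver resolver(*program.getIntermediate(EShLangVertex));
+//     bool ok = program.mapIO(&resolver, &ioMapper); // assigns bindings/locations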
+//
+
+class TInfoSink;
+
+namespace glslang {
+
+class TIntermediate;
+struct TVarEntryInfo {
+    long long id;
+    TIntermSymbol* symbol;
+    bool live;
+    int newBinding;
+    int newSet;
+    int newLocation;
+    int newComponent;
+    int newIndex;
+    EShLanguage stage;
+
+    void clearNewAssignments() {
+        newBinding = -1;
+        newSet = -1;
+        newLocation = -1;
+        newComponent = -1;
+        newIndex = -1;
+    }
+
+    struct TOrderById {
+        inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { return l.id < r.id; }
+    };
+
+    struct TOrderByPriority {
+        // ordering:
+        // 1) has both binding and set
+        // 2) has binding but no set
+        // 3) has no binding but set
+        // 4) has no binding and no set
+        inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) {
+            const TQualifier& lq = l.symbol->getQualifier();
+            const TQualifier& rq = r.symbol->getQualifier();
+
+            // simple rules:
+            // has binding gives 2 points
+            // has set gives 1 point
+            // whichever entry has the most points is more important.
+            int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0);
+            int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0);
+
+            if (lPoints == rPoints)
+                return l.id < r.id;
+            return lPoints > rPoints;
+        }
+    };
+
+    struct TOrderByPriorityAndLive {
+        // ordering:
+        // 1) do live variables first
+        // 2) has both binding and set
+        // 3) has binding but no set
+        // 4) has no binding but set
+        // 5) has no binding and no set
+        inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) {
+
+            const TQualifier& lq = l.symbol->getQualifier();
+            const TQualifier& rq = r.symbol->getQualifier();
+
+            // simple rules:
+            // has binding gives 2 points
+            // has set gives 1 point
+            // whichever entry has the most points is more important.
+            int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0);
+            int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0);
+
+            if (l.live != r.live)
+                return l.live > r.live;
+
+            if (lPoints != rPoints)
+                return lPoints > rPoints;
+
+            return l.id < r.id;
+        }
+    };
+};
+
+// Base class for shared TIoMapResolver services, used by several derivations.
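+// A back end can derive from it and, at minimum, supply the resource
+// classification. A hypothetical sketch (TResourceType values assumed from the
+// public API; a real resolver may need further overrides such as
+// validateBinding/resolveBinding):
+//
+//     struct TMyResolver : public TDefaultIoResolverBase {
+//         TMyResolver(const TIntermediate& i) : TDefaultIoResolverBase(i) {}
+//         TResourceType getResourceType(const glslang::TType& type) override {
+//             if (isImageType(type))   return EResImage;
+//             if (isTextureType(type)) return EResTexture;
+//             if (isSsboType(type))    return EResSsbo;
+//             if (isUboType(type))     return EResUbo;
+//             return EResCount; // "none of the above" sentinel
+//         }
+//     };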
+struct TDefaultIoResolverBase : public glslang::TIoMapResolver {
+public:
+    TDefaultIoResolverBase(const TIntermediate& intermediate);
+    typedef std::vector<int> TSlotSet;
+    typedef std::unordered_map<int, TSlotSet> TSlotSetMap;
+
+    // grow the reflection stage by stage
+    void notifyBinding(EShLanguage, TVarEntryInfo& /*ent*/) override {}
+    void notifyInOut(EShLanguage, TVarEntryInfo& /*ent*/) override {}
+    void beginNotifications(EShLanguage) override {}
+    void endNotifications(EShLanguage) override {}
+    void beginResolve(EShLanguage) override {}
+    void endResolve(EShLanguage) override {}
+    void beginCollect(EShLanguage) override {}
+    void endCollect(EShLanguage) override {}
+    void reserverResourceSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {}
+    void reserverStorageSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {}
+    int getBaseBinding(EShLanguage stage, TResourceType res, unsigned int set) const;
+    const std::vector<std::string>& getResourceSetBinding(EShLanguage stage) const;
+    virtual TResourceType getResourceType(const glslang::TType& type) = 0;
+    bool doAutoBindingMapping() const;
+    bool doAutoLocationMapping() const;
+    TSlotSet::iterator findSlot(int set, int slot);
+    bool checkEmpty(int set, int slot);
+    bool validateInOut(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; }
+    int reserveSlot(int set, int slot, int size = 1);
+    int getFreeSlot(int set, int base, int size = 1);
+    int resolveSet(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override;
+    int resolveInOutComponent(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveInOutIndex(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    void addStage(EShLanguage stage, TIntermediate& stageIntermediate) override {
+        if (stage < EShLangCount) {
+            stageMask[stage] = true;
+            stageIntermediates[stage] = &stageIntermediate;
+        }
+    }
+    uint32_t computeTypeLocationSize(const TType& type, EShLanguage stage);
+
+    TSlotSetMap slots;
+    bool hasError = false;
+
+protected:
+    TDefaultIoResolverBase(TDefaultIoResolverBase&);
+    TDefaultIoResolverBase& operator=(TDefaultIoResolverBase&);
+    const TIntermediate& referenceIntermediate;
+    int nextUniformLocation;
+    int nextInputLocation;
+    int nextOutputLocation;
+    bool stageMask[EShLangCount + 1];
+    const TIntermediate* stageIntermediates[EShLangCount];
+
+    // Return descriptor set specific base if there is one, and the generic base otherwise.
+    int selectBaseBinding(int base, int descriptorSetBase) const {
+        return descriptorSetBase != -1 ? descriptorSetBase : base;
+    }
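+    // For example (hypothetical values): with a generic base of 0 and a base of
+    // 10 registered for the descriptor set in question,
+    //     selectBaseBinding(/*base*/ 0, /*descriptorSetBase*/ 10) == 10
+    //     selectBaseBinding(/*base*/ 0, /*descriptorSetBase*/ -1) == 0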
+    static int getLayoutSet(const glslang::TType& type) {
+        if (type.getQualifier().hasSet())
+            return type.getQualifier().layoutSet;
+        else
+            return 0;
+    }
+
+    static bool isSamplerType(const glslang::TType& type) {
+        return type.getBasicType() == glslang::EbtSampler && type.getSampler().isPureSampler();
+    }
+
+    static bool isTextureType(const glslang::TType& type) {
+        return (type.getBasicType() == glslang::EbtSampler &&
+                (type.getSampler().isTexture() || type.getSampler().isSubpass()));
+    }
+
+    static bool isUboType(const glslang::TType& type) {
+        return type.getQualifier().storage == EvqUniform;
+    }
+
+    static bool isImageType(const glslang::TType& type) {
+        return type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage();
+    }
+
+    static bool isSsboType(const glslang::TType& type) {
+        return type.getQualifier().storage == EvqBuffer;
+    }
+
+    // Return true if this is a SRV (shader resource view) type:
+    static bool isSrvType(const glslang::TType& type) {
+        return isTextureType(type) || type.getQualifier().storage == EvqBuffer;
+    }
+
+    // Return true if this is a UAV (unordered access view) type:
+    static bool isUavType(const glslang::TType& type) {
+        if (type.getQualifier().isReadOnly())
+            return false;
+        return (type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage()) ||
+               (type.getQualifier().storage == EvqBuffer);
+    }
+};
+
+// Default I/O resolver for OpenGL
+struct TDefaultGlslIoResolver : public TDefaultIoResolverBase {
+public:
+    typedef std::map<TString, int/*SlotId*/> TVarSlotMap; // <resourceName, location/binding>
+    typedef std::map<int, TVarSlotMap> TSlotMap;          // <set, TVarSlotMap>
+    TDefaultGlslIoResolver(const TIntermediate& intermediate);
+    bool validateBinding(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; }
+    TResourceType getResourceType(const glslang::TType& type) override;
+    int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override;
+    int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    int resolveBinding(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
+    void beginResolve(EShLanguage /*stage*/) override;
+    void endResolve(EShLanguage stage) override;
+    void beginCollect(EShLanguage) override;
+    void endCollect(EShLanguage) override;
+    void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override;
+    void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override;
+    // In/out symbols and uniform symbols are stored in the same resourceSlotMap; the
+    // storage key identifies each kind of symbol. The key is built from the stage and
+    // the storage qualifier, which lets the same storage resource be recognized across
+    // different stages. For a program-wide resource whose usage stage does not matter,
+    // the same stage value can be used to build the key.
+    // Note: both stage and type must be less than 0xffff.
+    int buildStorageKey(EShLanguage stage, TStorageQualifier type) {
+        assert(static_cast<uint32_t>(stage) <= 0x0000ffff && static_cast<uint32_t>(type) <= 0x0000ffff);
+        return (stage << 16) | type;
+    }
+
+protected:
+    // Marks the previous stage, used to gather more interface-symbol information.
+    EShLanguage preStage;
+    // Marks the current shader stage for the resolver.
+    EShLanguage currentStage;
+    // Slot map for storage resources (locations of uniforms and interface symbols); shared program-wide.
+    TSlotMap resourceSlotMap;
+    // Slot map for other resources (images, UBOs, SSBOs); shared program-wide.
+    TSlotMap storageSlotMap;
+};
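+// Example use of buildStorageKey inside the resolver (illustrative sketch;
+// resourceSlotMap is a protected member): uniforms collected in the fragment
+// stage all share one program-wide bucket of the slot map:
+//
+//     int key = buildStorageKey(EShLangFragment, EvqUniform); // (stage << 16) | qualifier
+//     TVarSlotMap& slots = resourceSlotMap[key];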
+
+protected:
+    // Marks the previous stage, used to get more interface symbol information.
+    EShLanguage preStage;
+    // Marks the current shader stage for the resolver.
+    EShLanguage currentStage;
+    // Slot map for storage resources (locations of uniforms and interface symbols); shared program-wide.
+    TSlotMap resourceSlotMap;
+    // Slot map for other resources (images, UBOs, SSBOs); shared program-wide.
+    TSlotMap storageSlotMap;
+};
+
+typedef std::map<const TString, TVarEntryInfo> TVarLiveMap;
+
+// An override of "operator=". If a vector of these pairs is sorted with VC++,
+// the sort function will call:
+// pair& operator=(const pair<_Other1, _Other2>& _Right)
+// {
+//     first = _Right.first;
+//     second = _Right.second;
+//     return (*this);
+// }
+// which assigns through the const 'first' member on the left-hand side.
+// Overriding operator= avoids that compiler error.
+// In the future, if the VC++ compiler can handle such a situation,
+// this part of the code will be removed.
+struct TVarLivePair : std::pair<const TString, TVarEntryInfo> {
+    TVarLivePair(const std::pair<const TString, TVarEntryInfo>& _Right) : pair(_Right.first, _Right.second) {}
+    TVarLivePair& operator=(const TVarLivePair& _Right) {
+        const_cast<TString&>(first) = _Right.first;
+        second = _Right.second;
+        return (*this);
+    }
+    TVarLivePair(const TVarLivePair& src) : pair(src) { }
+};
+typedef std::vector<TVarLivePair> TVarLiveVector;
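+// Illustrative note (not part of upstream glslang): TVarLivePair makes a
+// TVarLiveVector sortable despite the const key of the underlying pair, e.g.:
+//     TVarLiveVector vars;  // filled from a TVarLiveMap
+//     std::sort(vars.begin(), vars.end(), [](const TVarLivePair& a, const TVarLivePair& b) {
+//         return a.second.id < b.second.id; });  // assumes TVarEntryInfo exposes an 'id' field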
+
+// I/O mapper
+class TIoMapper {
+public:
+    TIoMapper() {}
+    virtual ~TIoMapper() {}
+    // grow the reflection stage by stage
+    bool virtual addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*);
+    bool virtual doMap(TIoMapResolver*, TInfoSink&) { return true; }
+};
+
+// I/O mapper for GLSL
+class TGlslIoMapper : public TIoMapper {
+public:
+    TGlslIoMapper() {
+        memset(inVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1));
+        memset(outVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1));
+        memset(uniformVarMap, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1));
+        memset(intermediates, 0, sizeof(TIntermediate*) * (EShLangCount + 1));
+        profile = ENoProfile;
+        version = 0;
+        autoPushConstantMaxSize = 128;
+        autoPushConstantBlockPacking = ElpStd430;
+    }
+    virtual ~TGlslIoMapper() {
+        for (size_t stage = 0; stage < EShLangCount; stage++) {
+            if (inVarMaps[stage] != nullptr) {
+                delete inVarMaps[stage];
+                inVarMaps[stage] = nullptr;
+            }
+            if (outVarMaps[stage] != nullptr) {
+                delete outVarMaps[stage];
+                outVarMaps[stage] = nullptr;
+            }
+            if (uniformVarMap[stage] != nullptr) {
+                delete uniformVarMap[stage];
+                uniformVarMap[stage] = nullptr;
+            }
+            if (intermediates[stage] != nullptr)
+                intermediates[stage] = nullptr;
+        }
+    }
+    // If set, the uniform block with the given name will be changed to be backed by
+    // push_constant if its size is <= maxSize
+    void setAutoPushConstantBlock(const char* name, unsigned int maxSize, TLayoutPacking packing) {
+        autoPushConstantBlockName = name;
+        autoPushConstantMaxSize = maxSize;
+        autoPushConstantBlockPacking = packing;
+    }
+    // grow the reflection stage by stage
+    bool addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*) override;
+    bool doMap(TIoMapResolver*, TInfoSink&) override;
+    TVarLiveMap *inVarMaps[EShLangCount], *outVarMaps[EShLangCount],
+                *uniformVarMap[EShLangCount];
+    TIntermediate* intermediates[EShLangCount];
+    bool hadError = false;
+    EProfile profile;
+    int version;
+
+private:
+    TString autoPushConstantBlockName;
+    unsigned int autoPushConstantMaxSize;
+    TLayoutPacking autoPushConstantBlockPacking;
+};
+
+} // end namespace glslang
+
+#endif // _IOMAPPER_INCLUDED
+
+#endif // !GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/localintermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/localintermediate.h
new file mode 100644
index 0000000..1bb4e97
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/localintermediate.h
@@ -0,0 +1,1222 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2016 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _LOCAL_INTERMEDIATE_INCLUDED_
+#define _LOCAL_INTERMEDIATE_INCLUDED_
+
+#include "../Include/intermediate.h"
+#include "../Public/ShaderLang.h"
+#include "Versions.h"
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <set>
+#include <array>
+
+class TInfoSink;
+
+namespace glslang {
+
+struct TMatrixSelector {
+    int coord1; // stay agnostic about column/row; this is parse order
+    int coord2;
+};
+
+typedef int TVectorSelector;
+
+const int MaxSwizzleSelectors = 4;
+
+template<typename selectorType>
+class TSwizzleSelectors {
+public:
+    TSwizzleSelectors() : size_(0) { }
+
+    void push_back(selectorType comp)
+    {
+        if (size_ < MaxSwizzleSelectors)
+            components[size_++] = comp;
+    }
+    void resize(int s)
+    {
+        assert(s <= size_);
+        size_ = s;
+    }
+    int size() const { return size_; }
+    selectorType operator[](int i) const
+    {
+        assert(i < MaxSwizzleSelectors);
+        return components[i];
+    }
+
+private:
+    int size_;
+    selectorType components[MaxSwizzleSelectors];
+};
+
+//
+// Some helper structures for TIntermediate. Their contents are encapsulated
+// by TIntermediate.
+//
+
+// Used for call-graph algorithms for detecting recursion, missing bodies, and dead bodies.
+// A "call" is a pair: <caller, callee>.
+// There can be duplicates. General assumption is the list is small.
+struct TCall {
+    TCall(const TString& pCaller, const TString& pCallee) : caller(pCaller), callee(pCallee) { }
+    TString caller;
+    TString callee;
+    bool visited;
+    bool currentPath;
+    bool errorGiven;
+    int calleeBodyPosition;
+};
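+// Illustrative note (not part of upstream glslang): for a swizzle such as ".yzx",
+// the parser pushes one selector per component, in parse order:
+//     TSwizzleSelectors<TVectorSelector> s;
+//     s.push_back(1); s.push_back(2); s.push_back(0);  // y, z, x
+//     // now s.size() == 3 and s[0] == 1; a fifth push_back would be ignored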
+
+// A generic 1-D range.
+struct TRange {
+    TRange(int start, int last) : start(start), last(last) { }
+    bool overlap(const TRange& rhs) const
+    {
+        return last >= rhs.start && start <= rhs.last;
+    }
+    int start;
+    int last;
+};
+
+// An IO range is a 3-D rectangle; the set of (location, component, index) triples all lying
+// within the same location range, component range, and index value. Locations don't alias unless
+// all other dimensions of their range overlap.
+struct TIoRange {
+    TIoRange(TRange location, TRange component, TBasicType basicType, int index)
+        : location(location), component(component), basicType(basicType), index(index) { }
+    bool overlap(const TIoRange& rhs) const
+    {
+        return location.overlap(rhs.location) && component.overlap(rhs.component) && index == rhs.index;
+    }
+    TRange location;
+    TRange component;
+    TBasicType basicType;
+    int index;
+};
+
+// An offset range is a 2-D rectangle; the set of (binding, offset) pairs all lying
+// within the same binding and offset range.
+struct TOffsetRange {
+    TOffsetRange(TRange binding, TRange offset)
+        : binding(binding), offset(offset) { }
+    bool overlap(const TOffsetRange& rhs) const
+    {
+        return binding.overlap(rhs.binding) && offset.overlap(rhs.offset);
+    }
+    TRange binding;
+    TRange offset;
+};
+
+#ifndef GLSLANG_WEB
+// Things that need to be tracked per xfb buffer.
+struct TXfbBuffer {
+    TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), contains64BitType(false),
+                   contains32BitType(false), contains16BitType(false) { }
+    std::vector<TRange> ranges; // byte offsets that have already been assigned
+    unsigned int stride;
+    unsigned int implicitStride;
+    bool contains64BitType;
+    bool contains32BitType;
+    bool contains16BitType;
+};
+#endif
+
+// Track a set of strings describing how the module was processed.
+// This includes command line options, transforms, etc., ideally inclusive enough
+// to reproduce the steps used to transform the input source to the output.
+// E.g., see SPIR-V OpModuleProcessed.
+// Each "process" or "transform" used is expressed in the form:
+//     process arg0 arg1 arg2 ...
+//     process arg0 arg1 arg2 ...
+// where everything is textual, and there can be zero or more arguments
+class TProcesses {
+public:
+    TProcesses() {}
+    ~TProcesses() {}
+
+    void addProcess(const char* process)
+    {
+        processes.push_back(process);
+    }
+    void addProcess(const std::string& process)
+    {
+        processes.push_back(process);
+    }
+    void addArgument(int arg)
+    {
+        processes.back().append(" ");
+        std::string argString = std::to_string(arg);
+        processes.back().append(argString);
+    }
+    void addArgument(const char* arg)
+    {
+        processes.back().append(" ");
+        processes.back().append(arg);
+    }
+    void addArgument(const std::string& arg)
+    {
+        processes.back().append(" ");
+        processes.back().append(arg);
+    }
+    void addIfNonZero(const char* process, int value)
+    {
+        if (value != 0) {
+            addProcess(process);
+            addArgument(value);
+        }
+    }
+
+    const std::vector<std::string>& getProcesses() const { return processes; }
+
+private:
+    std::vector<std::string> processes;
+};
+
+class TSymbolTable;
+class TSymbol;
+class TVariable;
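+// Illustrative note (not part of upstream glslang): TProcesses accumulates one
+// string per process; arguments are appended to the most recently added one:
+//     TProcesses p;
+//     p.addProcess("entry-point");
+//     p.addArgument("main");   // the recorded process is now "entry-point main"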
+
+//
+// Texture and Sampler transformation mode.
+//
+enum ComputeDerivativeMode {
+    LayoutDerivativeNone,        // default layout as SPV_NV_compute_shader_derivatives not enabled
+    LayoutDerivativeGroupQuads,  // derivative_group_quadsNV
+    LayoutDerivativeGroupLinear, // derivative_group_linearNV
+};
+
+class TIdMaps {
+public:
+    TMap<TString, long long>& operator[](long long i) { return maps[i]; }
+    const TMap<TString, long long>& operator[](long long i) const { return maps[i]; }
+private:
+    TMap<TString, long long> maps[EsiCount];
+};
+
+class TNumericFeatures {
+public:
+    TNumericFeatures() : features(0) { }
+    TNumericFeatures(const TNumericFeatures&) = delete;
+    TNumericFeatures& operator=(const TNumericFeatures&) = delete;
+    typedef enum : unsigned int {
+        shader_explicit_arithmetic_types         = 1 << 0,
+        shader_explicit_arithmetic_types_int8    = 1 << 1,
+        shader_explicit_arithmetic_types_int16   = 1 << 2,
+        shader_explicit_arithmetic_types_int32   = 1 << 3,
+        shader_explicit_arithmetic_types_int64   = 1 << 4,
+        shader_explicit_arithmetic_types_float16 = 1 << 5,
+        shader_explicit_arithmetic_types_float32 = 1 << 6,
+        shader_explicit_arithmetic_types_float64 = 1 << 7,
+        shader_implicit_conversions              = 1 << 8,
+        gpu_shader_fp64                          = 1 << 9,
+        gpu_shader_int16                         = 1 << 10,
+        gpu_shader_half_float                    = 1 << 11,
+    } feature;
+    void insert(feature f) { features |= f; }
+    void erase(feature f) { features &= ~f; }
+    bool contains(feature f) const { return (features & f) != 0; }
+private:
+    unsigned int features;
+};
+
+// MustBeAssigned wraps a T, asserting that it has been assigned with
+// operator =() before attempting to read with operator T() or operator ->().
+// Used to catch cases where fields are read before they have been assigned.
+template<typename T>
+class MustBeAssigned
+{
+public:
+    MustBeAssigned() = default;
+    MustBeAssigned(const T& v) : value(v) {}
+    operator const T&() const { assert(isSet); return value; }
+    const T* operator ->() const { assert(isSet); return &value; }
+    MustBeAssigned& operator = (const T& v) { value = v; isSet = true; return *this; }
+private:
+    T value;
+    bool isSet = false;
+};
+
+//
+// Set of helper functions to help parse and build the tree.
+// +class TIntermediate { +public: + explicit TIntermediate(EShLanguage l, int v = 0, EProfile p = ENoProfile) : + language(l), + profile(p), version(v), + treeRoot(0), + resources(TBuiltInResource{}), + numEntryPoints(0), numErrors(0), numPushConstants(0), recursive(false), + invertY(false), + dxPositionW(false), + enhancedMsgs(false), + debugInfo(false), + useStorageBuffer(false), + invariantAll(false), + nanMinMaxClamp(false), + depthReplacing(false), + stencilReplacing(false), + uniqueId(0), + globalUniformBlockName(""), + atomicCounterBlockName(""), + globalUniformBlockSet(TQualifier::layoutSetEnd), + globalUniformBlockBinding(TQualifier::layoutBindingEnd), + atomicCounterBlockSet(TQualifier::layoutSetEnd) +#ifndef GLSLANG_WEB + , + implicitThisName("@this"), implicitCounterName("@count"), + source(EShSourceNone), + useVulkanMemoryModel(false), + invocations(TQualifier::layoutNotSet), vertices(TQualifier::layoutNotSet), + inputPrimitive(ElgNone), outputPrimitive(ElgNone), + pixelCenterInteger(false), originUpperLeft(false),texCoordBuiltinRedeclared(false), + vertexSpacing(EvsNone), vertexOrder(EvoNone), interlockOrdering(EioNone), pointMode(false), earlyFragmentTests(false), + postDepthCoverage(false), earlyAndLateFragmentTestsAMD(false), depthLayout(EldNone), stencilLayout(ElsNone), + hlslFunctionality1(false), + blendEquations(0), xfbMode(false), multiStream(false), + layoutOverrideCoverage(false), + geoPassthroughEXT(false), + numShaderRecordBlocks(0), + computeDerivativeMode(LayoutDerivativeNone), + primitives(TQualifier::layoutNotSet), + numTaskNVBlocks(0), + layoutPrimitiveCulling(false), + numTaskEXTPayloads(0), + autoMapBindings(false), + autoMapLocations(false), + flattenUniformArrays(false), + useUnknownFormat(false), + hlslOffsets(false), + hlslIoMapping(false), + useVariablePointers(false), + textureSamplerTransformMode(EShTexSampTransKeep), + needToLegalize(false), + binaryDoubleOutput(false), + subgroupUniformControlFlow(false), + usePhysicalStorageBuffer(false), + spirvRequirement(nullptr), + spirvExecutionMode(nullptr), + uniformLocationBase(0) +#endif + { + localSize[0] = 1; + localSize[1] = 1; + localSize[2] = 1; + localSizeNotDefault[0] = false; + localSizeNotDefault[1] = false; + localSizeNotDefault[2] = false; + localSizeSpecId[0] = TQualifier::layoutNotSet; + localSizeSpecId[1] = TQualifier::layoutNotSet; + localSizeSpecId[2] = TQualifier::layoutNotSet; +#ifndef GLSLANG_WEB + xfbBuffers.resize(TQualifier::layoutXfbBufferEnd); + shiftBinding.fill(0); +#endif + } + + void setVersion(int v) + { + version = v; + } + void setProfile(EProfile p) + { + profile = p; + } + + int getVersion() const { return version; } + EProfile getProfile() const { return profile; } + void setSpv(const SpvVersion& s) + { + spvVersion = s; + + // client processes + if (spvVersion.vulkan > 0) + processes.addProcess("client vulkan100"); + if (spvVersion.openGl > 0) + processes.addProcess("client opengl100"); + + // target SPV + switch (spvVersion.spv) { + case 0: + break; + case EShTargetSpv_1_0: + break; + case EShTargetSpv_1_1: + processes.addProcess("target-env spirv1.1"); + break; + case EShTargetSpv_1_2: + processes.addProcess("target-env spirv1.2"); + break; + case EShTargetSpv_1_3: + processes.addProcess("target-env spirv1.3"); + break; + case EShTargetSpv_1_4: + processes.addProcess("target-env spirv1.4"); + break; + case EShTargetSpv_1_5: + processes.addProcess("target-env spirv1.5"); + break; + case EShTargetSpv_1_6: + processes.addProcess("target-env spirv1.6"); + break; + 
        default:
+            processes.addProcess("target-env spirvUnknown");
+            break;
+        }
+
+        // target-environment processes
+        switch (spvVersion.vulkan) {
+        case 0:
+            break;
+        case EShTargetVulkan_1_0:
+            processes.addProcess("target-env vulkan1.0");
+            break;
+        case EShTargetVulkan_1_1:
+            processes.addProcess("target-env vulkan1.1");
+            break;
+        case EShTargetVulkan_1_2:
+            processes.addProcess("target-env vulkan1.2");
+            break;
+        case EShTargetVulkan_1_3:
+            processes.addProcess("target-env vulkan1.3");
+            break;
+        default:
+            processes.addProcess("target-env vulkanUnknown");
+            break;
+        }
+        if (spvVersion.openGl > 0)
+            processes.addProcess("target-env opengl");
+    }
+    const SpvVersion& getSpv() const { return spvVersion; }
+    EShLanguage getStage() const { return language; }
+    void addRequestedExtension(const char* extension) { requestedExtensions.insert(extension); }
+    const std::set<std::string>& getRequestedExtensions() const { return requestedExtensions; }
+    bool isRayTracingStage() const {
+        return language >= EShLangRayGen && language <= EShLangCallableNV;
+    }
+
+    void setTreeRoot(TIntermNode* r) { treeRoot = r; }
+    TIntermNode* getTreeRoot() const { return treeRoot; }
+    void incrementEntryPointCount() { ++numEntryPoints; }
+    int getNumEntryPoints() const { return numEntryPoints; }
+    int getNumErrors() const { return numErrors; }
+    void addPushConstantCount() { ++numPushConstants; }
+    void setLimits(const TBuiltInResource& r) { resources = r; }
+    const TBuiltInResource& getLimits() const { return resources; }
+
+    bool postProcess(TIntermNode*, EShLanguage);
+    void removeTree();
+
+    void setEntryPointName(const char* ep)
+    {
+        entryPointName = ep;
+        processes.addProcess("entry-point");
+        processes.addArgument(entryPointName);
+    }
+    void setEntryPointMangledName(const char* ep) { entryPointMangledName = ep; }
+    const std::string& getEntryPointName() const { return entryPointName; }
+    const std::string& getEntryPointMangledName() const { return entryPointMangledName; }
+
+    void setDebugInfo(bool debuginfo)
+    {
+        debugInfo = debuginfo;
+    }
+    bool getDebugInfo() const { return debugInfo; }
+
+    void setInvertY(bool invert)
+    {
+        invertY = invert;
+        if (invertY)
+            processes.addProcess("invert-y");
+    }
+    bool getInvertY() const { return invertY; }
+
+    void setDxPositionW(bool dxPosW)
+    {
+        dxPositionW = dxPosW;
+        if (dxPositionW)
+            processes.addProcess("dx-position-w");
+    }
+    bool getDxPositionW() const { return dxPositionW; }
+
+    void setEnhancedMsgs()
+    {
+        enhancedMsgs = true;
+    }
+    bool getEnhancedMsgs() const { return enhancedMsgs && getSource() == EShSourceGlsl; }
+
+#ifdef ENABLE_HLSL
+    void setSource(EShSource s) { source = s; }
+    EShSource getSource() const { return source; }
+#else
+    void setSource(EShSource s) { assert(s == EShSourceGlsl); (void)s; }
+    EShSource getSource() const { return EShSourceGlsl; }
+#endif
+
+    bool isRecursive() const { return recursive; }
+
+    TIntermSymbol* addSymbol(const TVariable&);
+    TIntermSymbol* addSymbol(const TVariable&, const TSourceLoc&);
+    TIntermSymbol* addSymbol(const TType&, const TSourceLoc&);
+    TIntermSymbol* addSymbol(const TIntermSymbol&);
+    TIntermTyped* addConversion(TOperator, const TType&, TIntermTyped*);
+    std::tuple<TIntermTyped*, TIntermTyped*> addPairConversion(TOperator op, TIntermTyped* node0, TIntermTyped* node1);
+    TIntermTyped* addUniShapeConversion(TOperator, const TType&, TIntermTyped*);
+    TIntermTyped* addConversion(TBasicType convertTo, TIntermTyped* node) const;
+    void addBiShapeConversion(TOperator, TIntermTyped*& lhsNode, TIntermTyped*& rhsNode);
+    TIntermTyped*
addShapeConversion(const TType&, TIntermTyped*); + TIntermTyped* addBinaryMath(TOperator, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addAssign(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addIndex(TOperator op, TIntermTyped* base, TIntermTyped* index, const TSourceLoc&); + TIntermTyped* addUnaryMath(TOperator, TIntermTyped* child, const TSourceLoc&); + TIntermTyped* addBuiltInFunctionCall(const TSourceLoc& line, TOperator, bool unary, TIntermNode*, const TType& returnType); + bool canImplicitlyPromote(TBasicType from, TBasicType to, TOperator op = EOpNull) const; + bool isIntegralPromotion(TBasicType from, TBasicType to) const; + bool isFPPromotion(TBasicType from, TBasicType to) const; + bool isIntegralConversion(TBasicType from, TBasicType to) const; + bool isFPConversion(TBasicType from, TBasicType to) const; + bool isFPIntegralConversion(TBasicType from, TBasicType to) const; + TOperator mapTypeToConstructorOp(const TType&) const; + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right); + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right, const TSourceLoc&); + TIntermAggregate* makeAggregate(TIntermNode* node); + TIntermAggregate* makeAggregate(TIntermNode* node, const TSourceLoc&); + TIntermAggregate* makeAggregate(const TSourceLoc&); + TIntermTyped* setAggregateOperator(TIntermNode*, TOperator, const TType& type, const TSourceLoc&); + bool areAllChildConst(TIntermAggregate* aggrNode); + TIntermSelection* addSelection(TIntermTyped* cond, TIntermNodePair code, const TSourceLoc&); + TIntermTyped* addSelection(TIntermTyped* cond, TIntermTyped* trueBlock, TIntermTyped* falseBlock, const TSourceLoc&); + TIntermTyped* addComma(TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addMethod(TIntermTyped*, const TType&, const TString*, const TSourceLoc&); + TIntermConstantUnion* addConstantUnion(const TConstUnionArray&, const TType&, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(bool, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(double, TBasicType, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(const TString*, const TSourceLoc&, bool literal = false) const; + TIntermTyped* promoteConstantUnion(TBasicType, TIntermConstantUnion*) const; + bool parseConstTree(TIntermNode*, TConstUnionArray, TOperator, const TType&, bool singleConstantParam = false); + TIntermLoop* addLoop(TIntermNode*, TIntermTyped*, TIntermTyped*, bool testFirst, const TSourceLoc&); + TIntermAggregate* addForLoop(TIntermNode*, TIntermNode*, TIntermTyped*, 
TIntermTyped*, bool testFirst,
+                                 const TSourceLoc&, TIntermLoop*&);
+    TIntermBranch* addBranch(TOperator, const TSourceLoc&);
+    TIntermBranch* addBranch(TOperator, TIntermTyped*, const TSourceLoc&);
+    template<typename selectorType> TIntermTyped* addSwizzle(TSwizzleSelectors<selectorType>&, const TSourceLoc&);
+
+    // Low level functions to add nodes (no conversions or other higher level transformations)
+    // If a type is provided, the node's type will be set to it.
+    TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&) const;
+    TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&,
+        const TType&) const;
+    TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&) const;
+    TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&, const TType&) const;
+
+    // Constant folding (in Constant.cpp)
+    TIntermTyped* fold(TIntermAggregate* aggrNode);
+    TIntermTyped* foldConstructor(TIntermAggregate* aggrNode);
+    TIntermTyped* foldDereference(TIntermTyped* node, int index, const TSourceLoc&);
+    TIntermTyped* foldSwizzle(TIntermTyped* node, TSwizzleSelectors<TVectorSelector>& fields, const TSourceLoc&);
+
+    // Tree ops
+    static const TIntermTyped* findLValueBase(const TIntermTyped*, bool swizzleOkay, bool BufferReferenceOk = false);
+
+    // Linkage related
+    void addSymbolLinkageNodes(TIntermAggregate*& linkage, EShLanguage, TSymbolTable&);
+    void addSymbolLinkageNode(TIntermAggregate*& linkage, const TSymbol&);
+    TIntermAggregate* findLinkerObjects() const;
+
+    void setGlobalUniformBlockName(const char* name) { globalUniformBlockName = std::string(name); }
+    const char* getGlobalUniformBlockName() const { return globalUniformBlockName.c_str(); }
+    void setGlobalUniformSet(unsigned int set) { globalUniformBlockSet = set; }
+    unsigned int getGlobalUniformSet() const { return globalUniformBlockSet; }
+    void setGlobalUniformBinding(unsigned int binding) { globalUniformBlockBinding = binding; }
+    unsigned int getGlobalUniformBinding() const { return globalUniformBlockBinding; }
+
+    void setAtomicCounterBlockName(const char* name) { atomicCounterBlockName = std::string(name); }
+    const char* getAtomicCounterBlockName() const { return atomicCounterBlockName.c_str(); }
+    void setAtomicCounterBlockSet(unsigned int set) { atomicCounterBlockSet = set; }
+    unsigned int getAtomicCounterBlockSet() const { return atomicCounterBlockSet; }
+
+    void setUseStorageBuffer() { useStorageBuffer = true; }
+    bool usingStorageBuffer() const { return useStorageBuffer; }
+    void setInvariantAll() { invariantAll = true; }
+    bool isInvariantAll() const { return invariantAll; }
+    void setDepthReplacing() { depthReplacing = true; }
+    bool isDepthReplacing() const { return depthReplacing; }
+    void setStencilReplacing() { stencilReplacing = true; }
+    bool isStencilReplacing() const { return stencilReplacing; }
+    bool setLocalSize(int dim, int size)
+    {
+        if (localSizeNotDefault[dim])
+            return size == localSize[dim];
+        localSizeNotDefault[dim] = true;
+        localSize[dim] = size;
+        return true;
+    }
+    unsigned int getLocalSize(int dim) const { return localSize[dim]; }
+    bool isLocalSizeSet() const
+    {
+        // Return true if any component has been set (i.e. any component is not default).
+ return localSizeNotDefault[0] || localSizeNotDefault[1] || localSizeNotDefault[2]; + } + bool setLocalSizeSpecId(int dim, int id) + { + if (localSizeSpecId[dim] != TQualifier::layoutNotSet) + return id == localSizeSpecId[dim]; + localSizeSpecId[dim] = id; + return true; + } + int getLocalSizeSpecId(int dim) const { return localSizeSpecId[dim]; } + bool isLocalSizeSpecialized() const + { + // Return true if any component has been specialized. + return localSizeSpecId[0] != TQualifier::layoutNotSet || + localSizeSpecId[1] != TQualifier::layoutNotSet || + localSizeSpecId[2] != TQualifier::layoutNotSet; + } +#ifdef GLSLANG_WEB + void output(TInfoSink&, bool tree) { } + + bool isEsProfile() const { return false; } + bool getXfbMode() const { return false; } + bool isMultiStream() const { return false; } + TLayoutGeometry getOutputPrimitive() const { return ElgNone; } + bool getPostDepthCoverage() const { return false; } + bool getEarlyFragmentTests() const { return false; } + TLayoutDepth getDepth() const { return EldNone; } + bool getPixelCenterInteger() const { return false; } + void setOriginUpperLeft() { } + bool getOriginUpperLeft() const { return true; } + TInterlockOrdering getInterlockOrdering() const { return EioNone; } + + bool getAutoMapBindings() const { return false; } + bool getAutoMapLocations() const { return false; } + int getNumPushConstants() const { return 0; } + void addShaderRecordCount() { } + void addTaskNVCount() { } + void addTaskPayloadEXTCount() { } + void setUseVulkanMemoryModel() { } + bool usingVulkanMemoryModel() const { return false; } + bool usingPhysicalStorageBuffer() const { return false; } + bool usingVariablePointers() const { return false; } + unsigned getXfbStride(int buffer) const { return 0; } + bool hasLayoutDerivativeModeNone() const { return false; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return LayoutDerivativeNone; } +#else + void output(TInfoSink&, bool tree); + + bool isEsProfile() const { return profile == EEsProfile; } + + void setShiftBinding(TResourceType res, unsigned int shift) + { + shiftBinding[res] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) + processes.addIfNonZero(name, shift); + } + + unsigned int getShiftBinding(TResourceType res) const { return shiftBinding[res]; } + + void setShiftBindingForSet(TResourceType res, unsigned int shift, unsigned int set) + { + if (shift == 0) // ignore if there's no shift: it's a no-op. + return; + + shiftBindingForSet[res][set] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) { + processes.addProcess(name); + processes.addArgument(shift); + processes.addArgument(set); + } + } + + int getShiftBindingForSet(TResourceType res, unsigned int set) const + { + const auto shift = shiftBindingForSet[res].find(set); + return shift == shiftBindingForSet[res].end() ? 
-1 : shift->second;
+    }
+    bool hasShiftBindingForSet(TResourceType res) const { return !shiftBindingForSet[res].empty(); }
+
+    void setResourceSetBinding(const std::vector<std::string>& shift)
+    {
+        resourceSetBinding = shift;
+        if (shift.size() > 0) {
+            processes.addProcess("resource-set-binding");
+            for (int s = 0; s < (int)shift.size(); ++s)
+                processes.addArgument(shift[s]);
+        }
+    }
+    const std::vector<std::string>& getResourceSetBinding() const { return resourceSetBinding; }
+    void setAutoMapBindings(bool map)
+    {
+        autoMapBindings = map;
+        if (autoMapBindings)
+            processes.addProcess("auto-map-bindings");
+    }
+    bool getAutoMapBindings() const { return autoMapBindings; }
+    void setAutoMapLocations(bool map)
+    {
+        autoMapLocations = map;
+        if (autoMapLocations)
+            processes.addProcess("auto-map-locations");
+    }
+    bool getAutoMapLocations() const { return autoMapLocations; }
+
+#ifdef ENABLE_HLSL
+    void setFlattenUniformArrays(bool flatten)
+    {
+        flattenUniformArrays = flatten;
+        if (flattenUniformArrays)
+            processes.addProcess("flatten-uniform-arrays");
+    }
+    bool getFlattenUniformArrays() const { return flattenUniformArrays; }
+#endif
+    void setNoStorageFormat(bool b)
+    {
+        useUnknownFormat = b;
+        if (useUnknownFormat)
+            processes.addProcess("no-storage-format");
+    }
+    bool getNoStorageFormat() const { return useUnknownFormat; }
+    void setUseVulkanMemoryModel()
+    {
+        useVulkanMemoryModel = true;
+        processes.addProcess("use-vulkan-memory-model");
+    }
+    bool usingVulkanMemoryModel() const { return useVulkanMemoryModel; }
+    void setUsePhysicalStorageBuffer()
+    {
+        usePhysicalStorageBuffer = true;
+    }
+    bool usingPhysicalStorageBuffer() const { return usePhysicalStorageBuffer; }
+    void setUseVariablePointers()
+    {
+        useVariablePointers = true;
+        processes.addProcess("use-variable-pointers");
+    }
+    bool usingVariablePointers() const { return useVariablePointers; }
+
+#ifdef ENABLE_HLSL
+    template<class T> T addCounterBufferName(const T& name) const { return name + implicitCounterName; }
+    bool hasCounterBufferName(const TString& name) const {
+        size_t len = strlen(implicitCounterName);
+        return name.size() > len &&
+               name.compare(name.size() - len, len, implicitCounterName) == 0;
+    }
+#endif
+
+    void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode) { textureSamplerTransformMode = mode; }
+    int getNumPushConstants() const { return numPushConstants; }
+    void addShaderRecordCount() { ++numShaderRecordBlocks; }
+    void addTaskNVCount() { ++numTaskNVBlocks; }
+    void addTaskPayloadEXTCount() { ++numTaskEXTPayloads; }
+
+    bool setInvocations(int i)
+    {
+        if (invocations != TQualifier::layoutNotSet)
+            return invocations == i;
+        invocations = i;
+        return true;
+    }
+    int getInvocations() const { return invocations; }
+    bool setVertices(int m)
+    {
+        if (vertices != TQualifier::layoutNotSet)
+            return vertices == m;
+        vertices = m;
+        return true;
+    }
+    int getVertices() const { return vertices; }
+    bool setInputPrimitive(TLayoutGeometry p)
+    {
+        if (inputPrimitive != ElgNone)
+            return inputPrimitive == p;
+        inputPrimitive = p;
+        return true;
+    }
+    TLayoutGeometry getInputPrimitive() const { return inputPrimitive; }
+    bool setVertexSpacing(TVertexSpacing s)
+    {
+        if (vertexSpacing != EvsNone)
+            return vertexSpacing == s;
+        vertexSpacing = s;
+        return true;
+    }
+    TVertexSpacing getVertexSpacing() const { return vertexSpacing; }
+    bool setVertexOrder(TVertexOrder o)
+    {
+        if (vertexOrder != EvoNone)
+            return vertexOrder == o;
+        vertexOrder = o;
+        return true;
+    }
+    TVertexOrder getVertexOrder() const { return vertexOrder; }
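+    // Illustrative note (not part of upstream glslang): the setters above follow a
+    // set-once policy; a later call succeeds only if it agrees with the first one:
+    //     intermediate.setVertices(3);  // true, value recorded
+    //     intermediate.setVertices(3);  // true, consistent redeclaration
+    //     intermediate.setVertices(4);  // false, conflicting layout value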
+ void setPointMode() { pointMode = true; } + bool getPointMode() const { return pointMode; } + + bool setInterlockOrdering(TInterlockOrdering o) + { + if (interlockOrdering != EioNone) + return interlockOrdering == o; + interlockOrdering = o; + return true; + } + TInterlockOrdering getInterlockOrdering() const { return interlockOrdering; } + + void setXfbMode() { xfbMode = true; } + bool getXfbMode() const { return xfbMode; } + void setMultiStream() { multiStream = true; } + bool isMultiStream() const { return multiStream; } + bool setOutputPrimitive(TLayoutGeometry p) + { + if (outputPrimitive != ElgNone) + return outputPrimitive == p; + outputPrimitive = p; + return true; + } + TLayoutGeometry getOutputPrimitive() const { return outputPrimitive; } + void setPostDepthCoverage() { postDepthCoverage = true; } + bool getPostDepthCoverage() const { return postDepthCoverage; } + void setEarlyFragmentTests() { earlyFragmentTests = true; } + void setEarlyAndLateFragmentTestsAMD() { earlyAndLateFragmentTestsAMD = true; } + bool getEarlyFragmentTests() const { return earlyFragmentTests; } + bool getEarlyAndLateFragmentTestsAMD() const { return earlyAndLateFragmentTestsAMD; } + bool setDepth(TLayoutDepth d) + { + if (depthLayout != EldNone) + return depthLayout == d; + depthLayout = d; + return true; + } + bool setStencil(TLayoutStencil s) + { + if (stencilLayout != ElsNone) + return stencilLayout == s; + stencilLayout = s; + return true; + } + TLayoutDepth getDepth() const { return depthLayout; } + TLayoutStencil getStencil() const { return stencilLayout; } + void setOriginUpperLeft() { originUpperLeft = true; } + bool getOriginUpperLeft() const { return originUpperLeft; } + void setPixelCenterInteger() { pixelCenterInteger = true; } + bool getPixelCenterInteger() const { return pixelCenterInteger; } + void setTexCoordRedeclared() { texCoordBuiltinRedeclared = true; } + bool getTexCoordRedeclared() const { return texCoordBuiltinRedeclared; } + void addBlendEquation(TBlendEquationShift b) { blendEquations |= (1 << b); } + unsigned int getBlendEquations() const { return blendEquations; } + bool setXfbBufferStride(int buffer, unsigned stride) + { + if (xfbBuffers[buffer].stride != TQualifier::layoutXfbStrideEnd) + return xfbBuffers[buffer].stride == stride; + xfbBuffers[buffer].stride = stride; + return true; + } + unsigned getXfbStride(int buffer) const { return xfbBuffers[buffer].stride; } + int addXfbBufferOffset(const TType&); + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const; + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType) const; + void setLayoutOverrideCoverage() { layoutOverrideCoverage = true; } + bool getLayoutOverrideCoverage() const { return layoutOverrideCoverage; } + void setGeoPassthroughEXT() { geoPassthroughEXT = true; } + bool getGeoPassthroughEXT() const { return geoPassthroughEXT; } + void setLayoutDerivativeMode(ComputeDerivativeMode mode) { computeDerivativeMode = mode; } + bool hasLayoutDerivativeModeNone() const { return computeDerivativeMode != LayoutDerivativeNone; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return computeDerivativeMode; } + void setLayoutPrimitiveCulling() { layoutPrimitiveCulling = true; } + bool getLayoutPrimitiveCulling() const { return layoutPrimitiveCulling; } + bool setPrimitives(int m) + { + if (primitives != TQualifier::layoutNotSet) + return primitives == m; + primitives = m; + return true; + } + int getPrimitives() const { 
return primitives; } + const char* addSemanticName(const TString& name) + { + return semanticNameSet.insert(name).first->c_str(); + } + void addUniformLocationOverride(const char* nameStr, int location) + { + std::string name = nameStr; + uniformLocationOverrides[name] = location; + } + + int getUniformLocationOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = uniformLocationOverrides.find(name); + if (pos == uniformLocationOverrides.end()) + return -1; + else + return pos->second; + } + + void setUniformLocationBase(int base) { uniformLocationBase = base; } + int getUniformLocationBase() const { return uniformLocationBase; } + + void setNeedsLegalization() { needToLegalize = true; } + bool needsLegalization() const { return needToLegalize; } + + void setBinaryDoubleOutput() { binaryDoubleOutput = true; } + bool getBinaryDoubleOutput() { return binaryDoubleOutput; } + + void setSubgroupUniformControlFlow() { subgroupUniformControlFlow = true; } + bool getSubgroupUniformControlFlow() const { return subgroupUniformControlFlow; } + + // GL_EXT_spirv_intrinsics + void insertSpirvRequirement(const TSpirvRequirement* spirvReq); + bool hasSpirvRequirement() const { return spirvRequirement != nullptr; } + const TSpirvRequirement& getSpirvRequirement() const { return *spirvRequirement; } + void insertSpirvExecutionMode(int executionMode, const TIntermAggregate* args = nullptr); + void insertSpirvExecutionModeId(int executionMode, const TIntermAggregate* args); + bool hasSpirvExecutionMode() const { return spirvExecutionMode != nullptr; } + const TSpirvExecutionMode& getSpirvExecutionMode() const { return *spirvExecutionMode; } +#endif // GLSLANG_WEB + + void addBlockStorageOverride(const char* nameStr, TBlockStorageClass backing) + { + std::string name(nameStr); + blockBackingOverrides[name] = backing; + } + TBlockStorageClass getBlockStorageOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = blockBackingOverrides.find(name); + if (pos == blockBackingOverrides.end()) + return EbsNone; + else + return pos->second; + } +#ifdef ENABLE_HLSL + void setHlslFunctionality1() { hlslFunctionality1 = true; } + bool getHlslFunctionality1() const { return hlslFunctionality1; } + void setHlslOffsets() + { + hlslOffsets = true; + if (hlslOffsets) + processes.addProcess("hlsl-offsets"); + } + bool usingHlslOffsets() const { return hlslOffsets; } + void setHlslIoMapping(bool b) + { + hlslIoMapping = b; + if (hlslIoMapping) + processes.addProcess("hlsl-iomap"); + } + bool usingHlslIoMapping() { return hlslIoMapping; } +#else + bool getHlslFunctionality1() const { return false; } + bool usingHlslOffsets() const { return false; } + bool usingHlslIoMapping() { return false; } +#endif + + bool usingScalarBlockLayout() const { + for (auto extIt = requestedExtensions.begin(); extIt != requestedExtensions.end(); ++extIt) { + if (*extIt == E_GL_EXT_scalar_block_layout) + return true; + } + return false; + } + + bool IsRequestedExtension(const char* extension) const + { + return (requestedExtensions.find(extension) != requestedExtensions.end()); + } + + void addToCallGraph(TInfoSink&, const TString& caller, const TString& callee); + void merge(TInfoSink&, TIntermediate&); + void finalCheck(TInfoSink&, bool keepUncalled); + + void mergeGlobalUniformBlocks(TInfoSink& infoSink, TIntermediate& unit, bool mergeExistingOnly); + void mergeUniformObjects(TInfoSink& infoSink, TIntermediate& unit); + void checkStageIO(TInfoSink&, TIntermediate&); + + bool 
buildConvertOp(TBasicType dst, TBasicType src, TOperator& convertOp) const;
+    TIntermTyped* createConversion(TBasicType convertTo, TIntermTyped* node) const;
+
+    void addIoAccessed(const TString& name) { ioAccessed.insert(name); }
+    bool inIoAccessed(const TString& name) const { return ioAccessed.find(name) != ioAccessed.end(); }
+
+    int addUsedLocation(const TQualifier&, const TType&, bool& typeCollision);
+    int checkLocationRange(int set, const TIoRange& range, const TType&, bool& typeCollision);
+    int checkLocationRT(int set, int location);
+    int addUsedOffsets(int binding, int offset, int numOffsets);
+    bool addUsedConstantId(int id);
+    static int computeTypeLocationSize(const TType&, EShLanguage);
+    static int computeTypeUniformLocationSize(const TType&);
+
+    static int getBaseAlignmentScalar(const TType&, int& size);
+    static int getBaseAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor);
+    static int getScalarAlignment(const TType&, int& size, int& stride, bool rowMajor);
+    static int getMemberAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor);
+    static bool improperStraddle(const TType& type, int size, int offset);
+    static void updateOffset(const TType& parentType, const TType& memberType, int& offset, int& memberSize);
+    static int getOffset(const TType& type, int index);
+    static int getBlockSize(const TType& blockType);
+    static int computeBufferReferenceTypeSize(const TType&);
+    static bool isIoResizeArray(const TType& type, EShLanguage language);
+
+    bool promote(TIntermOperator*);
+    void setNanMinMaxClamp(bool setting) { nanMinMaxClamp = setting; }
+    bool getNanMinMaxClamp() const { return nanMinMaxClamp; }
+
+    void setSourceFile(const char* file) { if (file != nullptr) sourceFile = file; }
+    const std::string& getSourceFile() const { return sourceFile; }
+    void addSourceText(const char* text, size_t len) { sourceText.append(text, len); }
+    const std::string& getSourceText() const { return sourceText; }
+    const std::map<std::string, std::string>& getIncludeText() const { return includeText; }
+    void addIncludeText(const char* name, const char* text, size_t len) { includeText[name].assign(text, len); }
+    void addProcesses(const std::vector<std::string>& p)
+    {
+        for (int i = 0; i < (int)p.size(); ++i)
+            processes.addProcess(p[i]);
+    }
+    void addProcess(const std::string& process) { processes.addProcess(process); }
+    void addProcessArgument(const std::string& arg) { processes.addArgument(arg); }
+    const std::vector<std::string>& getProcesses() const { return processes.getProcesses(); }
+    unsigned long long getUniqueId() const { return uniqueId; }
+    void setUniqueId(unsigned long long id) { uniqueId = id; }
+
+    // Certain explicit conversions are allowed conditionally
+#ifdef GLSLANG_WEB
+    bool getArithemeticInt8Enabled() const { return false; }
+    bool getArithemeticInt16Enabled() const { return false; }
+    bool getArithemeticFloat16Enabled() const { return false; }
+    void updateNumericFeature(TNumericFeatures::feature f, bool on) { }
+#else
+    bool getArithemeticInt8Enabled() const {
+        return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) ||
+               numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int8);
+    }
+    bool getArithemeticInt16Enabled() const {
+        return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) ||
+               numericFeatures.contains(TNumericFeatures::gpu_shader_int16) ||
+               numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int16);
+    }
+
+    bool getArithemeticFloat16Enabled() const {
+        return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) ||
+               numericFeatures.contains(TNumericFeatures::gpu_shader_half_float) ||
+               numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_float16);
+    }
+    void updateNumericFeature(TNumericFeatures::feature f, bool on)
+        { on ? numericFeatures.insert(f) : numericFeatures.erase(f); }
+#endif
+
+protected:
+    TIntermSymbol* addSymbol(long long Id, const TString&, const TType&, const TConstUnionArray&, TIntermTyped* subtree, const TSourceLoc&);
+    void error(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount);
+    void warn(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount);
+    void mergeCallGraphs(TInfoSink&, TIntermediate&);
+    void mergeModes(TInfoSink&, TIntermediate&);
+    void mergeTrees(TInfoSink&, TIntermediate&);
+    void seedIdMap(TIdMaps& idMaps, long long& IdShift);
+    void remapIds(const TIdMaps& idMaps, long long idShift, TIntermediate&);
+    void mergeBodies(TInfoSink&, TIntermSequence& globals, const TIntermSequence& unitGlobals);
+    void mergeLinkerObjects(TInfoSink&, TIntermSequence& linkerObjects, const TIntermSequence& unitLinkerObjects, EShLanguage);
+    void mergeBlockDefinitions(TInfoSink&, TIntermSymbol* block, TIntermSymbol* unitBlock, TIntermediate* unitRoot);
+    void mergeImplicitArraySizes(TType&, const TType&);
+    void mergeErrorCheck(TInfoSink&, const TIntermSymbol&, const TIntermSymbol&, EShLanguage);
+    void checkCallGraphCycles(TInfoSink&);
+    void checkCallGraphBodies(TInfoSink&, bool keepUncalled);
+    void inOutLocationCheck(TInfoSink&);
+    void sharedBlockCheck(TInfoSink&);
+    bool userOutputUsed() const;
+    bool isSpecializationOperation(const TIntermOperator&) const;
+    bool isNonuniformPropagating(TOperator) const;
+    bool promoteUnary(TIntermUnary&);
+    bool promoteBinary(TIntermBinary&);
+    void addSymbolLinkageNode(TIntermAggregate*& linkage, TSymbolTable&, const TString&);
+    bool promoteAggregate(TIntermAggregate&);
+    void pushSelector(TIntermSequence&, const TVectorSelector&, const TSourceLoc&);
+    void pushSelector(TIntermSequence&, const TMatrixSelector&, const TSourceLoc&);
+    bool specConstantPropagates(const TIntermTyped&, const TIntermTyped&);
+    void performTextureUpgradeAndSamplerRemovalTransformation(TIntermNode* root);
+    bool isConversionAllowed(TOperator op, TIntermTyped* node) const;
+    std::tuple<TBasicType, TBasicType> getConversionDestinationType(TBasicType type0, TBasicType type1, TOperator op) const;
+
+    static const char* getResourceName(TResourceType);
+
+    const EShLanguage language;  // stage, known at construction time
+    std::string entryPointName;
+    std::string entryPointMangledName;
+    typedef std::list<TCall> TGraph;
+    TGraph callGraph;
+
+    EProfile profile;                           // source profile
+    int version;                                // source version
+    SpvVersion spvVersion;
+    TIntermNode* treeRoot;
+    std::set<std::string> requestedExtensions;  // cumulation of all enabled or required extensions; not connected to what subset of the shader used them
+    MustBeAssigned<TBuiltInResource> resources;
+    int numEntryPoints;
+    int numErrors;
+    int numPushConstants;
+    bool recursive;
+    bool invertY;
+    bool dxPositionW;
+    bool enhancedMsgs;
+    bool debugInfo;
+    bool useStorageBuffer;
+    bool invariantAll;
+    bool nanMinMaxClamp;  // true if desiring min/max/clamp to favor non-NaN over NaN
+    bool depthReplacing;
+    bool stencilReplacing;
+    int localSize[3];
+    bool localSizeNotDefault[3];
+    int localSizeSpecId[3];
+    unsigned long long uniqueId;
+
+    std::string globalUniformBlockName;
+    std::string atomicCounterBlockName;
+    unsigned int globalUniformBlockSet;
+    unsigned int globalUniformBlockBinding;
+    unsigned int atomicCounterBlockSet;
+
+#ifndef GLSLANG_WEB
+public:
+    const char* const implicitThisName;
+    const char* const implicitCounterName;
+protected:
+    EShSource source;  // source language, known a bit later
+    bool useVulkanMemoryModel;
+    int invocations;
+    int vertices;
+    TLayoutGeometry inputPrimitive;
+    TLayoutGeometry outputPrimitive;
+    bool pixelCenterInteger;
+    bool originUpperLeft;
+    bool texCoordBuiltinRedeclared;
+    TVertexSpacing vertexSpacing;
+    TVertexOrder vertexOrder;
+    TInterlockOrdering interlockOrdering;
+    bool pointMode;
+    bool earlyFragmentTests;
+    bool postDepthCoverage;
+    bool earlyAndLateFragmentTestsAMD;
+    TLayoutDepth depthLayout;
+    TLayoutStencil stencilLayout;
+    bool hlslFunctionality1;
+    int blendEquations;  // an 'or'ing of masks of shifts of TBlendEquationShift
+    bool xfbMode;
+    std::vector<TXfbBuffer> xfbBuffers;  // all the data we need to track per xfb buffer
+    bool multiStream;
+    bool layoutOverrideCoverage;
+    bool geoPassthroughEXT;
+    int numShaderRecordBlocks;
+    ComputeDerivativeMode computeDerivativeMode;
+    int primitives;
+    int numTaskNVBlocks;
+    bool layoutPrimitiveCulling;
+    int numTaskEXTPayloads;
+
+    // Base shift values
+    std::array<unsigned int, EResCount> shiftBinding;
+
+    // Per-descriptor-set shift values
+    std::array<std::map<unsigned int, unsigned int>, EResCount> shiftBindingForSet;
+
+    std::vector<std::string> resourceSetBinding;
+    bool autoMapBindings;
+    bool autoMapLocations;
+    bool flattenUniformArrays;
+    bool useUnknownFormat;
+    bool hlslOffsets;
+    bool hlslIoMapping;
+    bool useVariablePointers;
+
+    std::set<TString> semanticNameSet;
+
+    EShTextureSamplerTransformMode textureSamplerTransformMode;
+
+    bool needToLegalize;
+    bool binaryDoubleOutput;
+    bool subgroupUniformControlFlow;
+    bool usePhysicalStorageBuffer;
+
+    TSpirvRequirement* spirvRequirement;
+    TSpirvExecutionMode* spirvExecutionMode;
+
+    std::unordered_map<std::string, int> uniformLocationOverrides;
+    int uniformLocationBase;
+    TNumericFeatures numericFeatures;
+#endif
+    std::unordered_map<std::string, TBlockStorageClass> blockBackingOverrides;
+
+    std::unordered_set<int> usedConstantId;  // specialization constant ids used
+    std::vector<TOffsetRange> usedAtomics;   // sets of bindings used by atomic counters
+    std::vector<TIoRange> usedIo[4];         // sets of used locations, one for each of in, out, uniform, and buffers
+    std::vector<TRange> usedIoRT[2];         // sets of used location, one for rayPayload/rayPayloadIN and other
+                                             // for callableData/callableDataIn
+    // set of names of statically read/written I/O that might need extra checking
+    std::set<TString> ioAccessed;
+
+    // source code of shader, useful as part of debug information
+    std::string sourceFile;
+    std::string sourceText;
+
+    // Included text. First string is a name, second is the included text
+    std::map<std::string, std::string> includeText;
+
+    // for OpModuleProcessed, or equivalent
+    TProcesses processes;
+
+private:
+    void operator=(TIntermediate&);  // prevent assignments
+};
+
+} // end namespace glslang
+
+#endif // _LOCAL_INTERMEDIATE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/parseVersions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/parseVersions.h
new file mode 100644
index 0000000..3c52ff1
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/parseVersions.h
@@ -0,0 +1,240 @@
+//
+// Copyright (C) 2015-2018 Google, Inc.
+// Copyright (C) 2017 ARM Limited.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+// This is implemented in Versions.cpp
+
+#ifndef _PARSE_VERSIONS_INCLUDED_
+#define _PARSE_VERSIONS_INCLUDED_
+
+#include "../Public/ShaderLang.h"
+#include "../Include/InfoSink.h"
+#include "Scan.h"
+
+#include <map>
+
+namespace glslang {
+
+//
+// Base class for parse helpers.
+// This just has version-related information and checking.
+// This class should be sufficient for preprocessing.
+//
+class TParseVersions {
+public:
+    TParseVersions(TIntermediate& interm, int version, EProfile profile,
+                   const SpvVersion& spvVersion, EShLanguage language, TInfoSink& infoSink,
+                   bool forwardCompatible, EShMessages messages)
+        :
+#if !defined(GLSLANG_WEB)
+        forwardCompatible(forwardCompatible),
+        profile(profile),
+#endif
+        infoSink(infoSink), version(version),
+        language(language),
+        spvVersion(spvVersion),
+        intermediate(interm), messages(messages), numErrors(0), currentScanner(0) { }
+    virtual ~TParseVersions() { }
+    void requireStage(const TSourceLoc&, EShLanguageMask, const char* featureDesc);
+    void requireStage(const TSourceLoc&, EShLanguage, const char* featureDesc);
+#ifdef GLSLANG_WEB
+    const EProfile profile = EEsProfile;
+    bool isEsProfile() const { return true; }
+    void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc)
+    {
+        if (! (EEsProfile & profileMask))
+            error(loc, "not supported with this profile:", featureDesc, ProfileName(profile));
+    }
+    void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions,
+                         const char* const extensions[], const char* featureDesc)
+    {
+        if ((EEsProfile & profileMask) && (minVersion == 0 || version < minVersion))
+            error(loc, "not supported for this version or the enabled extensions", featureDesc, "");
+    }
+    void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension,
+                         const char* featureDesc)
+    {
+        profileRequires(loc, profileMask, minVersion, extension ?
1 : 0, &extension, featureDesc); + } + void initializeExtensionBehavior() { } + void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc) { } + void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc) { } + void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + TExtensionBehavior getExtensionBehavior(const char*) { return EBhMissing; } + bool extensionTurnedOn(const char* const extension) { return false; } + bool extensionsTurnedOn(int numExtensions, const char* const extensions[]) { return false; } + void updateExtensionBehavior(int line, const char* const extension, const char* behavior) { } + void updateExtensionBehavior(const char* const extension, TExtensionBehavior) { } + void checkExtensionStage(const TSourceLoc&, const char* const extension) { } + void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior) { } + void fullIntegerCheck(const TSourceLoc&, const char* op) { } + void doubleCheck(const TSourceLoc&, const char* op) { } + bool float16Arithmetic() { return false; } + void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int16Arithmetic() { return false; } + void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int8Arithmetic() { return false; } + void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + void int64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + bool relaxedErrors() const { return false; } + bool suppressWarnings() const { return true; } + bool isForwardCompatible() const { return false; } +#else + bool forwardCompatible; // true if errors are to be given for use of deprecated features + EProfile profile; // the declared profile in the shader (core by default) + bool isEsProfile() const { return profile == EEsProfile; } + void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions, + const char* const extensions[], const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension, + const char* featureDesc); + virtual void initializeExtensionBehavior(); + virtual void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc); + virtual void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc); + virtual void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual TExtensionBehavior getExtensionBehavior(const char*); + virtual bool extensionTurnedOn(const char* const extension); + virtual bool extensionsTurnedOn(int numExtensions, const char* const extensions[]); + virtual void updateExtensionBehavior(int line, const char* const extension, const 
char* behavior); + virtual void updateExtensionBehavior(const char* const extension, TExtensionBehavior); + virtual bool checkExtensionsRequested(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void checkExtensionStage(const TSourceLoc&, const char* const extension); + virtual void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior); + virtual void fullIntegerCheck(const TSourceLoc&, const char* op); + + virtual void unimplemented(const TSourceLoc&, const char* featureDesc); + virtual void doubleCheck(const TSourceLoc&, const char* op); + virtual void float16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void float16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool float16Arithmetic(); + virtual void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int16Arithmetic(); + virtual void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int8ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int8Arithmetic(); + virtual void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void float16OpaqueCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void int64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt8Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void fcoopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void intcoopmatCheck(const TSourceLoc&, const char *op, bool builtIn = false); + bool relaxedErrors() const { return (messages & EShMsgRelaxedErrors) != 0; } + bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; } + bool isForwardCompatible() const { return forwardCompatible; } +#endif // GLSLANG_WEB + virtual void spvRemoved(const TSourceLoc&, const char* op); + virtual void vulkanRemoved(const TSourceLoc&, const char* op); + virtual void requireVulkan(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char *op, unsigned int version); + + +#if defined(GLSLANG_WEB) && !defined(GLSLANG_WEB_DEVEL) + void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } + void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } +#else + virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) 
= 0; + virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; + virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; + virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; +#endif + + void addError() { ++numErrors; } + int getNumErrors() const { return numErrors; } + + void setScanner(TInputScanner* scanner) { currentScanner = scanner; } + TInputScanner* getScanner() const { return currentScanner; } + const TSourceLoc& getCurrentLoc() const { return currentScanner->getSourceLoc(); } + void setCurrentLine(int line) { currentScanner->setLine(line); } + void setCurrentColumn(int col) { currentScanner->setColumn(col); } + void setCurrentSourceName(const char* name) { currentScanner->setFile(name); } + void setCurrentString(int string) { currentScanner->setString(string); } + + void getPreamble(std::string&); +#ifdef ENABLE_HLSL + bool isReadingHLSL() const { return (messages & EShMsgReadHlsl) == EShMsgReadHlsl; } + bool hlslEnable16BitTypes() const { return (messages & EShMsgHlslEnable16BitTypes) != 0; } + bool hlslDX9Compatible() const { return (messages & EShMsgHlslDX9Compatible) != 0; } +#else + bool isReadingHLSL() const { return false; } +#endif + + TInfoSink& infoSink; + + // compilation mode + int version; // version, updated by #version in the shader + EShLanguage language; // really the stage + SpvVersion spvVersion; + TIntermediate& intermediate; // helper for making and hooking up pieces of the parse tree + +protected: + TMap<TString, TExtensionBehavior> extensionBehavior; // for each extension string, what its current behavior is + TMap<TString, unsigned int> extensionMinSpv; // for each extension string, store minimum spirv required + EShMessages messages; // errors/warnings/rule-sets + int numErrors; // number of compile-time errors encountered + TInputScanner* currentScanner; + +private: + explicit TParseVersions(const TParseVersions&); + TParseVersions& operator=(const TParseVersions&); +}; + +} // end namespace glslang + +#endif // _PARSE_VERSIONS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/preprocessor/PpContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/preprocessor/PpContext.h new file mode 100644 index 0000000..714b5ea --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/preprocessor/PpContext.h @@ -0,0 +1,703 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. + +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+\****************************************************************************/ + +#ifndef PPCONTEXT_H +#define PPCONTEXT_H + +#include <stack> +#include <unordered_map> +#include <sstream> + +#include "../ParseHelper.h" +#include "PpTokens.h" + +/* windows only pragma */ +#ifdef _MSC_VER + #pragma warning(disable : 4127) +#endif + +namespace glslang { + +class TPpToken { +public: + TPpToken() { clear(); } + void clear() + { + space = false; + i64val = 0; + loc.init(); + name[0] = 0; + } + + // Used for comparing macro definitions, so checks what is relevant for that. + bool operator==(const TPpToken& right) const + { + return space == right.space && + ival == right.ival && dval == right.dval && i64val == right.i64val && + strncmp(name, right.name, MaxTokenLength) == 0; + } + bool operator!=(const TPpToken& right) const { return ! operator==(right); } + + TSourceLoc loc; + // True if a space (for white space or a removed comment) should also be + // recognized, in front of the token returned: + bool space; + // Numeric value of the token: + union { + int ival; + double dval; + long long i64val; + }; + // Text string of the token: + char name[MaxTokenLength + 1]; +}; + +class TStringAtomMap { +// +// Implementation is in PpAtom.cpp +// +// Maintain a bi-directional mapping between relevant preprocessor strings and +// "atoms" which are unique integers (small, contiguous, not hash-like) per string. +// +public: + TStringAtomMap(); + + // Map string -> atom. + // Return 0 if no existing string. + int getAtom(const char* s) const + { + auto it = atomMap.find(s); + return it == atomMap.end() ? 0 : it->second; + } + + // Map a new or existing string -> atom, inventing a new atom if necessary. + int getAddAtom(const char* s) + { + int atom = getAtom(s); + if (atom == 0) { + atom = nextAtom++; + addAtomFixed(s, atom); + } + return atom; + } + + // Map atom -> string. + const char* getString(int atom) const { return stringMap[atom]->c_str(); } + +protected: + TStringAtomMap(TStringAtomMap&); + TStringAtomMap& operator=(TStringAtomMap&); + + TUnorderedMap<TString, int> atomMap; + TVector<const TString*> stringMap; // these point into the TString in atomMap + int nextAtom; + + // Bad source characters can lead to bad atoms, so gracefully handle those by + // pre-filling the table with them (to avoid if tests later). + TString badToken; + + // Add bi-directional mappings: + // - string -> atom + // - atom -> string + void addAtomFixed(const char* s, int atom) + { + auto it = atomMap.insert(std::pair<TString, int>(s, atom)).first; + if (stringMap.size() < (size_t)atom + 1) + stringMap.resize(atom + 100, &badToken); + stringMap[atom] = &it->first; + } +}; + +class TInputScanner; + +enum MacroExpandResult { + MacroExpandNotStarted, // macro not expanded, which might not be an error + MacroExpandError, // a clear error occurred while expanding, no expansion + MacroExpandStarted, // macro expansion process has started + MacroExpandUndef // macro is undefined and will be expanded +}; + +// This class is the result of turning a huge pile of C code communicating through globals +// into a class. This was done to allow instancing to attain thread safety. +// Don't expect too much in terms of OO design. 
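+ +// NOTE (editor): illustrative sketch only, not part of the upstream header. +// Given the TStringAtomMap above, string/atom lookups round-trip like this +// (the 'atoms' instance name is hypothetical): +// +// TStringAtomMap atoms; +// int a = atoms.getAddAtom("__LINE__"); // invents a fresh atom on first use +// int b = atoms.getAddAtom("__LINE__"); // same string -> same atom, so a == b +// const char* s = atoms.getString(a); // atom -> string: "__LINE__" +// int none = atoms.getAtom("never_seen"); // 0 when no atom exists for a string +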
+class TPpContext { +public: + TPpContext(TParseContextBase&, const std::string& rootFileName, TShader::Includer&); + virtual ~TPpContext(); + + void setPreamble(const char* preamble, size_t length); + + int tokenize(TPpToken& ppToken); + int tokenPaste(int token, TPpToken&); + + class tInput { + public: + tInput(TPpContext* p) : done(false), pp(p) { } + virtual ~tInput() { } + + virtual int scan(TPpToken*) = 0; + virtual int getch() = 0; + virtual void ungetch() = 0; + virtual bool peekPasting() { return false; } // true when about to see ## + virtual bool peekContinuedPasting(int) { return false; } // true when non-spaced tokens can paste + virtual bool endOfReplacementList() { return false; } // true when at the end of a macro replacement list (RHS of #define) + virtual bool isMacroInput() { return false; } + + // Will be called when we start reading tokens from this instance + virtual void notifyActivated() {} + // Will be called when we do not read tokens from this instance anymore + virtual void notifyDeleted() {} + protected: + bool done; + TPpContext* pp; + }; + + void setInput(TInputScanner& input, bool versionWillBeError); + + void pushInput(tInput* in) + { + inputStack.push_back(in); + in->notifyActivated(); + } + void popInput() + { + inputStack.back()->notifyDeleted(); + delete inputStack.back(); + inputStack.pop_back(); + } + + // + // From PpTokens.cpp + // + + // Capture the needed parts of a token stream for macro recording/playback. + class TokenStream { + public: + // Manage a stream of these 'Token', which capture the relevant parts + // of a TPpToken, plus its atom. + class Token { + public: + Token(int atom, const TPpToken& ppToken) : + atom(atom), + space(ppToken.space), + i64val(ppToken.i64val), + name(ppToken.name) { } + int get(TPpToken& ppToken) + { + ppToken.clear(); + ppToken.space = space; + ppToken.i64val = i64val; + snprintf(ppToken.name, sizeof(ppToken.name), "%s", name.c_str()); + return atom; + } + bool isAtom(int a) const { return atom == a; } + int getAtom() const { return atom; } + bool nonSpaced() const { return !space; } + protected: + Token() {} + int atom; + bool space; // did a space precede the token? + long long i64val; + TString name; + }; + + TokenStream() : currentPos(0) { } + + void putToken(int token, TPpToken* ppToken); + bool peekToken(int atom) { return !atEnd() && stream[currentPos].isAtom(atom); } + bool peekContinuedPasting(int atom) + { + // This is basically necessary because, for example, the PP + // tokenizer only accepts valid numeric-literals plus suffixes, so + // separates numeric-literals plus bad suffix into two tokens, which + // should get both pasted together as one token when token pasting. + // + // The following code is a bit more generalized than the above example. 
+ if (!atEnd() && atom == PpAtomIdentifier && stream[currentPos].nonSpaced()) { + switch(stream[currentPos].getAtom()) { + case PpAtomConstInt: + case PpAtomConstUint: + case PpAtomConstInt64: + case PpAtomConstUint64: + case PpAtomConstInt16: + case PpAtomConstUint16: + case PpAtomConstFloat: + case PpAtomConstDouble: + case PpAtomConstFloat16: + case PpAtomConstString: + case PpAtomIdentifier: + return true; + default: + break; + } + } + + return false; + } + int getToken(TParseContextBase&, TPpToken*); + bool atEnd() { return currentPos >= stream.size(); } + bool peekTokenizedPasting(bool lastTokenPastes); + bool peekUntokenizedPasting(); + void reset() { currentPos = 0; } + + protected: + TVector<Token> stream; + size_t currentPos; + }; + + // + // From Pp.cpp + // + + struct MacroSymbol { + MacroSymbol() : functionLike(0), busy(0), undef(0) { } + TVector<int> args; + TokenStream body; + unsigned functionLike : 1; // 0 means object-like, 1 means function-like + unsigned busy : 1; + unsigned undef : 1; + }; + + typedef TMap<int, MacroSymbol> TSymbolMap; + TSymbolMap macroDefs; // map atoms to macro definitions + MacroSymbol* lookupMacroDef(int atom) + { + auto existingMacroIt = macroDefs.find(atom); + return (existingMacroIt == macroDefs.end()) ? nullptr : &(existingMacroIt->second); + } + void addMacroDef(int atom, MacroSymbol& macroDef) { macroDefs[atom] = macroDef; } + +protected: + TPpContext(TPpContext&); + TPpContext& operator=(TPpContext&); + + TStringAtomMap atomStrings; + char* preamble; // string to parse, all before line 1 of string 0, it is 0 if no preamble + int preambleLength; + char** strings; // official strings of shader, starting at string 0 line 1 + size_t* lengths; + int numStrings; // how many official strings there are + int currentString; // which string we're currently parsing (-1 for preamble) + + // Scanner data: + int previous_token; + TParseContextBase& parseContext; + + // Get the next token from *stack* of input sources, popping input sources + // that are out of tokens, down until an input source is found that has a token. + // Return EndOfInput when there are no more tokens to be found by doing this. + int scanToken(TPpToken* ppToken) + { + int token = EndOfInput; + + while (! inputStack.empty()) { + token = inputStack.back()->scan(ppToken); + if (token != EndOfInput || inputStack.empty()) + break; + popInput(); + } + + return token; + } + int getChar() { return inputStack.back()->getch(); } + void ungetChar() { inputStack.back()->ungetch(); } + bool peekPasting() { return !inputStack.empty() && inputStack.back()->peekPasting(); } + bool peekContinuedPasting(int a) + { + return !inputStack.empty() && inputStack.back()->peekContinuedPasting(a); + } + bool endOfReplacementList() { return inputStack.empty() || inputStack.back()->endOfReplacementList(); } + bool isMacroInput() { return inputStack.size() > 0 && inputStack.back()->isMacroInput(); } + + static const int maxIfNesting = 65; + + int ifdepth; // current #if-#else-#endif nesting in the cpp.c file (pre-processor) + bool elseSeen[maxIfNesting]; // Keep track of whether an else has been seen at a particular depth + int elsetracker; // #if-#else and #endif constructs...Counter. 
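+ +// NOTE (editor): illustrative sketch only, not upstream code. scanToken() above +// drains a *stack* of inputs: the top tInput is scanned until it reports +// EndOfInput, then it is popped (notifyDeleted() runs and it is deleted) and +// scanning falls through to the input below; the caller sees EndOfInput only +// once the stack is empty. A hypothetical driver loop over the public +// tokenize() wrapper, where 'pp' and 'consume' are assumed names, would be: +// +// TPpToken ppToken; +// int token; +// while ((token = pp.tokenize(ppToken)) != EndOfInput) +// consume(token, ppToken); +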
+ + class tMacroInput : public tInput { + public: + tMacroInput(TPpContext* pp) : tInput(pp), prepaste(false), postpaste(false) { } + virtual ~tMacroInput() + { + for (size_t i = 0; i < args.size(); ++i) + delete args[i]; + for (size_t i = 0; i < expandedArgs.size(); ++i) + delete expandedArgs[i]; + } + + virtual int scan(TPpToken*) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + bool peekPasting() override { return prepaste; } + bool peekContinuedPasting(int a) override { return mac->body.peekContinuedPasting(a); } + bool endOfReplacementList() override { return mac->body.atEnd(); } + bool isMacroInput() override { return true; } + + MacroSymbol *mac; + TVector<TokenStream*> args; + TVector<TokenStream*> expandedArgs; + + protected: + bool prepaste; // true if we are just before ## + bool postpaste; // true if we are right after ## + }; + + class tMarkerInput : public tInput { + public: + tMarkerInput(TPpContext* pp) : tInput(pp) { } + virtual int scan(TPpToken*) override + { + if (done) + return EndOfInput; + done = true; + + return marker; + } + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + static const int marker = -3; + }; + + class tZeroInput : public tInput { + public: + tZeroInput(TPpContext* pp) : tInput(pp) { } + virtual int scan(TPpToken*) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + }; + + std::vector<tInput*> inputStack; + bool errorOnVersion; + bool versionSeen; + + // + // from Pp.cpp + // + + // Used to obtain #include content. + TShader::Includer& includer; + + int CPPdefine(TPpToken * ppToken); + int CPPundef(TPpToken * ppToken); + int CPPelse(int matchelse, TPpToken * ppToken); + int extraTokenCheck(int atom, TPpToken* ppToken, int token); + int eval(int token, int precedence, bool shortCircuit, int& res, bool& err, TPpToken * ppToken); + int evalToToken(int token, bool shortCircuit, int& res, bool& err, TPpToken * ppToken); + int CPPif (TPpToken * ppToken); + int CPPifdef(int defined, TPpToken * ppToken); + int CPPinclude(TPpToken * ppToken); + int CPPline(TPpToken * ppToken); + int CPPerror(TPpToken * ppToken); + int CPPpragma(TPpToken * ppToken); + int CPPversion(TPpToken * ppToken); + int CPPextension(TPpToken * ppToken); + int readCPPline(TPpToken * ppToken); + int scanHeaderName(TPpToken* ppToken, char delimit); + TokenStream* PrescanMacroArg(TokenStream&, TPpToken*, bool newLineOkay); + MacroExpandResult MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOkay); + + // + // From PpTokens.cpp + // + void pushTokenStreamInput(TokenStream&, bool pasting = false); + void UngetToken(int token, TPpToken*); + + class tTokenInput : public tInput { + public: + tTokenInput(TPpContext* pp, TokenStream* t, bool prepasting) : + tInput(pp), + tokens(t), + lastTokenPastes(prepasting) { } + virtual int scan(TPpToken *ppToken) override { return tokens->getToken(pp->parseContext, ppToken); } + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + virtual bool peekPasting() override { return tokens->peekTokenizedPasting(lastTokenPastes); } + bool peekContinuedPasting(int a) override { return tokens->peekContinuedPasting(a); } + protected: + TokenStream* tokens; + bool lastTokenPastes; // true if the last token in the input is to be pasted, rather than consumed as a token + }; + + class tUngotTokenInput : public tInput { + 
public: + tUngotTokenInput(TPpContext* pp, int t, TPpToken* p) : tInput(pp), token(t), lval(*p) { } + virtual int scan(TPpToken *) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + protected: + int token; + TPpToken lval; + }; + + // + // From PpScanner.cpp + // + class tStringInput : public tInput { + public: + tStringInput(TPpContext* pp, TInputScanner& i) : tInput(pp), input(&i) { } + virtual int scan(TPpToken*) override; + + // Scanner used to get source stream characters. + // - Escaped newlines are handled here, invisibly to the caller. + // - All forms of newline are handled, and turned into just a '\n'. + int getch() override + { + int ch = input->get(); + + if (ch == '\\') { + // Move past escaped newlines, as many as sequentially exist + do { + if (input->peek() == '\r' || input->peek() == '\n') { + bool allowed = pp->parseContext.lineContinuationCheck(input->getSourceLoc(), pp->inComment); + if (! allowed && pp->inComment) + return '\\'; + + // escape one newline now + ch = input->get(); + int nextch = input->get(); + if (ch == '\r' && nextch == '\n') + ch = input->get(); + else + ch = nextch; + } else + return '\\'; + } while (ch == '\\'); + } + + // handle any non-escaped newline + if (ch == '\r' || ch == '\n') { + if (ch == '\r' && input->peek() == '\n') + input->get(); + return '\n'; + } + + return ch; + } + + // Scanner used to backup the source stream characters. Newlines are + // handled here, invisibly to the caller, meaning have to undo exactly + // what getch() above does (e.g., don't leave things in the middle of a + // sequence of escaped newlines). + void ungetch() override + { + input->unget(); + + do { + int ch = input->peek(); + if (ch == '\r' || ch == '\n') { + if (ch == '\n') { + // correct for two-character newline + input->unget(); + if (input->peek() != '\r') + input->get(); + } + // now in front of a complete newline, move past an escape character + input->unget(); + if (input->peek() == '\\') + input->unget(); + else { + input->get(); + break; + } + } else + break; + } while (true); + } + + protected: + TInputScanner* input; + }; + + // Holds a reference to included file data, as well as a + // prologue and an epilogue string. This can be scanned using the tInput + // interface and acts as a single source string. + class TokenizableIncludeFile : public tInput { + public: + // Copies prologue and epilogue. The includedFile must remain valid + // until this TokenizableIncludeFile is no longer used. 
+ TokenizableIncludeFile(const TSourceLoc& startLoc, + const std::string& prologue, + TShader::Includer::IncludeResult* includedFile, + const std::string& epilogue, + TPpContext* pp) + : tInput(pp), + prologue_(prologue), + epilogue_(epilogue), + includedFile_(includedFile), + scanner(3, strings, lengths, nullptr, 0, 0, true), + prevScanner(nullptr), + stringInput(pp, scanner) + { + strings[0] = prologue_.data(); + strings[1] = includedFile_->headerData; + strings[2] = epilogue_.data(); + + lengths[0] = prologue_.size(); + lengths[1] = includedFile_->headerLength; + lengths[2] = epilogue_.size(); + + scanner.setLine(startLoc.line); + scanner.setString(startLoc.string); + + scanner.setFile(startLoc.getFilenameStr(), 0); + scanner.setFile(startLoc.getFilenameStr(), 1); + scanner.setFile(startLoc.getFilenameStr(), 2); + } + + // tInput methods: + int scan(TPpToken* t) override { return stringInput.scan(t); } + int getch() override { return stringInput.getch(); } + void ungetch() override { stringInput.ungetch(); } + + void notifyActivated() override + { + prevScanner = pp->parseContext.getScanner(); + pp->parseContext.setScanner(&scanner); + pp->push_include(includedFile_); + } + + void notifyDeleted() override + { + pp->parseContext.setScanner(prevScanner); + pp->pop_include(); + } + + private: + TokenizableIncludeFile& operator=(const TokenizableIncludeFile&); + + // Stores the prologue for this string. + const std::string prologue_; + + // Stores the epilogue for this string. + const std::string epilogue_; + + // Points to the IncludeResult that this TokenizableIncludeFile represents. + TShader::Includer::IncludeResult* includedFile_; + + // Will point to prologue_, includedFile_->headerData and epilogue_ + // This is passed to scanner constructor. + // These do not own the storage and it must remain valid until this + // object has been destroyed. + const char* strings[3]; + // Length of str_, passed to scanner constructor. + size_t lengths[3]; + // Scans over str_. + TInputScanner scanner; + // The previous effective scanner before the scanner in this instance + // has been activated. + TInputScanner* prevScanner; + // Delegate object implementing the tInput interface. 
+ tStringInput stringInput; + }; + + int ScanFromString(char* s); + void missingEndifCheck(); + int lFloatConst(int len, int ch, TPpToken* ppToken); + int characterLiteral(TPpToken* ppToken); + + void push_include(TShader::Includer::IncludeResult* result) + { + currentSourceFile = result->headerName; + includeStack.push(result); + } + + void pop_include() + { + TShader::Includer::IncludeResult* include = includeStack.top(); + includeStack.pop(); + includer.releaseInclude(include); + if (includeStack.empty()) { + currentSourceFile = rootFileName; + } else { + currentSourceFile = includeStack.top()->headerName; + } + } + + bool inComment; + std::string rootFileName; + std::stack<TShader::Includer::IncludeResult*> includeStack; + std::string currentSourceFile; + + std::istringstream strtodStream; + bool disableEscapeSequences; +}; + +} // end namespace glslang + +#endif // PPCONTEXT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/preprocessor/PpTokens.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/preprocessor/PpTokens.h new file mode 100644 index 0000000..7b0f815 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/preprocessor/PpTokens.h @@ -0,0 +1,179 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. 
+ +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\****************************************************************************/ + +#ifndef PARSER_H +#define PARSER_H + +namespace glslang { + +// Multi-character tokens +enum EFixedAtoms { + // single character tokens get their own char value as their token; start here for multi-character tokens + PpAtomMaxSingle = 127, + + // replace bad character tokens with this, to avoid accidental aliasing with the below + PpAtomBadToken, + + // Operators + + PPAtomAddAssign, + PPAtomSubAssign, + PPAtomMulAssign, + PPAtomDivAssign, + PPAtomModAssign, + + PpAtomRight, + PpAtomLeft, + + PpAtomRightAssign, + PpAtomLeftAssign, + PpAtomAndAssign, + PpAtomOrAssign, + PpAtomXorAssign, + + PpAtomAnd, + PpAtomOr, + PpAtomXor, + + PpAtomEQ, + PpAtomNE, + PpAtomGE, + PpAtomLE, + + PpAtomDecrement, + PpAtomIncrement, + + PpAtomColonColon, + + PpAtomPaste, + + // Constants + + PpAtomConstInt, + PpAtomConstUint, + PpAtomConstInt64, + PpAtomConstUint64, + PpAtomConstInt16, + PpAtomConstUint16, + PpAtomConstFloat, + PpAtomConstDouble, + PpAtomConstFloat16, + PpAtomConstString, + + // Identifiers + PpAtomIdentifier, + + // preprocessor "keywords" + + PpAtomDefine, + PpAtomUndef, + + PpAtomIf, + PpAtomIfdef, + PpAtomIfndef, + PpAtomElse, + PpAtomElif, + PpAtomEndif, + + PpAtomLine, + PpAtomPragma, + PpAtomError, + + // #version ... 
+ PpAtomVersion, + PpAtomCore, + PpAtomCompatibility, + PpAtomEs, + + // #extension + PpAtomExtension, + + // __LINE__, __FILE__, __VERSION__ + + PpAtomLineMacro, + PpAtomFileMacro, + PpAtomVersionMacro, + + // #include + PpAtomInclude, + + PpAtomLast, +}; + +} // end namespace glslang + +#endif /* not PARSER_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/propagateNoContraction.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/propagateNoContraction.h new file mode 100644 index 0000000..8521ad7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/propagateNoContraction.h @@ -0,0 +1,55 @@ +// +// Copyright (C) 2015-2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Visit the nodes in the glslang intermediate tree representation to +// propagate 'noContraction' qualifier. +// + +#pragma once + +#include "../Include/intermediate.h" + +namespace glslang { + +// Propagates the 'precise' qualifier for objects (objects marked with +// 'noContraction' qualifier) from the shader source specified 'precise' +// variables to all the involved objects, and add 'noContraction' qualifier for +// the involved arithmetic operations. +// Note that the same qualifier: 'noContraction' is used in both object nodes +// and arithmetic operation nodes, but has different meaning. For object nodes, +// 'noContraction' means the object is 'precise'; and for arithmetic operation +// nodes, it means the operation should not be contracted. 
+void PropagateNoContraction(const glslang::TIntermediate& intermediate); +}; diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/reflection.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/reflection.h new file mode 100644 index 0000000..bfd5452 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/MachineIndependent/reflection.h @@ -0,0 +1,223 @@ +// +// Copyright (C) 2013-2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#if !defined(GLSLANG_WEB) + +#ifndef _REFLECTION_INCLUDED +#define _REFLECTION_INCLUDED + +#include "../Public/ShaderLang.h" +#include "../Include/Types.h" + +#include <list> +#include <set> + +// +// A reflection database and its interface, consistent with the OpenGL API reflection queries. 
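+// +// NOTE (editor): illustrative sketch only, not upstream text. Typical use of +// the TReflection class declared below, assuming a parsed stage is available +// in 'intermediate' (all other names here are hypothetical): +// +// TReflection refl(EShReflectionDefault, EShLangVertex, EShLangFragment); +// refl.addStage(EShLangVertex, intermediate); // grow the database stage by stage +// for (int i = 0; i < refl.getNumUniforms(); ++i) +// process(refl.getUniform(i)); // one TObjectReflection per uniform +// int index = refl.getIndex("u_mvp"); // -1 when the name is unknown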
+// + +namespace glslang { + +class TIntermediate; +class TIntermAggregate; +class TReflectionTraverser; + +// The full reflection database +class TReflection { +public: + TReflection(EShReflectionOptions opts, EShLanguage first, EShLanguage last) + : options(opts), firstStage(first), lastStage(last), badReflection(TObjectReflection::badReflection()) + { + for (int dim=0; dim<3; ++dim) + localSize[dim] = 0; + } + + virtual ~TReflection() {} + + // grow the reflection stage by stage + bool addStage(EShLanguage, const TIntermediate&); + + // for mapping a uniform index to a uniform object's description + int getNumUniforms() { return (int)indexToUniform.size(); } + const TObjectReflection& getUniform(int i) const + { + if (i >= 0 && i < (int)indexToUniform.size()) + return indexToUniform[i]; + else + return badReflection; + } + + // for mapping a block index to the block's description + int getNumUniformBlocks() const { return (int)indexToUniformBlock.size(); } + const TObjectReflection& getUniformBlock(int i) const + { + if (i >= 0 && i < (int)indexToUniformBlock.size()) + return indexToUniformBlock[i]; + else + return badReflection; + } + + // for mapping a pipeline input index to the input's description + int getNumPipeInputs() { return (int)indexToPipeInput.size(); } + const TObjectReflection& getPipeInput(int i) const + { + if (i >= 0 && i < (int)indexToPipeInput.size()) + return indexToPipeInput[i]; + else + return badReflection; + } + + // for mapping a pipeline output index to the output's description + int getNumPipeOutputs() { return (int)indexToPipeOutput.size(); } + const TObjectReflection& getPipeOutput(int i) const + { + if (i >= 0 && i < (int)indexToPipeOutput.size()) + return indexToPipeOutput[i]; + else + return badReflection; + } + + // for mapping from an atomic counter to the uniform index + int getNumAtomicCounters() const { return (int)atomicCounterUniformIndices.size(); } + const TObjectReflection& getAtomicCounter(int i) const + { + if (i >= 0 && i < (int)atomicCounterUniformIndices.size()) + return getUniform(atomicCounterUniformIndices[i]); + else + return badReflection; + } + + // for mapping a buffer variable index to a buffer variable object's description + int getNumBufferVariables() { return (int)indexToBufferVariable.size(); } + const TObjectReflection& getBufferVariable(int i) const + { + if (i >= 0 && i < (int)indexToBufferVariable.size()) + return indexToBufferVariable[i]; + else + return badReflection; + } + + // for mapping a storage block index to the storage block's description + int getNumStorageBuffers() const { return (int)indexToBufferBlock.size(); } + const TObjectReflection& getStorageBufferBlock(int i) const + { + if (i >= 0 && i < (int)indexToBufferBlock.size()) + return indexToBufferBlock[i]; + else + return badReflection; + } + + // for mapping any name to its index (block names, uniform names and input/output names) + int getIndex(const char* name) const + { + TNameToIndex::const_iterator it = nameToIndex.find(name); + if (it == nameToIndex.end()) + return -1; + else + return it->second; + } + + // see getIndex(const char*) + int getIndex(const TString& name) const { return getIndex(name.c_str()); } + + + // for mapping any name to its index (only pipe input/output names) + int getPipeIOIndex(const char* name, const bool inOrOut) const + { + TNameToIndex::const_iterator it = inOrOut ? pipeInNameToIndex.find(name) : pipeOutNameToIndex.find(name); + if (it == (inOrOut ? pipeInNameToIndex.end() : pipeOutNameToIndex.end())) + return -1; + else + return it->second; + } + + // see getPipeIOIndex(const char*, const bool) + int getPipeIOIndex(const TString& name, const bool inOrOut) const { return getPipeIOIndex(name.c_str(), inOrOut); } + + // Thread local size + unsigned getLocalSize(int dim) const { return dim <= 2 ? localSize[dim] : 0; } + + void dump(); + +protected: + friend class glslang::TReflectionTraverser; + + void buildCounterIndices(const TIntermediate&); + void buildUniformStageMask(const TIntermediate& intermediate); + void buildAttributeReflection(EShLanguage, const TIntermediate&); + + // Need a TString hash: typedef std::unordered_map<TString, int> TNameToIndex; + typedef std::map<TString, int> TNameToIndex; + typedef std::vector<TObjectReflection> TMapIndexToReflection; + typedef std::vector<int> TIndices; + + TMapIndexToReflection& GetBlockMapForStorage(TStorageQualifier storage) + { + if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer) + return indexToBufferBlock; + return indexToUniformBlock; + } + TMapIndexToReflection& GetVariableMapForStorage(TStorageQualifier storage) + { + if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer) + return indexToBufferVariable; + return indexToUniform; + } + + EShReflectionOptions options; + + EShLanguage firstStage; + EShLanguage lastStage; + + TObjectReflection badReflection; // return for queries of -1 or generally out of range; has expected descriptions within it for this + TNameToIndex nameToIndex; // maps names to indexes; can hold all types of data: uniform/buffer and which function names have been processed + TNameToIndex pipeInNameToIndex; // maps pipe in names to indexes, this is a fix to separate pipe I/O from uniforms and buffers. + TNameToIndex pipeOutNameToIndex; // maps pipe out names to indexes, this is a fix to separate pipe I/O from uniforms and buffers. + TMapIndexToReflection indexToUniform; + TMapIndexToReflection indexToUniformBlock; + TMapIndexToReflection indexToBufferVariable; + TMapIndexToReflection indexToBufferBlock; + TMapIndexToReflection indexToPipeInput; + TMapIndexToReflection indexToPipeOutput; + TIndices atomicCounterUniformIndices; + + unsigned int localSize[3]; +}; + +} // end namespace glslang + +#endif // _REFLECTION_INCLUDED + +#endif // !GLSLANG_WEB diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/ResourceLimits.h new file mode 100644 index 0000000..f70be81 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/ResourceLimits.h @@ -0,0 +1,57 @@ +// +// Copyright (C) 2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_ +#define _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_ + +#include <string> + +#include "../Include/ResourceLimits.h" + +// Return pointer to user-writable Resource to pass through API in +// future-proof way. +extern TBuiltInResource* GetResources(); + +// These are the default resources for TBuiltInResources, used for both +// - parsing this string for the case where the user didn't supply one, +// - dumping out a template for user construction of a config file. +extern const TBuiltInResource* GetDefaultResources(); + +// Returns the DefaultTBuiltInResource as a human-readable string. +std::string GetDefaultTBuiltInResourceString(); + +// Decodes the resource limits from |config| to |resources|. +void DecodeResourceLimits(TBuiltInResource* resources, char* config); + +#endif // _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/ShaderLang.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/ShaderLang.h new file mode 100644 index 0000000..90a5302 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/ShaderLang.h @@ -0,0 +1,987 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013-2016 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +#ifndef _COMPILER_INTERFACE_INCLUDED_ +#define _COMPILER_INTERFACE_INCLUDED_ + +#include "../Include/ResourceLimits.h" +#include "../MachineIndependent/Versions.h" + +#include <cstring> +#include <vector> + +#ifdef _WIN32 + #define C_DECL __cdecl +#else + #define C_DECL +#endif + +#ifdef GLSLANG_IS_SHARED_LIBRARY + #ifdef _WIN32 + #ifdef GLSLANG_EXPORTING + #define GLSLANG_EXPORT __declspec(dllexport) + #else + #define GLSLANG_EXPORT __declspec(dllimport) + #endif + #elif __GNUC__ >= 4 + #define GLSLANG_EXPORT __attribute__((visibility("default"))) + #endif +#endif // GLSLANG_IS_SHARED_LIBRARY + +#ifndef GLSLANG_EXPORT +#define GLSLANG_EXPORT +#endif + +// +// This is the platform independent interface between an OGL driver +// and the shading language compiler/linker. +// + +#ifdef __cplusplus + extern "C" { +#endif + +// +// Call before doing any other compiler/linker operations. +// +// (Call once per process, not once per thread.) +// +GLSLANG_EXPORT int ShInitialize(); + +// +// Call this at process shutdown to clean up memory. +// +GLSLANG_EXPORT int ShFinalize(); + +// +// Types of languages the compiler can consume. +// +typedef enum { + EShLangVertex, + EShLangTessControl, + EShLangTessEvaluation, + EShLangGeometry, + EShLangFragment, + EShLangCompute, + EShLangRayGen, + EShLangRayGenNV = EShLangRayGen, + EShLangIntersect, + EShLangIntersectNV = EShLangIntersect, + EShLangAnyHit, + EShLangAnyHitNV = EShLangAnyHit, + EShLangClosestHit, + EShLangClosestHitNV = EShLangClosestHit, + EShLangMiss, + EShLangMissNV = EShLangMiss, + EShLangCallable, + EShLangCallableNV = EShLangCallable, + EShLangTask, + EShLangTaskNV = EShLangTask, + EShLangMesh, + EShLangMeshNV = EShLangMesh, + LAST_ELEMENT_MARKER(EShLangCount), +} EShLanguage; // would be better as stage, but this is ancient now + +typedef enum : unsigned { + EShLangVertexMask = (1 << EShLangVertex), + EShLangTessControlMask = (1 << EShLangTessControl), + EShLangTessEvaluationMask = (1 << EShLangTessEvaluation), + EShLangGeometryMask = (1 << EShLangGeometry), + EShLangFragmentMask = (1 << EShLangFragment), + EShLangComputeMask = (1 << EShLangCompute), + EShLangRayGenMask = (1 << EShLangRayGen), + EShLangRayGenNVMask = EShLangRayGenMask, + EShLangIntersectMask = (1 << EShLangIntersect), + EShLangIntersectNVMask = EShLangIntersectMask, + EShLangAnyHitMask = (1 << EShLangAnyHit), + EShLangAnyHitNVMask = EShLangAnyHitMask, + EShLangClosestHitMask = (1 << EShLangClosestHit), + EShLangClosestHitNVMask = EShLangClosestHitMask, + EShLangMissMask = (1 << EShLangMiss), + EShLangMissNVMask = EShLangMissMask, + EShLangCallableMask = (1 << EShLangCallable), + EShLangCallableNVMask = EShLangCallableMask, + EShLangTaskMask = (1 << EShLangTask), + EShLangTaskNVMask = EShLangTaskMask, + EShLangMeshMask = (1 << EShLangMesh), + EShLangMeshNVMask = EShLangMeshMask, + LAST_ELEMENT_MARKER(EShLanguageMaskCount), +} EShLanguageMask; + +namespace glslang { + +class TType; + +typedef enum { + EShSourceNone, + EShSourceGlsl, // GLSL, 
includes ESSL (OpenGL ES GLSL) + EShSourceHlsl, // HLSL + LAST_ELEMENT_MARKER(EShSourceCount), +} EShSource; // if EShLanguage were EShStage, this could be EShLanguage instead + +typedef enum { + EShClientNone, // use when there is no client, e.g. for validation + EShClientVulkan, // as GLSL dialect, specifies KHR_vulkan_glsl extension + EShClientOpenGL, // as GLSL dialect, specifies ARB_gl_spirv extension + LAST_ELEMENT_MARKER(EShClientCount), +} EShClient; + +typedef enum { + EShTargetNone, + EShTargetSpv, // SPIR-V (preferred spelling) + EshTargetSpv = EShTargetSpv, // legacy spelling + LAST_ELEMENT_MARKER(EShTargetCount), +} EShTargetLanguage; + +typedef enum { + EShTargetVulkan_1_0 = (1 << 22), // Vulkan 1.0 + EShTargetVulkan_1_1 = (1 << 22) | (1 << 12), // Vulkan 1.1 + EShTargetVulkan_1_2 = (1 << 22) | (2 << 12), // Vulkan 1.2 + EShTargetVulkan_1_3 = (1 << 22) | (3 << 12), // Vulkan 1.3 + EShTargetOpenGL_450 = 450, // OpenGL + LAST_ELEMENT_MARKER(EShTargetClientVersionCount = 5), +} EShTargetClientVersion; + +typedef EShTargetClientVersion EshTargetClientVersion; + +typedef enum { + EShTargetSpv_1_0 = (1 << 16), // SPIR-V 1.0 + EShTargetSpv_1_1 = (1 << 16) | (1 << 8), // SPIR-V 1.1 + EShTargetSpv_1_2 = (1 << 16) | (2 << 8), // SPIR-V 1.2 + EShTargetSpv_1_3 = (1 << 16) | (3 << 8), // SPIR-V 1.3 + EShTargetSpv_1_4 = (1 << 16) | (4 << 8), // SPIR-V 1.4 + EShTargetSpv_1_5 = (1 << 16) | (5 << 8), // SPIR-V 1.5 + EShTargetSpv_1_6 = (1 << 16) | (6 << 8), // SPIR-V 1.6 + LAST_ELEMENT_MARKER(EShTargetLanguageVersionCount = 7), +} EShTargetLanguageVersion; + +struct TInputLanguage { + EShSource languageFamily; // redundant information with other input, this one overrides when not EShSourceNone + EShLanguage stage; // redundant information with other input, this one overrides when not EShSourceNone + EShClient dialect; + int dialectVersion; // version of client's language definition, not the client (when not EShClientNone) + bool vulkanRulesRelaxed; +}; + +struct TClient { + EShClient client; + EShTargetClientVersion version; // version of client itself (not the client's input dialect) +}; + +struct TTarget { + EShTargetLanguage language; + EShTargetLanguageVersion version; // version to target, if SPIR-V, defined by "word 1" of the SPIR-V header + bool hlslFunctionality1; // can target hlsl_functionality1 extension(s) +}; + +// All source/client/target versions and settings. +// Can override previous methods of setting, when items are set here. +// Expected to grow, as more are added, rather than growing parameter lists. +struct TEnvironment { + TInputLanguage input; // definition of the input language + TClient client; // what client is the overall compilation being done for? + TTarget target; // what to generate +}; + +GLSLANG_EXPORT const char* StageName(EShLanguage); + +} // end namespace glslang + +// +// Types of output the linker will create. +// +typedef enum { + EShExVertexFragment, + EShExFragment +} EShExecutable; + +// +// Optimization level for the compiler. +// +typedef enum { + EShOptNoGeneration, + EShOptNone, + EShOptSimple, // Optimizations that can be done quickly + EShOptFull, // Optimizations that will take more time + LAST_ELEMENT_MARKER(EshOptLevelCount), +} EShOptimizationLevel; + +// +// Texture and Sampler transformation mode. 
+// +typedef enum { + EShTexSampTransKeep, // keep textures and samplers as is (default) + EShTexSampTransUpgradeTextureRemoveSampler, // change texture w/o embedded sampler into sampled texture and throw away all samplers + LAST_ELEMENT_MARKER(EShTexSampTransCount), +} EShTextureSamplerTransformMode; + +// +// Message choices for what errors and warnings are given. +// +enum EShMessages : unsigned { + EShMsgDefault = 0, // default is to give all required errors and extra warnings + EShMsgRelaxedErrors = (1 << 0), // be liberal in accepting input + EShMsgSuppressWarnings = (1 << 1), // suppress all warnings, except those required by the specification + EShMsgAST = (1 << 2), // print the AST intermediate representation + EShMsgSpvRules = (1 << 3), // issue messages for SPIR-V generation + EShMsgVulkanRules = (1 << 4), // issue messages for Vulkan-requirements of GLSL for SPIR-V + EShMsgOnlyPreprocessor = (1 << 5), // only print out errors produced by the preprocessor + EShMsgReadHlsl = (1 << 6), // use HLSL parsing rules and semantics + EShMsgCascadingErrors = (1 << 7), // get cascading errors; risks error-recovery issues, instead of an early exit + EShMsgKeepUncalled = (1 << 8), // for testing, don't eliminate uncalled functions + EShMsgHlslOffsets = (1 << 9), // allow block offsets to follow HLSL rules instead of GLSL rules + EShMsgDebugInfo = (1 << 10), // save debug information + EShMsgHlslEnable16BitTypes = (1 << 11), // enable use of 16-bit types in SPIR-V for HLSL + EShMsgHlslLegalization = (1 << 12), // enable HLSL Legalization messages + EShMsgHlslDX9Compatible = (1 << 13), // enable HLSL DX9 compatible mode (for samplers and semantics) + EShMsgBuiltinSymbolTable = (1 << 14), // print the builtin symbol table + EShMsgEnhanced = (1 << 15), // enhanced message readability + LAST_ELEMENT_MARKER(EShMsgCount), +}; + +// +// Options for building reflection +// +typedef enum { + EShReflectionDefault = 0, // default is original behaviour before options were added + EShReflectionStrictArraySuffix = (1 << 0), // reflection will follow stricter rules for array-of-structs suffixes + EShReflectionBasicArraySuffix = (1 << 1), // arrays of basic types will be appended with [0] as in GL reflection + EShReflectionIntermediateIO = (1 << 2), // reflect inputs and outputs to program, even with no vertex shader + EShReflectionSeparateBuffers = (1 << 3), // buffer variables and buffer blocks are reflected separately + EShReflectionAllBlockVariables = (1 << 4), // reflect all variables in blocks, even if they are inactive + EShReflectionUnwrapIOBlocks = (1 << 5), // unwrap input/output blocks the same as with uniform blocks + EShReflectionAllIOVariables = (1 << 6), // reflect all input/output variables, even if they are inactive + EShReflectionSharedStd140SSBO = (1 << 7), // Apply std140/shared rules for ubo to ssbo + EShReflectionSharedStd140UBO = (1 << 8), // Apply std140/shared rules for ubo to ubo + LAST_ELEMENT_MARKER(EShReflectionCount), +} EShReflectionOptions; + +// +// Build a table for bindings. This can be used for locating +// attributes, uniforms, globals, etc., as needed. +// +typedef struct { + const char* name; + int binding; +} ShBinding; + +typedef struct { + int numBindings; + ShBinding* bindings; // array of bindings +} ShBindingTable; + +// +// ShHandle held by but opaque to the driver. It is allocated, +// managed, and de-allocated by the compiler/linker. Its contents +// are defined by and used by the compiler and linker.
For example, +// symbol table information and object code passed from the compiler +// to the linker can be stored where ShHandle points. +// +// If handle creation fails, 0 will be returned. +// +typedef void* ShHandle; + +// +// Driver calls these to create and destroy compiler/linker +// objects. +// +GLSLANG_EXPORT ShHandle ShConstructCompiler(const EShLanguage, int debugOptions); // one per shader +GLSLANG_EXPORT ShHandle ShConstructLinker(const EShExecutable, int debugOptions); // one per shader pair +GLSLANG_EXPORT ShHandle ShConstructUniformMap(); // one per uniform namespace (currently entire program object) +GLSLANG_EXPORT void ShDestruct(ShHandle); + +// +// The return value of ShCompile is boolean, non-zero indicating +// success. +// +// The info-log should be written by ShCompile into +// ShHandle, so it can answer future queries. +// +GLSLANG_EXPORT int ShCompile( + const ShHandle, + const char* const shaderStrings[], + const int numStrings, + const int* lengths, + const EShOptimizationLevel, + const TBuiltInResource *resources, + int debugOptions, + int defaultVersion = 110, // use 100 for ES environment, overridden by #version in shader + bool forwardCompatible = false, // give errors for use of deprecated features + EShMessages messages = EShMsgDefault // warnings and errors + ); + +GLSLANG_EXPORT int ShLinkExt( + const ShHandle, // linker object + const ShHandle h[], // compiler objects to link together + const int numHandles); + +// +// ShSetEncryptionMethod is a place-holder for specifying +// how source code is encrypted. +// +GLSLANG_EXPORT void ShSetEncryptionMethod(ShHandle); + +// +// All the following return 0 if the information is not +// available in the object passed down, or the object is bad. +// +GLSLANG_EXPORT const char* ShGetInfoLog(const ShHandle); +GLSLANG_EXPORT const void* ShGetExecutable(const ShHandle); +GLSLANG_EXPORT int ShSetVirtualAttributeBindings(const ShHandle, const ShBindingTable*); // to detect user aliasing +GLSLANG_EXPORT int ShSetFixedAttributeBindings(const ShHandle, const ShBindingTable*); // to force any physical mappings +// +// Tell the linker to never assign a vertex attribute to this list of physical attributes +// +GLSLANG_EXPORT int ShExcludeAttributes(const ShHandle, int *attributes, int count); + +// +// Returns the location ID of the named uniform. +// Returns -1 on error. +// +GLSLANG_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char* name); + +#ifdef __cplusplus + } // end extern "C" +#endif + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// Deferred-Lowering C++ Interface +// ----------------------------------- +// +// Below is a new alternate C++ interface, which deprecates the above +// opaque handle-based interface. +// +// The below is further designed to handle multiple compilation units per stage, where +// the intermediate results, including the parse tree, are preserved until link time, +// rather than the above interface which is designed to have each compilation unit +// lowered at compile time. In the above model, linking occurs on the lowered results, +// whereas in this model intra-stage linking can occur at the parse tree +// (treeRoot in TIntermediate) level, and then a full stage can be lowered.
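Before moving on to that newer interface, a minimal sketch of driving the legacy C interface above may be useful (illustrative only, not part of the header; `source` and `resources` are hypothetical placeholders for a null-terminated GLSL string and a populated TBuiltInResource):

    #include <cstdio>
    #include "glslang/Public/ShaderLang.h" // include path is an assumption; adjust per install

    // Hedged sketch: compile one fragment shader through the opaque-handle C interface.
    bool compileOnce(const char* source, const TBuiltInResource& resources)
    {
        ShInitialize();                                   // once per process
        ShHandle compiler = ShConstructCompiler(EShLangFragment, 0 /*debugOptions*/);
        const char* strings[] = { source };
        // Null `lengths` means each string is treated as null-terminated.
        int ok = ShCompile(compiler, strings, 1, nullptr, EShOptNone, &resources, 0);
        if (!ok)                                          // non-zero return means success
            printf("%s\n", ShGetInfoLog(compiler));
        ShDestruct(compiler);
        ShFinalize();                                     // at process shutdown
        return ok != 0;
    }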
+// + +#include <list> +#include <string> +#include <utility> + +class TCompiler; +class TInfoSink; + +namespace glslang { + +struct Version { + int major; + int minor; + int patch; + const char* flavor; +}; + +GLSLANG_EXPORT Version GetVersion(); +GLSLANG_EXPORT const char* GetEsslVersionString(); +GLSLANG_EXPORT const char* GetGlslVersionString(); +GLSLANG_EXPORT int GetKhronosToolId(); + +class TIntermediate; +class TProgram; +class TPoolAllocator; + +// Call this exactly once per process before using anything else +GLSLANG_EXPORT bool InitializeProcess(); + +// Call once per process to tear down everything +GLSLANG_EXPORT void FinalizeProcess(); + +// Resource type for IO resolver +enum TResourceType { + EResSampler, + EResTexture, + EResImage, + EResUbo, + EResSsbo, + EResUav, + EResCount +}; + +enum TBlockStorageClass +{ + EbsUniform = 0, + EbsStorageBuffer, + EbsPushConstant, + EbsNone, // not a uniform or buffer variable + EbsCount, +}; + +// Make one TShader per shader that you will link into a program. Then +// - provide the shader through setStrings() or setStringsWithLengths() +// - optionally call setEnv*(), see below for more detail +// - optionally use setPreamble() to set a special shader string that will be +// processed before all others but won't affect the validity of #version +// - optionally call addProcesses() for each setting/transform, +// see comment for class TProcesses +// - call parse(): source language and target environment must be selected +// either by correct setting of EShMessages sent to parse(), or by +// explicitly calling setEnv*() +// - query the info logs +// +// N.B.: Does not yet support having the same TShader instance being linked into +// multiple programs. +// +// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
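The checklist above reduces to only a few calls in the common case; a hedged sketch (assumes InitializeProcess() has already run, `src` points at valid Vulkan-dialect GLSL, and `resources` is a populated TBuiltInResource):

    glslang::TShader shader(EShLangFragment);
    shader.setStrings(&src, 1);
    // Input: Vulkan GLSL per version 100 of KHR_vulkan_glsl; target: Vulkan 1.1 / SPIR-V 1.3.
    shader.setEnvInput(glslang::EShSourceGlsl, EShLangFragment, glslang::EShClientVulkan, 100);
    shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetVulkan_1_1);
    shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
    if (!shader.parse(&resources, 110 /*defaultVersion*/, false /*forwardCompatible*/, EShMsgDefault))
        printf("%s\n", shader.getInfoLog());   // query the info logs on failure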
+// +class TShader { +public: + GLSLANG_EXPORT explicit TShader(EShLanguage); + GLSLANG_EXPORT virtual ~TShader(); + GLSLANG_EXPORT void setStrings(const char* const* s, int n); + GLSLANG_EXPORT void setStringsWithLengths( + const char* const* s, const int* l, int n); + GLSLANG_EXPORT void setStringsWithLengthsAndNames( + const char* const* s, const int* l, const char* const* names, int n); + void setPreamble(const char* s) { preamble = s; } + GLSLANG_EXPORT void setEntryPoint(const char* entryPoint); + GLSLANG_EXPORT void setSourceEntryPoint(const char* sourceEntryPointName); + GLSLANG_EXPORT void addProcesses(const std::vector<std::string>&); + GLSLANG_EXPORT void setUniqueId(unsigned long long id); + GLSLANG_EXPORT void setOverrideVersion(int version); + GLSLANG_EXPORT void setDebugInfo(bool debugInfo); + + // IO resolver binding data: see comments in ShaderLang.cpp + GLSLANG_EXPORT void setShiftBinding(TResourceType res, unsigned int base); + GLSLANG_EXPORT void setShiftSamplerBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftTextureBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftImageBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftUboBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftUavBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftCbufferBinding(unsigned int base); // synonym for setShiftUboBinding + GLSLANG_EXPORT void setShiftSsboBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftBindingForSet(TResourceType res, unsigned int base, unsigned int set); + GLSLANG_EXPORT void setResourceSetBinding(const std::vector<std::string>& base); + GLSLANG_EXPORT void setAutoMapBindings(bool map); + GLSLANG_EXPORT void setAutoMapLocations(bool map); + GLSLANG_EXPORT void addUniformLocationOverride(const char* name, int loc); + GLSLANG_EXPORT void setUniformLocationBase(int base); + GLSLANG_EXPORT void setInvertY(bool invert); + GLSLANG_EXPORT void setDxPositionW(bool dxPosW); + GLSLANG_EXPORT void setEnhancedMsgs(); +#ifdef ENABLE_HLSL + GLSLANG_EXPORT void setHlslIoMapping(bool hlslIoMap); + GLSLANG_EXPORT void setFlattenUniformArrays(bool flatten); +#endif + GLSLANG_EXPORT void setNoStorageFormat(bool useUnknownFormat); + GLSLANG_EXPORT void setNanMinMaxClamp(bool nanMinMaxClamp); + GLSLANG_EXPORT void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode); + GLSLANG_EXPORT void addBlockStorageOverride(const char* nameStr, glslang::TBlockStorageClass backing); + + GLSLANG_EXPORT void setGlobalUniformBlockName(const char* name); + GLSLANG_EXPORT void setAtomicCounterBlockName(const char* name); + GLSLANG_EXPORT void setGlobalUniformSet(unsigned int set); + GLSLANG_EXPORT void setGlobalUniformBinding(unsigned int binding); + GLSLANG_EXPORT void setAtomicCounterBlockSet(unsigned int set); + GLSLANG_EXPORT void setAtomicCounterBlockBinding(unsigned int binding); + + // For setting up the environment (cleared to nothingness in the constructor). + // These must be called so that parsing is done for the right source language and + // target environment, either indirectly through TranslateEnvironment() based on + // EShMessages et al., or directly by the user. + // + // setEnvInput: The input source language and stage.
If generating code for a + // specific client, the input client semantics to use and the + // version of that client's input semantics to use, otherwise + // use EShClientNone and version of 0, e.g. for validation mode. + // Note 'version' does not describe the target environment, + // just the version of the source dialect to compile under. + // For example, to choose the Vulkan dialect of GLSL defined by + // version 100 of the KHR_vulkan_glsl extension: lang = EShSourceGlsl, + // dialect = EShClientVulkan, and version = 100. + // + // See the definitions of TEnvironment, EShSource, EShLanguage, + // and EShClient for choices and more detail. + // + // setEnvClient: The client that will be hosting the execution, and its version. + // Note 'version' is not the version of the languages involved, but + // the version of the client environment. + // Use EShClientNone and version of 0 if there is no client, e.g. + // for validation mode. + // + // See EShTargetClientVersion for choices. + // + // setEnvTarget: The language to translate to when generating code, and that + // language's version. + // Use EShTargetNone and version of 0 if there is no client, e.g. + // for validation mode. + // + void setEnvInput(EShSource lang, EShLanguage envStage, EShClient client, int version) + { + environment.input.languageFamily = lang; + environment.input.stage = envStage; + environment.input.dialect = client; + environment.input.dialectVersion = version; + } + void setEnvClient(EShClient client, EShTargetClientVersion version) + { + environment.client.client = client; + environment.client.version = version; + } + void setEnvTarget(EShTargetLanguage lang, EShTargetLanguageVersion version) + { + environment.target.language = lang; + environment.target.version = version; + } + + void getStrings(const char* const* &s, int& n) { s = strings; n = numStrings; } + +#ifdef ENABLE_HLSL + void setEnvTargetHlslFunctionality1() { environment.target.hlslFunctionality1 = true; } + bool getEnvTargetHlslFunctionality1() const { return environment.target.hlslFunctionality1; } +#else + bool getEnvTargetHlslFunctionality1() const { return false; } +#endif + + void setEnvInputVulkanRulesRelaxed() { environment.input.vulkanRulesRelaxed = true; } + bool getEnvInputVulkanRulesRelaxed() const { return environment.input.vulkanRulesRelaxed; } + + // Interface to #include handlers. + // + // To support #include, a client of Glslang does the following: + // 1. Call setStringsWithLengthsAndNames to set the source strings and associated + // names. For example, the names could be the names of the files + // containing the shader sources. + // 2. Call parse with an Includer. + // + // When the Glslang parser encounters an #include directive, it calls + // the Includer's include method with the requested include name + // together with the current string name. The returned IncludeResult + // contains the fully resolved name of the included source, together + // with the source text that should replace the #include directive + // in the source stream. After parsing that source, Glslang will + // release the IncludeResult object. + class Includer { + public: + // An IncludeResult contains the resolved name and content of a source + // inclusion.
+ struct IncludeResult { + IncludeResult(const std::string& headerName, const char* const headerData, const size_t headerLength, void* userData) : + headerName(headerName), headerData(headerData), headerLength(headerLength), userData(userData) { } + // For a successful inclusion, the fully resolved name of the requested + // include. For example, in a file system-based includer, full resolution + // should convert a relative path name into an absolute path name. + // For a failed inclusion, this is an empty string. + const std::string headerName; + // The content and byte length of the requested inclusion. The + // Includer producing this IncludeResult retains ownership of the + // storage. + // For a failed inclusion, the header + // field points to a string containing error details. + const char* const headerData; + const size_t headerLength; + // Include resolver's context. + void* userData; + protected: + IncludeResult& operator=(const IncludeResult&); + IncludeResult(); + }; + + // For both include methods below: + // + // Resolves an inclusion request by name, current source name, + // and include depth. + // On success, returns an IncludeResult containing the resolved name + // and content of the include. + // On failure, returns a nullptr, or an IncludeResult + // with an empty string for the headerName and error details in the + // header field. + // The Includer retains ownership of the contents + // of the returned IncludeResult value, and those contents must + // remain valid until the releaseInclude method is called on that + // IncludeResult object. + // + // Note "local" vs. "system" is not an "either/or": "local" is an + // extra thing to do over "system". Both might get called, as per + // the C++ specification. + + // For the "system" or <>-style includes; search the "system" paths. + virtual IncludeResult* includeSystem(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // For the "local"-only aspect of a "" include. Should not search in the + // "system" paths, because on returning a failure, the parser will + // call includeSystem() to look in the "system" locations. + virtual IncludeResult* includeLocal(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // Signals that the parser will no longer use the contents of the + // specified IncludeResult. + virtual void releaseInclude(IncludeResult*) = 0; + virtual ~Includer() {} + }; + + // Fail all Includer searches + class ForbidIncluder : public Includer { + public: + virtual void releaseInclude(IncludeResult*) override { } + }; + + GLSLANG_EXPORT bool parse( + const TBuiltInResource*, int defaultVersion, EProfile defaultProfile, + bool forceDefaultVersionAndProfile, bool forwardCompatible, + EShMessages, Includer&); + + bool parse(const TBuiltInResource* res, int defaultVersion, EProfile defaultProfile, bool forceDefaultVersionAndProfile, + bool forwardCompatible, EShMessages messages) + { + TShader::ForbidIncluder includer; + return parse(res, defaultVersion, defaultProfile, forceDefaultVersionAndProfile, forwardCompatible, messages, includer); + } + + // Equivalent to parse() without a default profile and without forcing defaults. 
+ bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages) + { + return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages); + } + + bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages, + Includer& includer) + { + return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages, includer); + } + + // NOTE: Doing just preprocessing to obtain a correct preprocessed shader string + // is not an officially supported or fully working path. + GLSLANG_EXPORT bool preprocess( + const TBuiltInResource* builtInResources, int defaultVersion, + EProfile defaultProfile, bool forceDefaultVersionAndProfile, + bool forwardCompatible, EShMessages message, std::string* outputString, + Includer& includer); + + GLSLANG_EXPORT const char* getInfoLog(); + GLSLANG_EXPORT const char* getInfoDebugLog(); + EShLanguage getStage() const { return stage; } + TIntermediate* getIntermediate() const { return intermediate; } + +protected: + TPoolAllocator* pool; + EShLanguage stage; + TCompiler* compiler; + TIntermediate* intermediate; + TInfoSink* infoSink; + // strings and lengths follow the standard for glShaderSource: + // strings is an array of numStrings pointers to string data. + // lengths can be null, but if not it is an array of numStrings + // integers containing the length of the associated strings. + // if lengths is null or lengths[n] < 0 the associated strings[n] is + // assumed to be null-terminated. + // stringNames is the optional names for all the strings. If stringNames + // is null, then none of the strings has a name. If a certain element in + // stringNames is null, then the corresponding string does not have a name. + const char* const* strings; // explicit code to compile, see previous comment + const int* lengths; + const char* const* stringNames; + int numStrings; // size of the above arrays + const char* preamble; // string of implicit code to compile before the explicitly provided code + + // a function in the source string can be renamed FROM this TO the name given in setEntryPoint. + std::string sourceEntryPointName; + + // overrides #version in shader source or default version if #version isn't present + int overrideVersion; + + TEnvironment environment; + + friend class TProgram; + +private: + TShader& operator=(TShader&); +}; + +#if !defined(GLSLANG_WEB) + +// +// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+// + +// Data needed for just a single object at the granularity exchanged by the reflection API +class TObjectReflection { +public: + GLSLANG_EXPORT TObjectReflection(const std::string& pName, const TType& pType, int pOffset, int pGLDefineType, int pSize, int pIndex); + + const TType* getType() const { return type; } + GLSLANG_EXPORT int getBinding() const; + GLSLANG_EXPORT void dump() const; + static TObjectReflection badReflection() { return TObjectReflection(); } + + std::string name; + int offset; + int glDefineType; + int size; // data size in bytes for a block, array size for a (non-block) object that's an array + int index; + int counterIndex; + int numMembers; + int arrayStride; // stride of an array variable + int topLevelArraySize; // size of the top-level variable in a storage buffer member + int topLevelArrayStride; // stride of the top-level variable in a storage buffer member + EShLanguageMask stages; + +protected: + TObjectReflection() + : offset(-1), glDefineType(-1), size(-1), index(-1), counterIndex(-1), numMembers(-1), arrayStride(0), + topLevelArrayStride(0), stages(EShLanguageMask(0)), type(nullptr) + { + } + + const TType* type; +}; + +class TReflection; +class TIoMapper; +struct TVarEntryInfo; + +// Allows customizing the binding layout after linking. +// All used uniform variables will invoke at least validateBinding. +// If validateBinding returns true, then resolveBinding, +// resolveSet, and resolveUniformLocation are invoked to resolve the binding +// and descriptor set index, respectively. +// +// Invocations happen in a particular order: +// 1) all shader inputs +// 2) all shader outputs +// 3) all uniforms with binding and set already defined +// 4) all uniforms with binding but no set defined +// 5) all uniforms with set but no binding defined +// 6) all uniforms with no binding and no set defined +// +// mapIO will use this resolver in two phases. The first +// phase is a notification phase, calling the corresponding +// notify callbacks; this phase ends with a call to endNotifications. +// Phase two starts directly after the call to endNotifications +// and calls all other callbacks to validate and to get the +// bindings, sets, locations, component and color indices. +// +// NOTE: limit checks are still applied to bindings and sets +// and may result in an error. +class TIoMapResolver +{ +public: + virtual ~TIoMapResolver() {} + + // Should return true if the resulting/current binding would be okay. + // Basic idea is to do aliasing binding checks with this. + virtual bool validateBinding(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return a value >= 0 if the current binding should be overridden. + // Return -1 if the current binding (including no binding) should be kept. + virtual int resolveBinding(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return a value >= 0 if the current set should be overridden. + // Return -1 if the current set (including no set) should be kept. + virtual int resolveSet(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return a value >= 0 if the current location should be overridden. + // Return -1 if the current location (including no location) should be kept. + virtual int resolveUniformLocation(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return true if the resulting/current setup would be okay. + // Basic idea is to do aliasing checks and reject invalid semantic names.
+ virtual bool validateInOut(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return a value >= 0 if the current location should be overridden. + // Return -1 if the current location (including no location) should be kept. + virtual int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return a value >= 0 if the current component index should be overridden. + // Return -1 if the current component index (including no index) should be kept. + virtual int resolveInOutComponent(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Should return a value >= 0 if the current color index should be overridden. + // Return -1 if the current color index (including no index) should be kept. + virtual int resolveInOutIndex(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Notification of a uniform variable + virtual void notifyBinding(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Notification of an in or out variable + virtual void notifyInOut(EShLanguage stage, TVarEntryInfo& ent) = 0; + // Called by mapIO when it starts its notify pass for the given stage + virtual void beginNotifications(EShLanguage stage) = 0; + // Called by mapIO when it has finished the notify pass + virtual void endNotifications(EShLanguage stage) = 0; + // Called by mapIO when it starts its resolve pass for the given stage + virtual void beginResolve(EShLanguage stage) = 0; + // Called by mapIO when it has finished the resolve pass + virtual void endResolve(EShLanguage stage) = 0; + // Called by mapIO when it starts its symbol collect for the given stage + virtual void beginCollect(EShLanguage stage) = 0; + // Called by mapIO when it has finished the symbol collect + virtual void endCollect(EShLanguage stage) = 0; + // Called by TSlotCollector to resolve storage locations or bindings + virtual void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0; + // Called by TSlotCollector to resolve resource locations or bindings + virtual void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0; + // Called by mapIO.addStage to set the shader stage mask, marking a stage as added to this pipeline + virtual void addStage(EShLanguage stage, TIntermediate& stageIntermediate) = 0; +}; + +#endif // !GLSLANG_WEB + +// Make one TProgram per set of shaders that will get linked together. Add all +// the shaders that are to be linked together. After calling shader.parse() +// for all shaders, call link(). +// +// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
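Putting TShader and TProgram together, a hedged end-to-end sketch (assumes `vs` and `fs` are TShader objects whose parse() calls succeeded):

    glslang::TProgram program;              // destruct before `vs`/`fs`, per the N.B. above
    program.addShader(&vs);
    program.addShader(&fs);
    if (!program.link(EShMsgDefault))
        printf("%s\n", program.getInfoLog());
    // Each stage's intermediate can then be lowered, e.g. with GlslangToSpv()
    // from SPIRV/GlslangToSpv.h (declared later in this same diff):
    //   std::vector<unsigned int> spirv;
    //   glslang::GlslangToSpv(*program.getIntermediate(EShLangFragment), spirv);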
+// +class TProgram { +public: + GLSLANG_EXPORT TProgram(); + GLSLANG_EXPORT virtual ~TProgram(); + void addShader(TShader* shader) { stages[shader->stage].push_back(shader); } + std::list<TShader*>& getShaders(EShLanguage stage) { return stages[stage]; } + // Link Validation interface + GLSLANG_EXPORT bool link(EShMessages); + GLSLANG_EXPORT const char* getInfoLog(); + GLSLANG_EXPORT const char* getInfoDebugLog(); + + TIntermediate* getIntermediate(EShLanguage stage) const { return intermediate[stage]; } + +#if !defined(GLSLANG_WEB) + + // Reflection Interface + + // call first, to do liveness analysis, index mapping, etc.; returns false on failure + GLSLANG_EXPORT bool buildReflection(int opts = EShReflectionDefault); + GLSLANG_EXPORT unsigned getLocalSize(int dim) const; // return dim'th local size + GLSLANG_EXPORT int getReflectionIndex(const char *name) const; + GLSLANG_EXPORT int getReflectionPipeIOIndex(const char* name, const bool inOrOut) const; + GLSLANG_EXPORT int getNumUniformVariables() const; + GLSLANG_EXPORT const TObjectReflection& getUniform(int index) const; + GLSLANG_EXPORT int getNumUniformBlocks() const; + GLSLANG_EXPORT const TObjectReflection& getUniformBlock(int index) const; + GLSLANG_EXPORT int getNumPipeInputs() const; + GLSLANG_EXPORT const TObjectReflection& getPipeInput(int index) const; + GLSLANG_EXPORT int getNumPipeOutputs() const; + GLSLANG_EXPORT const TObjectReflection& getPipeOutput(int index) const; + GLSLANG_EXPORT int getNumBufferVariables() const; + GLSLANG_EXPORT const TObjectReflection& getBufferVariable(int index) const; + GLSLANG_EXPORT int getNumBufferBlocks() const; + GLSLANG_EXPORT const TObjectReflection& getBufferBlock(int index) const; + GLSLANG_EXPORT int getNumAtomicCounters() const; + GLSLANG_EXPORT const TObjectReflection& getAtomicCounter(int index) const; + + // Legacy Reflection Interface - expressed in terms of above interface + + // can be used for glGetProgramiv(GL_ACTIVE_UNIFORMS) + int getNumLiveUniformVariables() const { return getNumUniformVariables(); } + + // can be used for glGetProgramiv(GL_ACTIVE_UNIFORM_BLOCKS) + int getNumLiveUniformBlocks() const { return getNumUniformBlocks(); } + + // can be used for glGetProgramiv(GL_ACTIVE_ATTRIBUTES) + int getNumLiveAttributes() const { return getNumPipeInputs(); } + + // can be used for glGetUniformIndices() + int getUniformIndex(const char *name) const { return getReflectionIndex(name); } + + int getPipeIOIndex(const char *name, const bool inOrOut) const + { return getReflectionPipeIOIndex(name, inOrOut); } + + // can be used for "name" part of glGetActiveUniform() + const char *getUniformName(int index) const { return getUniform(index).name.c_str(); } + + // returns the binding number + int getUniformBinding(int index) const { return getUniform(index).getBinding(); } + + // returns the shader stages where a uniform is present + EShLanguageMask getUniformStages(int index) const { return getUniform(index).stages; } + + // can be used for glGetActiveUniformsiv(GL_UNIFORM_BLOCK_INDEX) + int getUniformBlockIndex(int index) const { return getUniform(index).index; } + + // can be used for glGetActiveUniformsiv(GL_UNIFORM_TYPE) + int getUniformType(int index) const { return getUniform(index).glDefineType; } + + // can be used for glGetActiveUniformsiv(GL_UNIFORM_OFFSET) + int getUniformBufferOffset(int index) const { return getUniform(index).offset; } + + // can be used for glGetActiveUniformsiv(GL_UNIFORM_SIZE) + int getUniformArraySize(int index) const { return getUniform(index).size; } + + //
returns a TType* + const TType *getUniformTType(int index) const { return getUniform(index).getType(); } + + // can be used for glGetActiveUniformBlockName() + const char *getUniformBlockName(int index) const { return getUniformBlock(index).name.c_str(); } + + // can be used for glGetActiveUniformBlockiv(UNIFORM_BLOCK_DATA_SIZE) + int getUniformBlockSize(int index) const { return getUniformBlock(index).size; } + + // returns the block binding number + int getUniformBlockBinding(int index) const { return getUniformBlock(index).getBinding(); } + + // returns block index of associated counter. + int getUniformBlockCounterIndex(int index) const { return getUniformBlock(index).counterIndex; } + + // returns a TType* + const TType *getUniformBlockTType(int index) const { return getUniformBlock(index).getType(); } + + // can be used for glGetActiveAttrib() + const char *getAttributeName(int index) const { return getPipeInput(index).name.c_str(); } + + // can be used for glGetActiveAttrib() + int getAttributeType(int index) const { return getPipeInput(index).glDefineType; } + + // returns a TType* + const TType *getAttributeTType(int index) const { return getPipeInput(index).getType(); } + + GLSLANG_EXPORT void dumpReflection(); + // I/O mapping: apply base offsets and map live unbound variables + // If resolver is not provided it uses the previous approach + // and respects auto assignment and offsets. + GLSLANG_EXPORT bool mapIO(TIoMapResolver* pResolver = nullptr, TIoMapper* pIoMapper = nullptr); +#endif // !GLSLANG_WEB + +protected: + GLSLANG_EXPORT bool linkStage(EShLanguage, EShMessages); + GLSLANG_EXPORT bool crossStageCheck(EShMessages); + + TPoolAllocator* pool; + std::list<TShader*> stages[EShLangCount]; + TIntermediate* intermediate[EShLangCount]; + bool newedIntermediate[EShLangCount]; // track which intermediates were "new" versus reusing a singleton unit in a stage + TInfoSink* infoSink; +#if !defined(GLSLANG_WEB) + TReflection* reflection; +#endif + bool linked; + +private: + TProgram(TProgram&); + TProgram& operator=(TProgram&); +}; + +} // end namespace glslang + +#endif // _COMPILER_INTERFACE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/resource_limits_c.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/resource_limits_c.h new file mode 100644 index 0000000..05aa8eb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/Public/resource_limits_c.h @@ -0,0 +1,57 @@ +/** +BSD 2-Clause License + +Copyright (c) 2020, Travis Fort +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**/ + +#ifndef _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_ +#define _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_ + +#include "../Include/glslang_c_interface.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Returns a struct that can be used to create custom resource values. +glslang_resource_t* glslang_resource(void); + +// These are the default resources for TBuiltInResources, used for both +// - parsing this string for the case where the user didn't supply one, +// - dumping out a template for user construction of a config file. +const glslang_resource_t* glslang_default_resource(void); + +// Returns the DefaultTBuiltInResource as a human-readable string. +// NOTE: User is responsible for freeing this string. +const char* glslang_default_resource_string(); + +// Decodes the resource limits from |config| to |resources|. +void glslang_decode_resource_limits(glslang_resource_t* resources, char* config); + +#ifdef __cplusplus +} +#endif + +#endif // _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.AMD.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.AMD.h new file mode 100644 index 0000000..009d2f1 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.AMD.h @@ -0,0 +1,108 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS.
+*/ + +#ifndef GLSLextAMD_H +#define GLSLextAMD_H + +static const int GLSLextAMDVersion = 100; +static const int GLSLextAMDRevision = 7; + +// SPV_AMD_shader_ballot +static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; + +enum ShaderBallotAMD { + ShaderBallotBadAMD = 0, // Don't use + + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4, + + ShaderBallotCountAMD +}; + +// SPV_AMD_shader_trinary_minmax +static const char* const E_SPV_AMD_shader_trinary_minmax = "SPV_AMD_shader_trinary_minmax"; + +enum ShaderTrinaryMinMaxAMD { + ShaderTrinaryMinMaxBadAMD = 0, // Don't use + + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9, + + ShaderTrinaryMinMaxCountAMD +}; + +// SPV_AMD_shader_explicit_vertex_parameter +static const char* const E_SPV_AMD_shader_explicit_vertex_parameter = "SPV_AMD_shader_explicit_vertex_parameter"; + +enum ShaderExplicitVertexParameterAMD { + ShaderExplicitVertexParameterBadAMD = 0, // Don't use + + InterpolateAtVertexAMD = 1, + + ShaderExplicitVertexParameterCountAMD +}; + +// SPV_AMD_gcn_shader +static const char* const E_SPV_AMD_gcn_shader = "SPV_AMD_gcn_shader"; + +enum GcnShaderAMD { + GcnShaderBadAMD = 0, // Don't use + + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3, + + GcnShaderCountAMD +}; + +// SPV_AMD_gpu_shader_half_float +static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; + +// SPV_AMD_texture_gather_bias_lod +static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_gather_bias_lod"; + +// SPV_AMD_gpu_shader_int16 +static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"; + +// SPV_AMD_shader_image_load_store_lod +static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod"; + +// SPV_AMD_shader_fragment_mask +static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask"; + +// SPV_AMD_gpu_shader_half_float_fetch +static const char* const E_SPV_AMD_gpu_shader_half_float_fetch = "SPV_AMD_gpu_shader_half_float_fetch"; + +#endif // #ifndef GLSLextAMD_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.EXT.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.EXT.h new file mode 100644 index 0000000..a247b4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.EXT.h @@ -0,0 +1,44 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextEXT_H +#define GLSLextEXT_H + +static const int GLSLextEXTVersion = 100; +static const int GLSLextEXTRevision = 2; + +static const char* const E_SPV_EXT_shader_stencil_export = "SPV_EXT_shader_stencil_export"; +static const char* const E_SPV_EXT_shader_viewport_index_layer = "SPV_EXT_shader_viewport_index_layer"; +static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered"; +static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density"; +static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation"; +static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add"; +static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add"; +static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max"; +static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64"; +static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader"; + +#endif // #ifndef GLSLextEXT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.KHR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.KHR.h new file mode 100644 index 0000000..d5c670f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.KHR.h @@ -0,0 +1,58 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. +** Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextKHR_H +#define GLSLextKHR_H + +static const int GLSLextKHRVersion = 100; +static const int GLSLextKHRRevision = 3; + +static const char* const E_SPV_KHR_shader_ballot = "SPV_KHR_shader_ballot"; +static const char* const E_SPV_KHR_subgroup_vote = "SPV_KHR_subgroup_vote"; +static const char* const E_SPV_KHR_device_group = "SPV_KHR_device_group"; +static const char* const E_SPV_KHR_multiview = "SPV_KHR_multiview"; +static const char* const E_SPV_KHR_shader_draw_parameters = "SPV_KHR_shader_draw_parameters"; +static const char* const E_SPV_KHR_16bit_storage = "SPV_KHR_16bit_storage"; +static const char* const E_SPV_KHR_8bit_storage = "SPV_KHR_8bit_storage"; +static const char* const E_SPV_KHR_storage_buffer_storage_class = "SPV_KHR_storage_buffer_storage_class"; +static const char* const E_SPV_KHR_post_depth_coverage = "SPV_KHR_post_depth_coverage"; +static const char* const E_SPV_KHR_vulkan_memory_model = "SPV_KHR_vulkan_memory_model"; +static const char* const E_SPV_EXT_physical_storage_buffer = "SPV_EXT_physical_storage_buffer"; +static const char* const E_SPV_KHR_physical_storage_buffer = "SPV_KHR_physical_storage_buffer"; +static const char* const E_SPV_EXT_fragment_shader_interlock = "SPV_EXT_fragment_shader_interlock"; +static const char* const E_SPV_KHR_shader_clock = "SPV_KHR_shader_clock"; +static const char* const E_SPV_KHR_non_semantic_info = "SPV_KHR_non_semantic_info"; +static const char* const E_SPV_KHR_ray_tracing = "SPV_KHR_ray_tracing"; +static const char* const E_SPV_KHR_ray_query = "SPV_KHR_ray_query"; +static const char* const E_SPV_KHR_fragment_shading_rate = "SPV_KHR_fragment_shading_rate"; +static const char* const E_SPV_KHR_terminate_invocation = "SPV_KHR_terminate_invocation"; +static const char* const E_SPV_KHR_workgroup_memory_explicit_layout = "SPV_KHR_workgroup_memory_explicit_layout"; +static const char* const E_SPV_KHR_subgroup_uniform_control_flow = "SPV_KHR_subgroup_uniform_control_flow"; +static const char* const E_SPV_KHR_fragment_shader_barycentric = "SPV_KHR_fragment_shader_barycentric"; +static const char* const E_SPV_AMD_shader_early_and_late_fragment_tests = "SPV_AMD_shader_early_and_late_fragment_tests"; + +#endif // #ifndef GLSLextKHR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.NV.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.NV.h new file mode 100644 index 0000000..93c98bf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.ext.NV.h @@ -0,0 +1,84 @@ +/* +** Copyright (c) 2014-2017 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextNV_H +#define GLSLextNV_H + +enum BuiltIn; +enum Decoration; +enum Op; +enum Capability; + +static const int GLSLextNVVersion = 100; +static const int GLSLextNVRevision = 11; + +//SPV_NV_sample_mask_override_coverage +const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage"; + +//SPV_NV_geometry_shader_passthrough +const char* const E_SPV_NV_geometry_shader_passthrough = "SPV_NV_geometry_shader_passthrough"; + +//SPV_NV_viewport_array2 +const char* const E_SPV_NV_viewport_array2 = "SPV_NV_viewport_array2"; +const char* const E_ARB_shader_viewport_layer_array = "SPV_ARB_shader_viewport_layer_array"; + +//SPV_NV_stereo_view_rendering +const char* const E_SPV_NV_stereo_view_rendering = "SPV_NV_stereo_view_rendering"; + +//SPV_NVX_multiview_per_view_attributes +const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes"; + +//SPV_NV_shader_subgroup_partitioned +const char* const E_SPV_NV_shader_subgroup_partitioned = "SPV_NV_shader_subgroup_partitioned"; + +//SPV_NV_fragment_shader_barycentric +const char* const E_SPV_NV_fragment_shader_barycentric = "SPV_NV_fragment_shader_barycentric"; + +//SPV_NV_compute_shader_derivatives +const char* const E_SPV_NV_compute_shader_derivatives = "SPV_NV_compute_shader_derivatives"; + +//SPV_NV_shader_image_footprint +const char* const E_SPV_NV_shader_image_footprint = "SPV_NV_shader_image_footprint"; + +//SPV_NV_mesh_shader +const char* const E_SPV_NV_mesh_shader = "SPV_NV_mesh_shader"; + +//SPV_NV_raytracing +const char* const E_SPV_NV_ray_tracing = "SPV_NV_ray_tracing"; + +//SPV_NV_ray_tracing_motion_blur +const char* const E_SPV_NV_ray_tracing_motion_blur = "SPV_NV_ray_tracing_motion_blur"; + +//SPV_NV_shading_rate +const char* const E_SPV_NV_shading_rate = "SPV_NV_shading_rate"; + +//SPV_NV_cooperative_matrix +const char* const E_SPV_NV_cooperative_matrix = "SPV_NV_cooperative_matrix"; + +//SPV_NV_shader_sm_builtins +const char* const E_SPV_NV_shader_sm_builtins = "SPV_NV_shader_sm_builtins"; + +#endif // #ifndef GLSLextNV_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.std.450.h 
b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.std.450.h new file mode 100644 index 0000000..df31092 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + 
GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GlslangToSpv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GlslangToSpv.h new file mode 100644 index 0000000..3907be4 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/GlslangToSpv.h @@ -0,0 +1,61 @@ +// +// Copyright (C) 2014 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + #pragma warning(disable : 4464) // relative include path contains '..' 
+#endif + +#include "SpvTools.h" +#include "glslang/Include/intermediate.h" + +#include <string> +#include <vector> + +#include "Logger.h" + +namespace glslang { + +void GetSpirvVersion(std::string&); +int GetSpirvGeneratorVersion(); +void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv, + SpvOptions* options = nullptr); +void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv, + spv::SpvBuildLogger* logger, SpvOptions* options = nullptr); +void OutputSpvBin(const std::vector<unsigned int>& spirv, const char* baseName); +void OutputSpvHex(const std::vector<unsigned int>& spirv, const char* baseName, const char* varName); + +} diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/Logger.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/Logger.h new file mode 100644 index 0000000..411367c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/Logger.h @@ -0,0 +1,83 @@ +// +// Copyright (C) 2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef GLSLANG_SPIRV_LOGGER_H +#define GLSLANG_SPIRV_LOGGER_H + +#include <string> +#include <vector> + +namespace spv { + +// A class for holding all SPIR-V build status messages, including +// missing/TBD functionalities, warnings, and errors. +class SpvBuildLogger { +public: + SpvBuildLogger() {} + +#ifdef GLSLANG_WEB + void tbdFunctionality(const std::string& f) { } + void missingFunctionality(const std::string& f) { } + void warning(const std::string& w) { } + void error(const std::string& e) { errors.push_back(e); } + std::string getAllMessages() { return ""; } +#else + + // Registers a TBD functionality. + void tbdFunctionality(const std::string& f); + // Registers a missing functionality. + void missingFunctionality(const std::string& f); + + // Logs a warning. + void warning(const std::string& w) { warnings.push_back(w); } + // Logs an error.
+ void error(const std::string& e) { errors.push_back(e); } + + // Returns all messages accumulated in the order of: + // TBD functionalities, missing functionalities, warnings, errors. + std::string getAllMessages() const; +#endif + +private: + SpvBuildLogger(const SpvBuildLogger&); + + std::vector<std::string> tbdFeatures; + std::vector<std::string> missingFeatures; + std::vector<std::string> warnings; + std::vector<std::string> errors; +}; + +} // end spv namespace + +#endif // GLSLANG_SPIRV_LOGGER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/NonSemanticDebugPrintf.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/NonSemanticDebugPrintf.h new file mode 100644 index 0000000..83796d7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/NonSemanticDebugPrintf.h @@ -0,0 +1,50 @@ +// Copyright (c) 2020 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and/or associated documentation files (the +// "Materials"), to deal in the Materials without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Materials, and to +// permit persons to whom the Materials are furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS +// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS +// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT +// https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +// + +#ifndef SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ +#define SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticDebugPrintfRevision = 1, + NonSemanticDebugPrintfRevision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticDebugPrintfInstructions { + NonSemanticDebugPrintfDebugPrintf = 1, + NonSemanticDebugPrintfInstructionsMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h new file mode 100644 index 0000000..c52f32f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h @@ -0,0 +1,171 @@ +// Copyright (c) 2018 The Khronos Group Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +#ifndef SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ +#define SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticShaderDebugInfo100Version = 100, + NonSemanticShaderDebugInfo100Version_BitWidthPadding = 0x7fffffff +}; +enum { + NonSemanticShaderDebugInfo100Revision = 6, + NonSemanticShaderDebugInfo100Revision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100Instructions { + NonSemanticShaderDebugInfo100DebugInfoNone = 0, + NonSemanticShaderDebugInfo100DebugCompilationUnit = 1, + NonSemanticShaderDebugInfo100DebugTypeBasic = 2, + NonSemanticShaderDebugInfo100DebugTypePointer = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifier = 4, + NonSemanticShaderDebugInfo100DebugTypeArray = 5, + NonSemanticShaderDebugInfo100DebugTypeVector = 6, + NonSemanticShaderDebugInfo100DebugTypedef = 7, + NonSemanticShaderDebugInfo100DebugTypeFunction = 8, + NonSemanticShaderDebugInfo100DebugTypeEnum = 9, + NonSemanticShaderDebugInfo100DebugTypeComposite = 10, + NonSemanticShaderDebugInfo100DebugTypeMember = 11, + NonSemanticShaderDebugInfo100DebugTypeInheritance = 12, + NonSemanticShaderDebugInfo100DebugTypePtrToMember = 13, + NonSemanticShaderDebugInfo100DebugTypeTemplate = 14, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameter = 15, + NonSemanticShaderDebugInfo100DebugTypeTemplateTemplateParameter = 16, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameterPack = 17, + NonSemanticShaderDebugInfo100DebugGlobalVariable = 18, + NonSemanticShaderDebugInfo100DebugFunctionDeclaration = 19, + NonSemanticShaderDebugInfo100DebugFunction = 20, + NonSemanticShaderDebugInfo100DebugLexicalBlock = 21, + NonSemanticShaderDebugInfo100DebugLexicalBlockDiscriminator = 22, + NonSemanticShaderDebugInfo100DebugScope = 23, + NonSemanticShaderDebugInfo100DebugNoScope = 24, + NonSemanticShaderDebugInfo100DebugInlinedAt = 25, + NonSemanticShaderDebugInfo100DebugLocalVariable = 26, + NonSemanticShaderDebugInfo100DebugInlinedVariable = 27, + NonSemanticShaderDebugInfo100DebugDeclare = 28, + NonSemanticShaderDebugInfo100DebugValue = 29, + NonSemanticShaderDebugInfo100DebugOperation = 30, + 
NonSemanticShaderDebugInfo100DebugExpression = 31, + NonSemanticShaderDebugInfo100DebugMacroDef = 32, + NonSemanticShaderDebugInfo100DebugMacroUndef = 33, + NonSemanticShaderDebugInfo100DebugImportedEntity = 34, + NonSemanticShaderDebugInfo100DebugSource = 35, + NonSemanticShaderDebugInfo100DebugFunctionDefinition = 101, + NonSemanticShaderDebugInfo100DebugSourceContinued = 102, + NonSemanticShaderDebugInfo100DebugLine = 103, + NonSemanticShaderDebugInfo100DebugNoLine = 104, + NonSemanticShaderDebugInfo100DebugBuildIdentifier = 105, + NonSemanticShaderDebugInfo100DebugStoragePath = 106, + NonSemanticShaderDebugInfo100DebugEntryPoint = 107, + NonSemanticShaderDebugInfo100DebugTypeMatrix = 108, + NonSemanticShaderDebugInfo100InstructionsMax = 0x7fffffff +}; + + +enum NonSemanticShaderDebugInfo100DebugInfoFlags { + NonSemanticShaderDebugInfo100None = 0x0000, + NonSemanticShaderDebugInfo100FlagIsProtected = 0x01, + NonSemanticShaderDebugInfo100FlagIsPrivate = 0x02, + NonSemanticShaderDebugInfo100FlagIsPublic = 0x03, + NonSemanticShaderDebugInfo100FlagIsLocal = 0x04, + NonSemanticShaderDebugInfo100FlagIsDefinition = 0x08, + NonSemanticShaderDebugInfo100FlagFwdDecl = 0x10, + NonSemanticShaderDebugInfo100FlagArtificial = 0x20, + NonSemanticShaderDebugInfo100FlagExplicit = 0x40, + NonSemanticShaderDebugInfo100FlagPrototyped = 0x80, + NonSemanticShaderDebugInfo100FlagObjectPointer = 0x100, + NonSemanticShaderDebugInfo100FlagStaticMember = 0x200, + NonSemanticShaderDebugInfo100FlagIndirectVariable = 0x400, + NonSemanticShaderDebugInfo100FlagLValueReference = 0x800, + NonSemanticShaderDebugInfo100FlagRValueReference = 0x1000, + NonSemanticShaderDebugInfo100FlagIsOptimized = 0x2000, + NonSemanticShaderDebugInfo100FlagIsEnumClass = 0x4000, + NonSemanticShaderDebugInfo100FlagTypePassByValue = 0x8000, + NonSemanticShaderDebugInfo100FlagTypePassByReference = 0x10000, + NonSemanticShaderDebugInfo100FlagUnknownPhysicalLayout = 0x20000, + NonSemanticShaderDebugInfo100DebugInfoFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100BuildIdentifierFlags { + NonSemanticShaderDebugInfo100IdentifierPossibleDuplicates = 0x01, + NonSemanticShaderDebugInfo100BuildIdentifierFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncoding { + NonSemanticShaderDebugInfo100Unspecified = 0, + NonSemanticShaderDebugInfo100Address = 1, + NonSemanticShaderDebugInfo100Boolean = 2, + NonSemanticShaderDebugInfo100Float = 3, + NonSemanticShaderDebugInfo100Signed = 4, + NonSemanticShaderDebugInfo100SignedChar = 5, + NonSemanticShaderDebugInfo100Unsigned = 6, + NonSemanticShaderDebugInfo100UnsignedChar = 7, + NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncodingMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugCompositeType { + NonSemanticShaderDebugInfo100Class = 0, + NonSemanticShaderDebugInfo100Structure = 1, + NonSemanticShaderDebugInfo100Union = 2, + NonSemanticShaderDebugInfo100DebugCompositeTypeMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugTypeQualifier { + NonSemanticShaderDebugInfo100ConstType = 0, + NonSemanticShaderDebugInfo100VolatileType = 1, + NonSemanticShaderDebugInfo100RestrictType = 2, + NonSemanticShaderDebugInfo100AtomicType = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifierMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugOperation { + NonSemanticShaderDebugInfo100Deref = 0, + NonSemanticShaderDebugInfo100Plus = 1, + NonSemanticShaderDebugInfo100Minus = 2, + NonSemanticShaderDebugInfo100PlusUconst = 3, + 
NonSemanticShaderDebugInfo100BitPiece = 4, + NonSemanticShaderDebugInfo100Swap = 5, + NonSemanticShaderDebugInfo100Xderef = 6, + NonSemanticShaderDebugInfo100StackValue = 7, + NonSemanticShaderDebugInfo100Constu = 8, + NonSemanticShaderDebugInfo100Fragment = 9, + NonSemanticShaderDebugInfo100DebugOperationMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugImportedEntity { + NonSemanticShaderDebugInfo100ImportedModule = 0, + NonSemanticShaderDebugInfo100ImportedDeclaration = 1, + NonSemanticShaderDebugInfo100DebugImportedEntityMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SPVRemapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SPVRemapper.h new file mode 100644 index 0000000..d216946 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SPVRemapper.h @@ -0,0 +1,312 @@ +// +// Copyright (C) 2015 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef SPIRVREMAPPER_H +#define SPIRVREMAPPER_H + +#include <string> +#include <vector> +#include <cstdlib> +#include <exception> + +namespace spv { + +// MSVC defines __cplusplus as an older value, even when it supports almost all of 11. +// We handle that here by making our own symbol.
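+// A minimal remapper usage sketch (an illustration, not part of the upstream
+// header), assuming the C++11 path below is compiled in. 'loadWords' is a
+// hypothetical helper that reads a .spv file into 32-bit words:
+//
+//   std::vector<std::uint32_t> words = loadWords("in.spv");
+//   spv::spirvbin_t remapper;
+//   remapper.remap(words, spv::spirvbin_t::DO_EVERYTHING);
+//   // 'words' now holds the remapped, debug-stripped module, ready to write out.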
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1700) +# define use_cpp11 1 +#endif + +class spirvbin_base_t +{ +public: + enum Options { + NONE = 0, + STRIP = (1<<0), + MAP_TYPES = (1<<1), + MAP_NAMES = (1<<2), + MAP_FUNCS = (1<<3), + DCE_FUNCS = (1<<4), + DCE_VARS = (1<<5), + DCE_TYPES = (1<<6), + OPT_LOADSTORE = (1<<7), + OPT_FWD_LS = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV + MAP_ALL = (MAP_TYPES | MAP_NAMES | MAP_FUNCS), + DCE_ALL = (DCE_FUNCS | DCE_VARS | DCE_TYPES), + OPT_ALL = (OPT_LOADSTORE), + + ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL), + DO_EVERYTHING = (STRIP | ALL_BUT_STRIP) + }; +}; + +} // namespace SPV + +#if !defined (use_cpp11) +#include <cstdio> +#include <cstdint> + +namespace spv { +class spirvbin_t : public spirvbin_base_t +{ +public: + spirvbin_t(int /*verbose = 0*/) { } + + void remap(std::vector<std::uint32_t>& /*spv*/, unsigned int /*opts = 0*/) + { + printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n"); + exit(5); + } +}; + +} // namespace SPV + +#else // defined (use_cpp11) + +#include <functional> +#include <cstdint> +#include <unordered_map> +#include <unordered_set> +#include <map> +#include <set> +#include <cassert> + +#include "spirv.hpp" +#include "spvIR.h" + +namespace spv { + +// class to hold SPIR-V binary data for remapping, DCE, and debug stripping +class spirvbin_t : public spirvbin_base_t +{ +public: + spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose), errorLatch(false) + { } + + virtual ~spirvbin_t() { } + + // remap on an existing binary in memory + void remap(std::vector<std::uint32_t>& spv, const std::vector<std::string>& whiteListStrings, + std::uint32_t opts = DO_EVERYTHING); + + // remap on an existing binary in memory - legacy interface without white list + void remap(std::vector<std::uint32_t>& spv, std::uint32_t opts = DO_EVERYTHING); + + // Type for error/log handler functions + typedef std::function<void(const std::string&)> errorfn_t; + typedef std::function<void(const std::string&)> logfn_t; + + // Register error/log handling functions (can be lambda fn / functor / etc) + static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; } + static void registerLogHandler(logfn_t handler) { logHandler = handler; } + +protected: + // This can be overridden to provide other message behavior if needed + virtual void msg(int minVerbosity, int indent, const std::string& txt) const; + +private: + // Local to global, or global to local ID map + typedef std::unordered_map<spv::Id, spv::Id> idmap_t; + typedef std::unordered_set<spv::Id> idset_t; + typedef std::unordered_map<spv::Id, int> blockmap_t; + + void remap(std::uint32_t opts = DO_EVERYTHING); + + // Map of names to IDs + typedef std::unordered_map<std::string, spv::Id> namemap_t; + + typedef std::uint32_t spirword_t; + + typedef std::pair<unsigned, unsigned> range_t; + typedef std::function<void(spv::Id&)> idfn_t; + typedef std::function<bool(spv::Op, unsigned)> instfn_t; + + // Special Values for ID map: + static const spv::Id unmapped; // unchanged from default value + static const spv::Id unused; // unused ID + static const int header_size; // SPIR header = 5 words + + class id_iterator_t; + + // For mapping type entries between different shaders + typedef std::vector<spirword_t> typeentry_t; + typedef std::map<spv::Id, typeentry_t> globaltypes_t; + + // A set that preserves position order, and a reverse map + typedef std::set<int> posmap_t; + typedef std::unordered_map<spv::Id, int> posmap_rev_t; + + // Maps an ID to the size of its base type, if known.
+ typedef std::unordered_map<spv::Id, unsigned> typesize_map_t; + + // handle error + void error(const std::string& txt) const { errorLatch = true; errorHandler(txt); } + + bool isConstOp(spv::Op opCode) const; + bool isTypeOp(spv::Op opCode) const; + bool isStripOp(spv::Op opCode) const; + bool isFlowCtrl(spv::Op opCode) const; + range_t literalRange(spv::Op opCode) const; + range_t typeRange(spv::Op opCode) const; + range_t constRange(spv::Op opCode) const; + unsigned typeSizeInWords(spv::Id id) const; + unsigned idTypeSizeInWords(spv::Id id) const; + + bool isStripOp(spv::Op opCode, unsigned start) const; + + spv::Id& asId(unsigned word) { return spv[word]; } + const spv::Id& asId(unsigned word) const { return spv[word]; } + spv::Op asOpCode(unsigned word) const { return opOpCode(spv[word]); } + std::uint32_t asOpCodeHash(unsigned word); + spv::Decoration asDecoration(unsigned word) const { return spv::Decoration(spv[word]); } + unsigned asWordCount(unsigned word) const { return opWordCount(spv[word]); } + spv::Id asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 1 : 2)); } + unsigned idPos(spv::Id id) const; + + static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; } + static spv::Op opOpCode(spirword_t data) { return spv::Op(data & spv::OpCodeMask); } + + // Header access & set methods + spirword_t magic() const { return spv[0]; } // return magic number + spirword_t bound() const { return spv[3]; } // return Id bound from header + spirword_t bound(spirword_t b) { return spv[3] = b; } + spirword_t genmagic() const { return spv[2]; } // generator magic + spirword_t genmagic(spirword_t m) { return spv[2] = m; } + spirword_t schemaNum() const { return spv[4]; } // schema number from header + + // Mapping fns: get + spv::Id localId(spv::Id id) const { return idMapL[id]; } + + // Mapping fns: set + inline spv::Id localId(spv::Id id, spv::Id newId); + void countIds(spv::Id id); + + // Return next unused new local ID. + // NOTE: boost::dynamic_bitset would be more efficient due to find_next(), + // which std::vector<bool> doesn't have.
+ inline spv::Id nextUnusedId(spv::Id id); + + void buildLocalMaps(); + std::string literalString(unsigned word) const; // Return literal as a std::string + int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; } + + bool isNewIdMapped(spv::Id newId) const { return isMapped(newId); } + bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; } + bool isOldIdUnused(spv::Id oldId) const { return localId(oldId) == unused; } + bool isOldIdMapped(spv::Id oldId) const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); } + bool isFunction(spv::Id oldId) const { return fnPos.find(oldId) != fnPos.end(); } + + // bool matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const; + // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const; + std::uint32_t hashType(unsigned typeStart) const; + + spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0); + int processInstruction(unsigned word, instfn_t, idfn_t); + + void validate() const; + void mapTypeConst(); + void mapFnBodies(); + void optLoadStore(); + void dceFuncs(); + void dceVars(); + void dceTypes(); + void mapNames(); + void foldIds(); // fold IDs to smallest space + void forwardLoadStores(); // load store forwarding (EXPERIMENTAL) + void offsetIds(); // create relative offset IDs + + void applyMap(); // remap per local name map + void mapRemainder(); // map any IDs we haven't touched yet + void stripDebug(); // strip all debug info + void stripDeadRefs(); // strips debug info for now-dead references after DCE + void strip(); // remove debug symbols + + std::vector<spirword_t> spv; // SPIR words + + std::vector<std::string> stripWhiteList; + + namemap_t nameMap; // ID names from OpName + + // Since we want to also do binary ops, we can't use std::vector<bool>. We could use + // boost::dynamic_bitset, but we're trying to avoid a boost dependency. + typedef std::uint64_t bits_t; + std::vector<bits_t> mapped; // which new IDs have been mapped + static const int mBits = sizeof(bits_t) * 4; + + bool isMapped(spv::Id id) const { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); } + void setMapped(spv::Id id) { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); } + void resizeMapped(spv::Id id) { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); } + size_t maxMappedId() const { return mapped.size() * mBits; } + + // Add a strip range for a given instruction starting at 'start' + // Note: avoiding brace initializers to please older versions of MSVC. + void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); } + + // Function start and end. use unordered_map because we'll have + // many fewer functions than IDs. + std::unordered_map<spv::Id, range_t> fnPos; + + // Which functions are called, anywhere in the module, with a call count + std::unordered_map<spv::Id, int> fnCalls; + + posmap_t typeConstPos; // word positions that define types & consts (ordered) + posmap_rev_t idPosR; // reverse map from IDs to positions + typesize_map_t idTypeSizeMap; // maps each ID to its type size, if known. + + std::vector<spv::Id> idMapL; // ID {M}ap from {L}ocal to {G}lobal IDs + + spv::Id entryPoint; // module entry point + spv::Id largestNewId; // biggest new ID we have mapped anything to + + // Sections of the binary to strip, given as [begin,end) + std::vector<range_t> stripRange; + + // processing options: + std::uint32_t options; + int verbose; // verbosity level + + // Error latch: this is set if the error handler is ever executed.
It would be better to + // use a try/catch block and throw, but that's not desired for certain environments, so + // this is the alternative. + mutable bool errorLatch; + + static errorfn_t errorHandler; + static logfn_t logHandler; +}; + +} // namespace SPV + +#endif // defined (use_cpp11) +#endif // SPIRVREMAPPER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SpvBuilder.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SpvBuilder.h new file mode 100644 index 0000000..f7fdc6a --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SpvBuilder.h @@ -0,0 +1,959 @@ +// +// Copyright (C) 2014-2015 LunarG, Inc. +// Copyright (C) 2015-2020 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// "Builder" is an interface to fully build SPIR-V IR. Allocate one of +// these to build (a thread safe) internal SPIR-V representation (IR), +// and then dump it as a binary stream according to the SPIR-V specification. +// +// A Builder has a 1:1 relationship with a SPIR-V module. 
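+// As a rough sketch of that flow (an illustration, not part of the upstream
+// header; capability and execution-mode setup is elided), emitting a minimal
+// compute-shader module looks like:
+//
+//   spv::SpvBuildLogger logger;
+//   spv::Builder builder(spv::Spv_1_0, 0 /* generator word */, &logger);
+//   builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450);
+//   spv::Function* mainFn = builder.makeEntryPoint("main");
+//   builder.addEntryPoint(spv::ExecutionModelGLCompute, mainFn, "main");
+//   builder.makeReturn(true); // implicit return
+//   builder.leaveFunction();
+//   std::vector<unsigned int> words;
+//   builder.dump(words);     // 'words' is the SPIR-V binary stream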
+// + +#pragma once +#ifndef SpvBuilder_H +#define SpvBuilder_H + +#include "Logger.h" +#include "spirv.hpp" +#include "spvIR.h" +namespace spv { + #include "GLSL.ext.KHR.h" + #include "NonSemanticShaderDebugInfo100.h" +} + +#include <algorithm> +#include <map> +#include <memory> +#include <set> +#include <sstream> +#include <stack> +#include <unordered_map> +#include <vector> + +namespace spv { + +typedef enum { + Spv_1_0 = (1 << 16), + Spv_1_1 = (1 << 16) | (1 << 8), + Spv_1_2 = (1 << 16) | (2 << 8), + Spv_1_3 = (1 << 16) | (3 << 8), + Spv_1_4 = (1 << 16) | (4 << 8), + Spv_1_5 = (1 << 16) | (5 << 8), +} SpvVersion; + +class Builder { +public: + Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger); + virtual ~Builder(); + + static const int maxMatrixSize = 4; + + unsigned int getSpvVersion() const { return spvVersion; } + + void setSource(spv::SourceLanguage lang, int version) + { + sourceLang = lang; + sourceVersion = version; + } + spv::Id getStringId(const std::string& str) + { + auto sItr = stringIds.find(str); + if (sItr != stringIds.end()) + return sItr->second; + spv::Id strId = getUniqueId(); + Instruction* fileString = new Instruction(strId, NoType, OpString); + const char* file_c_str = str.c_str(); + fileString->addStringOperand(file_c_str); + strings.push_back(std::unique_ptr<Instruction>(fileString)); + module.mapInstruction(fileString); + stringIds[file_c_str] = strId; + return strId; + } + spv::Id getSourceFile() const + { + return sourceFileStringId; + } + void setSourceFile(const std::string& file) + { + sourceFileStringId = getStringId(file); + currentFileId = sourceFileStringId; + } + void setSourceText(const std::string& text) { sourceText = text; } + void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); } + void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); } + void setEmitOpLines() { emitOpLines = true; } + void setEmitNonSemanticShaderDebugInfo(bool const emit) + { + emitNonSemanticShaderDebugInfo = emit; + + if(emit) + { + importNonSemanticShaderDebugInfoInstructions(); + } + } + void setEmitNonSemanticShaderDebugSource(bool const src) + { + emitNonSemanticShaderDebugSource = src; + } + void addExtension(const char* ext) { extensions.insert(ext); } + void removeExtension(const char* ext) + { + extensions.erase(ext); + } + void addIncorporatedExtension(const char* ext, SpvVersion incorporatedVersion) + { + if (getSpvVersion() < static_cast<unsigned>(incorporatedVersion)) + addExtension(ext); + } + void promoteIncorporatedExtension(const char* baseExt, const char* promoExt, SpvVersion incorporatedVersion) + { + removeExtension(baseExt); + addIncorporatedExtension(promoExt, incorporatedVersion); + } + void addInclude(const std::string& name, const std::string& text) + { + spv::Id incId = getStringId(name); + includeFiles[incId] = &text; + } + Id import(const char*); + void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem) + { + addressModel = addr; + memoryModel = mem; + } + + void addCapability(spv::Capability cap) { capabilities.insert(cap); } + + // To get a new <id> for anything needing a new one. + Id getUniqueId() { return ++uniqueId; } + + // To get a set of new <id>s, e.g., for a set of function parameters + Id getUniqueIds(int numIds) + { + Id id = uniqueId + 1; + uniqueId += numIds; + return id; + } + + // Generate OpLine for non-filename-based #line directives (ie no filename + // seen yet): Log the current line, and if different than the last one, + // issue a new OpLine using the new line and current source file name.
+ void setLine(int line); + + // If filename null, generate OpLine for non-filename-based line directives, + // else do filename-based: Log the current line and file, and if different + // than the last one, issue a new OpLine using the new line and file + // name. + void setLine(int line, const char* filename); + // Low-level OpLine. See setLine() for a layered helper. + void addLine(Id fileName, int line, int column); + void addDebugScopeAndLine(Id fileName, int line, int column); + + // For creating new types (will return old type if the requested one was already made). + Id makeVoidType(); + Id makeBoolType(bool const compilerGenerated = true); + Id makePointer(StorageClass, Id pointee); + Id makeForwardPointer(StorageClass); + Id makePointerFromForwardPointer(StorageClass, Id forwardPointerType, Id pointee); + Id makeIntegerType(int width, bool hasSign); // generic + Id makeIntType(int width) { return makeIntegerType(width, true); } + Id makeUintType(int width) { return makeIntegerType(width, false); } + Id makeFloatType(int width); + Id makeStructType(const std::vector<Id>& members, const char* name, bool const compilerGenerated = true); + Id makeStructResultType(Id type0, Id type1); + Id makeVectorType(Id component, int size); + Id makeMatrixType(Id component, int cols, int rows); + Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration + Id makeRuntimeArray(Id element); + Id makeFunctionType(Id returnType, const std::vector<Id>& paramTypes); + Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format); + Id makeSamplerType(); + Id makeSampledImageType(Id imageType); + Id makeCooperativeMatrixType(Id component, Id scope, Id rows, Id cols); + Id makeGenericType(spv::Op opcode, std::vector<spv::IdImmediate>& operands); + + // SPIR-V NonSemantic Shader DebugInfo Instructions + struct DebugTypeLoc { + std::string name {}; + int line {0}; + int column {0}; + }; + std::unordered_map<Id, DebugTypeLoc> debugTypeLocs; + Id makeDebugInfoNone(); + Id makeBoolDebugType(int const size); + Id makeIntegerDebugType(int const width, bool const hasSign); + Id makeFloatDebugType(int const width); + Id makeSequentialDebugType(Id const baseType, Id const componentCount, NonSemanticShaderDebugInfo100Instructions const sequenceType); + Id makeArrayDebugType(Id const baseType, Id const componentCount); + Id makeVectorDebugType(Id const baseType, int const componentCount); + Id makeMatrixDebugType(Id const vectorType, int const vectorCount, bool columnMajor = true); + Id makeMemberDebugType(Id const memberType, DebugTypeLoc const& debugTypeLoc); + Id makeCompositeDebugType(std::vector<Id> const& memberTypes, char const*const name, + NonSemanticShaderDebugInfo100DebugCompositeType const tag, bool const isOpaqueType = false); + Id makeDebugSource(const Id fileName); + Id makeDebugCompilationUnit(); + Id createDebugGlobalVariable(Id const type, char const*const name, Id const variable); + Id createDebugLocalVariable(Id type, char const*const name, size_t const argNumber = 0); + Id makeDebugExpression(); + Id makeDebugDeclare(Id const debugLocalVariable, Id const localVariable); + Id makeDebugValue(Id const debugLocalVariable, Id const value); + Id makeDebugFunctionType(Id returnType, const std::vector<Id>& paramTypes); + Id makeDebugFunction(Function* function, Id nameId, Id funcTypeId); + Id makeDebugLexicalBlock(uint32_t line); + std::string unmangleFunctionName(std::string const& name) const; + + // accelerationStructureNV type + Id makeAccelerationStructureType(); + //
rayQueryEXT type + Id makeRayQueryType(); + + // For querying about types. + Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } + Id getDerefTypeId(Id resultId) const; + Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } + Op getTypeClass(Id typeId) const { return getOpCode(typeId); } + Op getMostBasicTypeClass(Id typeId) const; + int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } + int getNumTypeConstituents(Id typeId) const; + int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } + Id getScalarTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId, int) const; + StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } + ImageFormat getImageTypeFormat(Id typeId) const + { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } + Id getResultingAccessChainType() const; + + bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } + bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } + bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } + bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } + bool isCooperativeMatrix(Id resultId)const { return isCooperativeMatrixType(getTypeId(resultId)); } + bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } + bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } + + bool isBoolType(Id typeId) + { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } + bool isIntType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) != 0; } + bool isUintType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) == 0; } + bool isFloatType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat; } + bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } + bool isScalarType(Id typeId) const + { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || + getTypeClass(typeId) == OpTypeBool; } + bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } + bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } + bool isStructType(Id typeId) const { return getTypeClass(typeId) == OpTypeStruct; } + bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } +#ifdef GLSLANG_WEB + bool isCooperativeMatrixType(Id typeId)const { return false; } +#else + bool isCooperativeMatrixType(Id typeId)const { return getTypeClass(typeId) == OpTypeCooperativeMatrixNV; } +#endif + bool isAggregateType(Id typeId) const + { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); } + bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } + bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } + bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } + bool containsType(Id typeId, Op typeOp, unsigned int width) const; + bool containsPhysicalStorageBufferOrArray(Id typeId) const; + + bool isConstantOpCode(Op opcode) const; + bool isSpecConstantOpCode(Op opcode) const; 
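+ // For example (an illustration, not upstream code; 'builder' is a Builder
+ // instance from elsewhere), the type makers above are cached and the query
+ // helpers invert them:
+ //
+ //   spv::Id floatTy = builder.makeFloatType(32);
+ //   spv::Id vec4Ty = builder.makeVectorType(floatTy, 4); // reused if already made
+ //   assert(builder.isVectorType(vec4Ty));
+ //   assert(builder.getNumTypeComponents(vec4Ty) == 4);
+ //   assert(builder.getScalarTypeId(vec4Ty) == floatTy);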
+ bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); } + bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; } + bool isSpecConstant(Id resultId) const { return isSpecConstantOpCode(getOpCode(resultId)); } + unsigned int getConstantScalar(Id resultId) const + { return module.getInstruction(resultId)->getImmediateOperand(0); } + StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); } + + bool isVariableOpCode(Op opcode) const { return opcode == OpVariable; } + bool isVariable(Id resultId) const { return isVariableOpCode(getOpCode(resultId)); } + bool isGlobalStorage(Id resultId) const { return getStorageClass(resultId) != StorageClassFunction; } + bool isGlobalVariable(Id resultId) const { return isVariable(resultId) && isGlobalStorage(resultId); } + // See if a resultId is valid for use as an initializer. + bool isValidInitializer(Id resultId) const { return isConstant(resultId) || isGlobalVariable(resultId); } + + bool isRayTracingOpCode(Op opcode) const; + + int getScalarTypeWidth(Id typeId) const + { + Id scalarTypeId = getScalarTypeId(typeId); + assert(getTypeClass(scalarTypeId) == OpTypeInt || getTypeClass(scalarTypeId) == OpTypeFloat); + return module.getInstruction(scalarTypeId)->getImmediateOperand(0); + } + + int getTypeNumColumns(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeConstituents(typeId); + } + int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); } + int getTypeNumRows(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeComponents(getContainedTypeId(typeId)); + } + int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); } + + Dim getTypeDimensionality(Id typeId) const + { + assert(isImageType(typeId)); + return (Dim)module.getInstruction(typeId)->getImmediateOperand(1); + } + Id getImageType(Id resultId) const + { + Id typeId = getTypeId(resultId); + assert(isImageType(typeId) || isSampledImageType(typeId)); + return isSampledImageType(typeId) ? module.getInstruction(typeId)->getIdOperand(0) : typeId; + } + bool isArrayedImageType(Id typeId) const + { + assert(isImageType(typeId)); + return module.getInstruction(typeId)->getImmediateOperand(3) != 0; + } + + // For making new constants (will return old constant if the requested one was already made). 
+ Id makeNullConstant(Id typeId); + Id makeBoolConstant(bool b, bool specConstant = false); + Id makeInt8Constant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(8), (unsigned)i, specConstant); } + Id makeUint8Constant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(8), u, specConstant); } + Id makeInt16Constant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(16), (unsigned)i, specConstant); } + Id makeUint16Constant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(16), u, specConstant); } + Id makeIntConstant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); } + Id makeUintConstant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(32), u, specConstant); } + Id makeInt64Constant(long long i, bool specConstant = false) + { return makeInt64Constant(makeIntType(64), (unsigned long long)i, specConstant); } + Id makeUint64Constant(unsigned long long u, bool specConstant = false) + { return makeInt64Constant(makeUintType(64), u, specConstant); } + Id makeFloatConstant(float f, bool specConstant = false); + Id makeDoubleConstant(double d, bool specConstant = false); + Id makeFloat16Constant(float f16, bool specConstant = false); + Id makeFpConstant(Id type, double d, bool specConstant = false); + + Id importNonSemanticShaderDebugInfoInstructions(); + + // Turn the array of constants into a proper spv constant of the requested type. + Id makeCompositeConstant(Id type, const std::vector<Id>& comps, bool specConst = false); + + // Methods for adding information outside the CFG. + Instruction* addEntryPoint(ExecutionModel, Function*, const char* name); + void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1); + void addExecutionMode(Function*, ExecutionMode mode, const std::vector<unsigned>& literals); + void addExecutionModeId(Function*, ExecutionMode mode, const std::vector<Id>& operandIds); + void addName(Id, const char* name); + void addMemberName(Id, int member, const char* name); + void addDecoration(Id, Decoration, int num = -1); + void addDecoration(Id, Decoration, const char*); + void addDecoration(Id, Decoration, const std::vector<unsigned>& literals); + void addDecoration(Id, Decoration, const std::vector<const char*>& strings); + void addDecorationId(Id id, Decoration, Id idDecoration); + void addDecorationId(Id id, Decoration, const std::vector<Id>& operandIds); + void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1); + void addMemberDecoration(Id, unsigned int member, Decoration, const char*); + void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector<unsigned>& literals); + void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector<const char*>& strings); + + // At the end of what block do the next create*() instructions go? + // Also reset current last DebugScope and current source line to unknown + void setBuildPoint(Block* bp) { + buildPoint = bp; + lastDebugScopeId = NoResult; + currentLine = 0; + } + Block* getBuildPoint() const { return buildPoint; } + + // Make the entry-point function. The returned pointer is only valid + // for the lifetime of this builder. + Function* makeEntryPoint(const char*); + + // Make a shader-style function, and create its entry block if entry is non-zero. + // Return the function, pass back the entry. + // The returned pointer is only valid for the lifetime of this builder.
+ Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name, + const std::vector<Id>& paramTypes, const std::vector<char const*>& paramNames, + const std::vector<std::vector<Decoration>>& precisions, Block **entry = 0); + + // Create a return. An 'implicit' return is one not appearing in the source + // code. In the case of an implicit return, no post-return block is inserted. + void makeReturn(bool implicit, Id retVal = 0); + + // Initialize state and generate instructions for new lexical scope + void enterScope(uint32_t line); + + // Set state and generate instructions to exit current lexical scope + void leaveScope(); + + // Prepare builder for generation of instructions for a function. + void enterFunction(Function const* function); + + // Generate all the code needed to finish up a function. + void leaveFunction(); + + // Create block terminator instruction for certain statements like + // discard, terminate-invocation, terminateRayEXT, or ignoreIntersectionEXT + void makeStatementTerminator(spv::Op opcode, const char *name); + + // Create block terminator instruction for statements that have input operands + // such as OpEmitMeshTasksEXT + void makeStatementTerminator(spv::Op opcode, const std::vector<Id>& operands, const char* name); + + // Create a global or function local or IO variable. + Id createVariable(Decoration precision, StorageClass storageClass, Id type, const char* name = nullptr, + Id initializer = NoResult, bool const compilerGenerated = true); + + // Create an intermediate with an undefined value. + Id createUndefined(Id type); + + // Store into an Id and return the l-value + void createStore(Id rValue, Id lValue, spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // Load from an Id and return it + Id createLoad(Id lValue, spv::Decoration precision, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // Create an OpAccessChain instruction + Id createAccessChain(StorageClass, Id base, const std::vector<Id>& offsets); + + // Create an OpArrayLength instruction + Id createArrayLength(Id base, unsigned int member); + + // Create an OpCooperativeMatrixLengthNV instruction + Id createCooperativeMatrixLength(Id type); + + // Create an OpCompositeExtract instruction + Id createCompositeExtract(Id composite, Id typeId, unsigned index); + Id createCompositeExtract(Id composite, Id typeId, const std::vector<unsigned>& indexes); + Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index); + Id createCompositeInsert(Id object, Id composite, Id typeId, const std::vector<unsigned>& indexes); + + Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex); + Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex); + + void createNoResultOp(Op); + void createNoResultOp(Op, Id operand); + void createNoResultOp(Op, const std::vector<Id>& operands); + void createNoResultOp(Op, const std::vector<IdImmediate>& operands); + void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask); + void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics); + Id createUnaryOp(Op, Id typeId, Id operand); + Id createBinOp(Op, Id typeId, Id operand1, Id operand2); + Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3); + Id createOp(Op, Id typeId, const std::vector<Id>& operands); + Id createOp(Op, Id typeId, const std::vector<IdImmediate>& operands); + Id createFunctionCall(spv::Function*, const std::vector<spv::Id>&); +
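+ // For example (an illustration, not upstream code; 'lhs' and 'rhs' are
+ // float-typed Ids produced earlier), arithmetic is emitted at the current
+ // build point with the create* helpers above:
+ //
+ //   spv::Id floatTy = builder.makeFloatType(32);
+ //   spv::Id sum = builder.createBinOp(spv::OpFAdd, floatTy, lhs, rhs);
+ //   spv::Id neg = builder.createUnaryOp(spv::OpFNegate, floatTy, sum);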
Id createSpecConstantOp(Op, Id typeId, const std::vector<Id>& operands, const std::vector<unsigned>& literals); + + // Take an rvalue (source) and a set of channels to extract from it to + // make a new rvalue, which is returned. + Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector<unsigned>& channels); + + // Take a copy of an lvalue (target) and a source of components, and set the + // source components into the lvalue where the 'channels' say to put them. + // An updated version of the target is returned. + // (No true lvalue or stores are used.) + Id createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector<unsigned>& channels); + + // If both the id and precision are valid, the id + // gets tagged with the requested precision. + // The passed in id is always the returned id, to simplify use patterns. + Id setPrecision(Id id, Decoration precision) + { + if (precision != NoPrecision && id != NoResult) + addDecoration(id, precision); + + return id; + } + + // Can smear a scalar to a vector for the following forms: + // - promoteScalar(scalar, vector) // smear scalar to width of vector + // - promoteScalar(vector, scalar) // smear scalar to width of vector + // - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to + // - promoteScalar(scalar, scalar) // do nothing + // Other forms are not allowed. + // + // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'. + // The type of the created vector is a vector of components of the same type as the scalar. + // + // Note: One of the arguments will change, with the result coming back that way rather than + // through the return value. + void promoteScalar(Decoration precision, Id& left, Id& right); + + // Make a value by smearing the scalar to fill the type. + // vectorType should be the correct type for making a vector of scalarVal. + // (No conversions are done.) + Id smearScalar(Decoration precision, Id scalarVal, Id vectorType); + + // Create a call to a built-in function. + Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector<Id>& args); + + // List of parameters used to create a texture operation + struct TextureParameters { + Id sampler; + Id coords; + Id bias; + Id lod; + Id Dref; + Id offset; + Id offsets; + Id gradX; + Id gradY; + Id sample; + Id component; + Id texelOut; + Id lodClamp; + Id granularity; + Id coarse; + bool nonprivate; + bool volatil; + }; + + // Select the correct texture operation based on all inputs, and emit the correct instruction + Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, + bool noImplicit, const TextureParameters&, ImageOperandsMask); + + // Emit the OpTextureQuery* instruction that was passed in. + // Figure out the right return value and type, and return it. + Id createTextureQueryCall(Op, const TextureParameters&, bool isUnsignedResult); + + Id createSamplePositionCall(Decoration precision, Id, Id); + + Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned); + Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id); + + // Reduction comparison for composites: For equal and not-equal resulting in a scalar.
+ Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */); + + // OpCompositeConstruct + Id createCompositeConstruct(Id typeId, const std::vector<Id>& constituents); + + // vector or scalar constructor + Id createConstructor(Decoration precision, const std::vector<Id>& sources, Id resultTypeId); + + // matrix constructor + Id createMatrixConstructor(Decoration precision, const std::vector<Id>& sources, Id constructee); + + // Helper to use for building nested control flow with if-then-else. + class If { + public: + If(Id condition, unsigned int ctrl, Builder& builder); + ~If() {} + + void makeBeginElse(); + void makeEndIf(); + + private: + If(const If&); + If& operator=(If&); + + Builder& builder; + Id condition; + unsigned int control; + Function* function; + Block* headerBlock; + Block* thenBlock; + Block* elseBlock; + Block* mergeBlock; + }; + + // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing + // any case/default labels, all separated by one or more case/default labels. Each possible + // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this + // number space. How to compute the value is given by 'condition', as in switch(condition). + // + // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches. + // + // Use a defaultSegment < 0 if there is no default segment (to branch to post switch). + // + // Returns the right set of basic blocks to start each code segment with, so that the caller's + // recursion stack can hold the memory for it. + // + void makeSwitch(Id condition, unsigned int control, int numSegments, const std::vector<int>& caseValues, + const std::vector<int>& valueToSegment, int defaultSegment, std::vector<Block*>& segmentBB); + + // Add a branch to the innermost switch's merge block. + void addSwitchBreak(); + + // Move to the next code segment, passing in the return argument in makeSwitch() + void nextSwitchSegment(std::vector<Block*>& segmentBB, int segment); + + // Finish off the innermost switch. + void endSwitch(std::vector<Block*>& segmentBB); + + struct LoopBlocks { + LoopBlocks(Block& head, Block& body, Block& merge, Block& continue_target) : + head(head), body(body), merge(merge), continue_target(continue_target) { } + Block &head, &body, &merge, &continue_target; + private: + LoopBlocks(); + LoopBlocks& operator=(const LoopBlocks&) = delete; + }; + + // Start a new loop and prepare the builder to generate code for it. Until + // closeLoop() is called for this loop, createLoopContinue() and + // createLoopExit() will target its corresponding blocks. + LoopBlocks& makeNewLoop(); + + // Create a new block in the function containing the build point. Memory is + // owned by the function object. + Block& makeNewBlock(); + + // Add a branch to the continue_target of the current (innermost) loop. + void createLoopContinue(); + + // Add an exit (e.g. "break") from the innermost loop that we're currently + // in. + void createLoopExit(); + + // Close the innermost loop that you're in + void closeLoop(); + + // + // Access chain design for an R-Value vs. L-Value: + // + // There is a single access chain the builder is building at + // any particular time. Such a chain can be used for either a load or + // a store, when desired. + // + // Expressions can be r-values, l-values, or both, or only r-values: + // a[b.c].d = .... // l-value + // ...
= a[b.c].d; // r-value, that also looks like an l-value + // ++a[b.c].d; // r-value and l-value + // (x + y)[2]; // r-value only, can't possibly be l-value + // + // Computing an r-value means generating code. Hence, + // r-values should only be computed when they are needed, not speculatively. + // + // Computing an l-value means saving away information for later use in the compiler, + // no code is generated until the l-value is later dereferenced. It is okay + // to speculatively generate an l-value, just not okay to speculatively dereference it. + // + // The base of the access chain (the left-most variable or expression + // from which everything is based) can be set either as an l-value + // or as an r-value. Most efficient would be to set an l-value if one + // is available. If an expression was evaluated, the resulting r-value + // can be set as the chain base. + // + // The users of this single access chain can save and restore if they + // want to nest or manage multiple chains. + // + + struct AccessChain { + Id base; // for l-values, pointer to the base object, for r-values, the base object + std::vector<Id> indexChain; + Id instr; // cache the instruction that generates this access chain + std::vector<unsigned> swizzle; // each std::vector element selects the next GLSL component number + Id component; // a dynamic component index, can coexist with a swizzle, + // done after the swizzle, NoResult if not present + Id preSwizzleBaseType; // dereferenced type, before swizzle or component is applied; + // NoType unless a swizzle or component is present + bool isRValue; // true if 'base' is an r-value, otherwise, base is an l-value + unsigned int alignment; // bitwise OR of alignment values passed in. Accumulates worst alignment. + // Only tracks base and (optional) component selection alignment. + + // Accumulate whether anything in the chain of structures has coherent decorations.
+ struct CoherentFlags { + CoherentFlags() { clear(); } +#ifdef GLSLANG_WEB + void clear() { } + bool isVolatile() const { return false; } + CoherentFlags operator |=(const CoherentFlags &other) { return *this; } +#else + bool isVolatile() const { return volatil; } + bool isNonUniform() const { return nonUniform; } + bool anyCoherent() const { + return coherent || devicecoherent || queuefamilycoherent || workgroupcoherent || + subgroupcoherent || shadercallcoherent; + } + + unsigned coherent : 1; + unsigned devicecoherent : 1; + unsigned queuefamilycoherent : 1; + unsigned workgroupcoherent : 1; + unsigned subgroupcoherent : 1; + unsigned shadercallcoherent : 1; + unsigned nonprivate : 1; + unsigned volatil : 1; + unsigned isImage : 1; + unsigned nonUniform : 1; + + void clear() { + coherent = 0; + devicecoherent = 0; + queuefamilycoherent = 0; + workgroupcoherent = 0; + subgroupcoherent = 0; + shadercallcoherent = 0; + nonprivate = 0; + volatil = 0; + isImage = 0; + nonUniform = 0; + } + + CoherentFlags operator |=(const CoherentFlags &other) { + coherent |= other.coherent; + devicecoherent |= other.devicecoherent; + queuefamilycoherent |= other.queuefamilycoherent; + workgroupcoherent |= other.workgroupcoherent; + subgroupcoherent |= other.subgroupcoherent; + shadercallcoherent |= other.shadercallcoherent; + nonprivate |= other.nonprivate; + volatil |= other.volatil; + isImage |= other.isImage; + nonUniform |= other.nonUniform; + return *this; + } +#endif + }; + CoherentFlags coherentFlags; + }; + + // + // the SPIR-V builder maintains a single active chain that + // the following methods operate on + // + + // for external save and restore + AccessChain getAccessChain() { return accessChain; } + void setAccessChain(AccessChain newChain) { accessChain = newChain; } + + // clear accessChain + void clearAccessChain(); + + // set new base as an l-value base + void setAccessChainLValue(Id lValue) + { + assert(isPointer(lValue)); + accessChain.base = lValue; + } + + // set new base value as an r-value + void setAccessChainRValue(Id rValue) + { + accessChain.isRValue = true; + accessChain.base = rValue; + } + + // push offset onto the end of the chain + void accessChainPush(Id offset, AccessChain::CoherentFlags coherentFlags, unsigned int alignment) + { + accessChain.indexChain.push_back(offset); + accessChain.coherentFlags |= coherentFlags; + accessChain.alignment |= alignment; + } + + // push new swizzle onto the end of any existing swizzle, merging into a single swizzle + void accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType, + AccessChain::CoherentFlags coherentFlags, unsigned int alignment); + + // push a dynamic component selection onto the access chain, only applicable with a + // non-trivial swizzle or no swizzle + void accessChainPushComponent(Id component, Id preSwizzleBaseType, AccessChain::CoherentFlags coherentFlags, + unsigned int alignment) + { + if (accessChain.swizzle.size() != 1) { + accessChain.component = component; + if (accessChain.preSwizzleBaseType == NoType) + accessChain.preSwizzleBaseType = preSwizzleBaseType; + } + accessChain.coherentFlags |= coherentFlags; + accessChain.alignment |= alignment; + } + + // use accessChain and swizzle to store value + void accessChainStore(Id rvalue, Decoration nonUniform, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // use accessChain and swizzle to load an r-value + Id accessChainLoad(Decoration precision, Decoration 
l_nonUniform, Decoration r_nonUniform, Id ResultType, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, spv::Scope scope = spv::ScopeMax, + unsigned int alignment = 0); + + // Return whether or not the access chain can be represented in SPIR-V + // as an l-value. + // E.g., a[3].yx cannot be, while a[3].y and a[3].y[x] can be. + bool isSpvLvalue() const { return accessChain.swizzle.size() <= 1; } + + // get the direct pointer for an l-value + Id accessChainGetLValue(); + + // Get the inferred SPIR-V type of the result of the current access chain, + // based on the type of the base and the chain of dereferences. + Id accessChainGetInferredType(); + + // Add capabilities, extensions, remove unneeded decorations, etc., + // based on the resulting SPIR-V. + void postProcess(); + + // Prune unreachable blocks in the CFG and remove unneeded decorations. + void postProcessCFG(); + +#ifndef GLSLANG_WEB + // Add capabilities, extensions based on instructions in the module. + void postProcessFeatures(); + // Hook to visit each instruction in a block in a function + void postProcess(Instruction&); + // Hook to visit each non-32-bit sized float/int operation in a block. + void postProcessType(const Instruction&, spv::Id typeId); +#endif + + void dump(std::vector&) const; + + void createBranch(Block* block); + void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); + void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, + const std::vector& operands); + + // Sets to generate opcode for specialization constants. + void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; } + // Sets to generate opcode for non-specialization constants (normal mode). + void setToNormalCodeGenMode() { generatingOpCodeForSpecConst = false; } + // Check if the builder is generating code for spec constants. + bool isInSpecConstCodeGenMode() { return generatingOpCodeForSpecConst; } + + protected: + Id makeIntConstant(Id typeId, unsigned value, bool specConstant); + Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2); + Id findCompositeConstant(Op typeClass, Id typeId, const std::vector& comps); + Id findStructConstant(Id typeId, const std::vector& comps); + Id collapseAccessChain(); + void remapDynamicSwizzle(); + void transferAccessChainSwizzle(bool dynamic); + void simplifyAccessChainSwizzle(); + void createAndSetNoPredecessorBlock(const char*); + void createSelectionMerge(Block* mergeBlock, unsigned int control); + void dumpSourceInstructions(std::vector&) const; + void dumpSourceInstructions(const spv::Id fileId, const std::string& text, std::vector&) const; + void dumpInstructions(std::vector&, const std::vector >&) const; + void dumpModuleProcesses(std::vector&) const; + spv::MemoryAccessMask sanitizeMemoryAccessForStorageClass(spv::MemoryAccessMask memoryAccess, StorageClass sc) + const; + + unsigned int spvVersion; // the version of SPIR-V to emit in the header + SourceLanguage sourceLang; + int sourceVersion; + spv::Id sourceFileStringId; + spv::Id nonSemanticShaderCompilationUnitId {0}; + spv::Id nonSemanticShaderDebugInfo {0}; + spv::Id debugInfoNone {0}; + spv::Id debugExpression {0}; // Debug expression with zero operations. 
+ std::string sourceText; + int currentLine; + const char* currentFile; + spv::Id currentFileId; + std::stack currentDebugScopeId; + spv::Id lastDebugScopeId; + bool emitOpLines; + bool emitNonSemanticShaderDebugInfo; + bool restoreNonSemanticShaderDebugInfo; + bool emitNonSemanticShaderDebugSource; + std::set extensions; + std::vector sourceExtensions; + std::vector moduleProcesses; + AddressingModel addressModel; + MemoryModel memoryModel; + std::set capabilities; + int builderNumber; + Module module; + Block* buildPoint; + Id uniqueId; + Function* entryPointFunction; + bool generatingOpCodeForSpecConst; + AccessChain accessChain; + + // special blocks of instructions for output + std::vector > strings; + std::vector > imports; + std::vector > entryPoints; + std::vector > executionModes; + std::vector > names; + std::vector > decorations; + std::vector > constantsTypesGlobals; + std::vector > externals; + std::vector > functions; + + // not output, internally used for quick & dirty canonical (unique) creation + + // map type opcodes to constant inst. + std::unordered_map> groupedConstants; + // map struct-id to constant instructions + std::unordered_map> groupedStructConstants; + // map type opcodes to type instructions + std::unordered_map> groupedTypes; + // map type opcodes to debug type instructions + std::unordered_map> groupedDebugTypes; + // list of OpConstantNull instructions + std::vector nullConstants; + + // stack of switches + std::stack switchMerges; + + // Our loop stack. + std::stack loops; + + // map from strings to their string ids + std::unordered_map stringIds; + + // map from include file name ids to their contents + std::map includeFiles; + + // map from core id to debug id + std::map debugId; + + // map from file name string id to DebugSource id + std::unordered_map debugSourceId; + + // The stream for outputting warnings and errors. + SpvBuildLogger* logger; +}; // end Builder class + +}; // end spv namespace + +#endif // SpvBuilder_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SpvTools.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SpvTools.h new file mode 100644 index 0000000..5386048 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/SpvTools.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2014-2016 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Call into SPIRV-Tools to disassemble, validate, and optimize.
+//
+
+#pragma once
+#ifndef GLSLANG_SPV_TOOLS_H
+#define GLSLANG_SPV_TOOLS_H
+
+#if ENABLE_OPT
+#include <vector>
+#include <ostream>
+#include "spirv-tools/libspirv.h"
+#endif
+
+#include "glslang/MachineIndependent/localintermediate.h"
+#include "Logger.h"
+
+namespace glslang {
+
+struct SpvOptions {
+    bool generateDebugInfo {false};
+    bool stripDebugInfo {false};
+    bool disableOptimizer {true};
+    bool optimizeSize {false};
+    bool disassemble {false};
+    bool validate {false};
+    bool emitNonSemanticShaderDebugInfo {false};
+    bool emitNonSemanticShaderDebugSource{ false };
+};
+
+#if ENABLE_OPT
+
+// Use the SPIRV-Tools disassembler to print SPIR-V using a SPV_ENV_UNIVERSAL_1_3 environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv);
+
+// Use the SPIRV-Tools disassembler to print SPIR-V with a provided SPIR-V environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv,
+                           spv_target_env requested_context);
+
+// Apply the SPIRV-Tools validator to generated SPIR-V.
+void SpirvToolsValidate(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                        spv::SpvBuildLogger*, bool prelegalization);
+
+// Apply the SPIRV-Tools optimizer to generated SPIR-V. HLSL SPIR-V is legalized in the process.
+void SpirvToolsTransform(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                         spv::SpvBuildLogger*, const SpvOptions*);
+
+// Apply the SPIRV-Tools optimizer to strip debug info from SPIR-V. This is implicitly done by
+// SpirvToolsTransform if spvOptions->stripDebugInfo is set, but can be called separately if
+// optimization is disabled.
+void SpirvToolsStripDebugInfo(const glslang::TIntermediate& intermediate,
+                              std::vector<unsigned int>& spirv, spv::SpvBuildLogger*);
+
+#endif
+
+} // end namespace glslang
+
+#endif // GLSLANG_SPV_TOOLS_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/bitutils.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/bitutils.h
new file mode 100644
index 0000000..22e44ce
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/bitutils.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
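Editorial aside, not part of the patch: a hedged sketch of how the SpvOptions struct in SpvTools.h above is typically consumed through glslang's GlslangToSpv entry point; obtaining 'intermediate' from a compiled and linked TProgram is omitted, and the helper name compileToSpv is hypothetical.

#include "SPIRV/GlslangToSpv.h"

std::vector<unsigned int> compileToSpv(const glslang::TIntermediate& intermediate)
{
    std::vector<unsigned int> spirv;
    spv::SpvBuildLogger logger;
    glslang::SpvOptions options;
    options.disableOptimizer = false;   // allow SpirvToolsTransform to run (requires ENABLE_OPT)
    options.validate = true;            // run SpirvToolsValidate on the result
    glslang::GlslangToSpv(intermediate, spirv, &logger, &options);
    return spirv;                       // logger.getAllMessages() carries warnings/errors
}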
+
+#ifndef LIBSPIRV_UTIL_BITUTILS_H_
+#define LIBSPIRV_UTIL_BITUTILS_H_
+
+#include <cstdint>
+#include <cstring>
+
+namespace spvutils {
+
+// Performs a bitwise copy of source to the destination type Dest.
+template <typename Dest, typename Src>
+Dest BitwiseCast(Src source) {
+  Dest dest;
+  static_assert(sizeof(source) == sizeof(dest),
+                "BitwiseCast: Source and destination must have the same size");
+  std::memcpy(static_cast<void*>(&dest), &source, sizeof(dest));
+  return dest;
+}
+
+// SetBits<T, First, Num> returns an integer of type <T> with bits set
+// for position <First> through <First + Num - 1>, counting from the least
+// significant bit. In particular when Num == 0, no positions are set to 1.
+// A static assert will be triggered if First + Num > sizeof(T) * 8, that is,
+// a bit that will not fit in the underlying type is set.
+template <typename T, size_t First = 0, size_t Num = 0>
+struct SetBits {
+  static_assert(First < sizeof(T) * 8,
+                "Tried to set a bit that is shifted too far.");
+  const static T get = (T(1) << First) | SetBits<T, First + 1, Num - 1>::get;
+};
+
+template <typename T, size_t Last>
+struct SetBits<T, Last, 0> {
+  const static T get = T(0);
+};
+
+// This is all compile-time so we can put our tests right here.
+static_assert(SetBits<uint32_t, 0, 0>::get == uint32_t(0x00000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 1>::get == uint32_t(0x00000001),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 31, 1>::get == uint32_t(0x80000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 1, 2>::get == uint32_t(0x00000006),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 30, 2>::get == uint32_t(0xc0000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 31>::get == uint32_t(0x7FFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 32>::get == uint32_t(0xFFFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 16, 16>::get == uint32_t(0xFFFF0000),
+              "SetBits failed");
+
+static_assert(SetBits<uint64_t, 0, 1>::get == uint64_t(0x0000000000000001LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 63, 1>::get == uint64_t(0x8000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 62, 2>::get == uint64_t(0xc000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 31, 1>::get == uint64_t(0x0000000080000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 16, 16>::get == uint64_t(0x00000000FFFF0000LL),
+              "SetBits failed");
+
+} // namespace spvutils
+
+#endif // LIBSPIRV_UTIL_BITUTILS_H_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/disassemble.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/disassemble.h
new file mode 100644
index 0000000..b6a4635
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/disassemble.h
@@ -0,0 +1,53 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
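Editorial aside, not part of the patch: two self-checking examples of what the bitutils helpers compute, under the usual assumption that float is 32-bit IEEE-754.

#include <cstdint>
#include "bitutils.h"

// Compile-time mask: four set bits starting at position 4 gives 0xF0.
static_assert(spvutils::SetBits<uint32_t, 4, 4>::get == 0xF0u, "SetBits demo");

// Bit-accurate reinterpretation: single-precision 1.0f is 0x3f800000.
static_assert(sizeof(float) == sizeof(uint32_t), "demo assumes 32-bit float");
inline uint32_t oneAsBits() { return spvutils::BitwiseCast<uint32_t>(1.0f); }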
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Disassembler for SPIR-V. +// + +#pragma once +#ifndef disassembler_H +#define disassembler_H + +#include +#include + +namespace spv { + + // disassemble with glslang custom disassembler + void Disassemble(std::ostream& out, const std::vector&); + +} // end namespace spv + +#endif // disassembler_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/doc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/doc.h new file mode 100644 index 0000000..2a0b28c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/doc.h @@ -0,0 +1,259 @@ +// +// Copyright (C) 2014-2015 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Parameterize the SPIR-V enumerants. +// + +#pragma once + +#include "spirv.hpp" + +#include + +namespace spv { + +// Fill in all the parameters +void Parameterize(); + +// Return the English names of all the enums. 
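Editorial aside, not part of the patch: a one-call sketch of the glslang disassembler declared in disassemble.h above; 'spirv' is assumed to hold a valid module, and the helper name dumpModule is hypothetical.

#include <iostream>
#include <vector>
#include "SPIRV/disassemble.h"

void dumpModule(const std::vector<unsigned int>& spirv)
{
    spv::Disassemble(std::cout, spirv);   // human-readable listing to stdout
}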
+const char* SourceString(int); +const char* AddressingString(int); +const char* MemoryString(int); +const char* ExecutionModelString(int); +const char* ExecutionModeString(int); +const char* StorageClassString(int); +const char* DecorationString(int); +const char* BuiltInString(int); +const char* DimensionString(int); +const char* SelectControlString(int); +const char* LoopControlString(int); +const char* FunctionControlString(int); +const char* SamplerAddressingModeString(int); +const char* SamplerFilterModeString(int); +const char* ImageFormatString(int); +const char* ImageChannelOrderString(int); +const char* ImageChannelTypeString(int); +const char* ImageChannelDataTypeString(int type); +const char* ImageOperandsString(int format); +const char* ImageOperands(int); +const char* FPFastMathString(int); +const char* FPRoundingModeString(int); +const char* LinkageTypeString(int); +const char* FuncParamAttrString(int); +const char* AccessQualifierString(int); +const char* MemorySemanticsString(int); +const char* MemoryAccessString(int); +const char* ExecutionScopeString(int); +const char* GroupOperationString(int); +const char* KernelEnqueueFlagsString(int); +const char* KernelProfilingInfoString(int); +const char* CapabilityString(int); +const char* OpcodeString(int); +const char* ScopeString(int mem); + +// For grouping opcodes into subsections +enum OpcodeClass { + OpClassMisc, + OpClassDebug, + OpClassAnnotate, + OpClassExtension, + OpClassMode, + OpClassType, + OpClassConstant, + OpClassMemory, + OpClassFunction, + OpClassImage, + OpClassConvert, + OpClassComposite, + OpClassArithmetic, + OpClassBit, + OpClassRelationalLogical, + OpClassDerivative, + OpClassFlowControl, + OpClassAtomic, + OpClassPrimitive, + OpClassBarrier, + OpClassGroup, + OpClassDeviceSideEnqueue, + OpClassPipe, + + OpClassCount, + OpClassMissing // all instructions start out as missing +}; + +// For parameterizing operands. +enum OperandClass { + OperandNone, + OperandId, + OperandVariableIds, + OperandOptionalLiteral, + OperandOptionalLiteralString, + OperandVariableLiterals, + OperandVariableIdLiteral, + OperandVariableLiteralId, + OperandLiteralNumber, + OperandLiteralString, + OperandVariableLiteralStrings, + OperandSource, + OperandExecutionModel, + OperandAddressing, + OperandMemory, + OperandExecutionMode, + OperandStorage, + OperandDimensionality, + OperandSamplerAddressingMode, + OperandSamplerFilterMode, + OperandSamplerImageFormat, + OperandImageChannelOrder, + OperandImageChannelDataType, + OperandImageOperands, + OperandFPFastMath, + OperandFPRoundingMode, + OperandLinkageType, + OperandAccessQualifier, + OperandFuncParamAttr, + OperandDecoration, + OperandBuiltIn, + OperandSelect, + OperandLoop, + OperandFunction, + OperandMemorySemantics, + OperandMemoryAccess, + OperandScope, + OperandGroupOperation, + OperandKernelEnqueueFlags, + OperandKernelProfilingInfo, + OperandCapability, + + OperandOpcode, + + OperandCount +}; + +// Any specific enum can have a set of capabilities that allow it: +typedef std::vector EnumCaps; + +// Parameterize a set of operands with their OperandClass(es) and descriptions. 
+class OperandParameters { +public: + OperandParameters() { } + void push(OperandClass oc, const char* d, bool opt = false) + { + opClass.push_back(oc); + desc.push_back(d); + optional.push_back(opt); + } + void setOptional(); + OperandClass getClass(int op) const { return opClass[op]; } + const char* getDesc(int op) const { return desc[op]; } + bool isOptional(int op) const { return optional[op]; } + int getNum() const { return (int)opClass.size(); } + +protected: + std::vector opClass; + std::vector desc; + std::vector optional; +}; + +// Parameterize an enumerant +class EnumParameters { +public: + EnumParameters() : desc(0) { } + const char* desc; +}; + +// Parameterize a set of enumerants that form an enum +class EnumDefinition : public EnumParameters { +public: + EnumDefinition() : + ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { } + void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false) + { + ceiling = ceil; + getName = name; + bitmask = mask; + enumParams = ep; + } + void setOperands(OperandParameters* op) { operandParams = op; } + int ceiling; // ceiling of enumerants + bool bitmask; // true if these enumerants combine into a bitmask + const char* (*getName)(int); // a function that returns the name for each enumerant value (or shift) + EnumParameters* enumParams; // parameters for each individual enumerant + OperandParameters* operandParams; // sets of operands +}; + +// Parameterize an instruction's logical format, including its known set of operands, +// per OperandParameters above. +class InstructionParameters { +public: + InstructionParameters() : + opDesc("TBD"), + opClass(OpClassMissing), + typePresent(true), // most normal, only exceptions have to be spelled out + resultPresent(true) // most normal, only exceptions have to be spelled out + { } + + void setResultAndType(bool r, bool t) + { + resultPresent = r; + typePresent = t; + } + + bool hasResult() const { return resultPresent != 0; } + bool hasType() const { return typePresent != 0; } + + const char* opDesc; + OpcodeClass opClass; + OperandParameters operands; + +protected: + int typePresent : 1; + int resultPresent : 1; +}; + +// The set of objects that hold all the instruction/operand +// parameterization information. +extern InstructionParameters InstructionDesc[]; + +// These hold definitions of the enumerants used for operands +extern EnumDefinition OperandClassParams[]; + +const char* GetOperandDesc(OperandClass operand); +void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false); +const char* AccessQualifierString(int attr); + +void PrintOperands(const OperandParameters& operands, int reservedOperands); + +} // end namespace spv diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/hex_float.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/hex_float.h new file mode 100644 index 0000000..8be8e9f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/hex_float.h @@ -0,0 +1,1078 @@ +// Copyright (c) 2015-2016 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
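Editorial aside, not part of the patch: a sketch of reading the opcode tables that doc.h exposes. Indexing InstructionDesc directly by a core opcode is an assumption based on how the glslang disassembler consumes these tables; the helper name printOpcodeInfo is hypothetical.

#include <cstdio>
#include "SPIRV/doc.h"

void printOpcodeInfo(spv::Op op)
{
    spv::Parameterize();   // must run before InstructionDesc is used
    const spv::InstructionParameters& inst = spv::InstructionDesc[op];
    std::printf("%s: %d operand(s), has result: %d\n",
                spv::OpcodeString(op), inst.operands.getNum(), (int)inst.hasResult());
}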
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_
+#define LIBSPIRV_UTIL_HEX_FLOAT_H_
+
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+namespace std {
+bool isnan(double f)
+{
+  return ::_isnan(f) != 0;
+}
+bool isinf(double f)
+{
+  return ::_finite(f) == 0;
+}
+}
+#endif
+
+#include "bitutils.h"
+
+namespace spvutils {
+
+class Float16 {
+ public:
+  Float16(uint16_t v) : val(v) {}
+  Float16() {}
+  static bool isNan(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0);
+  }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0);
+  }
+  Float16(const Float16& other) { val = other.val; }
+  uint16_t get_value() const { return val; }
+
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16(0x7bff); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16(0xfbff); }
+
+ private:
+  uint16_t val;
+};
+
+// To specialize this type, you must override uint_type to define
+// an unsigned integer that can fit your floating point type.
+// You must also add a isNan function that returns true if
+// a value is Nan.
+template <typename T>
+struct FloatProxyTraits {
+  typedef void uint_type;
+};
+
+template <>
+struct FloatProxyTraits<float> {
+  typedef uint32_t uint_type;
+  static bool isNan(float f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(float f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static float max() { return std::numeric_limits<float>::max(); }
+  // Returns the lowest normal value.
+  static float lowest() { return std::numeric_limits<float>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<double> {
+  typedef uint64_t uint_type;
+  static bool isNan(double f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(double f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static double max() { return std::numeric_limits<double>::max(); }
+  // Returns the lowest normal value.
+  static double lowest() { return std::numeric_limits<double>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<Float16> {
+  typedef uint16_t uint_type;
+  static bool isNan(Float16 f) { return Float16::isNan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(Float16 f) { return Float16::isInfinity(f); }
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16::max(); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16::lowest(); }
+};
+
+// Since copying a floating point number (especially if it is NaN)
+// does not guarantee that bits are preserved, this class lets us
+// store the type and use it as a float when necessary.
+template <typename T>
+class FloatProxy {
+ public:
+  typedef typename FloatProxyTraits<T>::uint_type uint_type;
+
+  // Since this is to act similar to the normal floats,
+  // do not initialize the data by default.
+ FloatProxy() {} + + // Intentionally non-explicit. This is a proxy type so + // implicit conversions allow us to use it more transparently. + FloatProxy(T val) { data_ = BitwiseCast(val); } + + // Intentionally non-explicit. This is a proxy type so + // implicit conversions allow us to use it more transparently. + FloatProxy(uint_type val) { data_ = val; } + + // This is helpful to have and is guaranteed not to stomp bits. + FloatProxy operator-() const { + return static_cast(data_ ^ + (uint_type(0x1) << (sizeof(T) * 8 - 1))); + } + + // Returns the data as a floating point value. + T getAsFloat() const { return BitwiseCast(data_); } + + // Returns the raw data. + uint_type data() const { return data_; } + + // Returns true if the value represents any type of NaN. + bool isNan() { return FloatProxyTraits::isNan(getAsFloat()); } + // Returns true if the value represents any type of infinity. + bool isInfinity() { return FloatProxyTraits::isInfinity(getAsFloat()); } + + // Returns the maximum normal value. + static FloatProxy max() { + return FloatProxy(FloatProxyTraits::max()); + } + // Returns the lowest normal value. + static FloatProxy lowest() { + return FloatProxy(FloatProxyTraits::lowest()); + } + + private: + uint_type data_; +}; + +template +bool operator==(const FloatProxy& first, const FloatProxy& second) { + return first.data() == second.data(); +} + +// Reads a FloatProxy value as a normal float from a stream. +template +std::istream& operator>>(std::istream& is, FloatProxy& value) { + T float_val; + is >> float_val; + value = FloatProxy(float_val); + return is; +} + +// This is an example traits. It is not meant to be used in practice, but will +// be the default for any non-specialized type. +template +struct HexFloatTraits { + // Integer type that can store this hex-float. + typedef void uint_type; + // Signed integer type that can store this hex-float. + typedef void int_type; + // The numerical type that this HexFloat represents. + typedef void underlying_type; + // The type needed to construct the underlying type. + typedef void native_type; + // The number of bits that are actually relevant in the uint_type. + // This allows us to deal with, for example, 24-bit values in a 32-bit + // integer. + static const uint32_t num_used_bits = 0; + // Number of bits that represent the exponent. + static const uint32_t num_exponent_bits = 0; + // Number of bits that represent the fractional part. + static const uint32_t num_fraction_bits = 0; + // The bias of the exponent. (How much we need to subtract from the stored + // value to get the correct value.) + static const uint32_t exponent_bias = 0; +}; + +// Traits for IEEE float. +// 1 sign bit, 8 exponent bits, 23 fractional bits. +template <> +struct HexFloatTraits> { + typedef uint32_t uint_type; + typedef int32_t int_type; + typedef FloatProxy underlying_type; + typedef float native_type; + static const uint_type num_used_bits = 32; + static const uint_type num_exponent_bits = 8; + static const uint_type num_fraction_bits = 23; + static const uint_type exponent_bias = 127; +}; + +// Traits for IEEE double. +// 1 sign bit, 11 exponent bits, 52 fractional bits. 
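Editorial aside, not part of the patch: a small sketch of the bit-pattern semantics described above. FloatProxy equality compares raw bits, so two NaNs with identical payloads compare equal even though the underlying floats never do; the helper name nanBitsDemo is hypothetical.

#include <cmath>

inline void nanBitsDemo()
{
    spvutils::FloatProxy<float> a(std::nanf(""));
    spvutils::FloatProxy<float> b = a;
    bool sameBits  = (a == b);                            // true: uint32_t comparison
    bool sameFloat = (a.getAsFloat() == b.getAsFloat());  // false: NaN != NaN
    (void)sameBits; (void)sameFloat;
}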
+template <> +struct HexFloatTraits> { + typedef uint64_t uint_type; + typedef int64_t int_type; + typedef FloatProxy underlying_type; + typedef double native_type; + static const uint_type num_used_bits = 64; + static const uint_type num_exponent_bits = 11; + static const uint_type num_fraction_bits = 52; + static const uint_type exponent_bias = 1023; +}; + +// Traits for IEEE half. +// 1 sign bit, 5 exponent bits, 10 fractional bits. +template <> +struct HexFloatTraits> { + typedef uint16_t uint_type; + typedef int16_t int_type; + typedef uint16_t underlying_type; + typedef uint16_t native_type; + static const uint_type num_used_bits = 16; + static const uint_type num_exponent_bits = 5; + static const uint_type num_fraction_bits = 10; + static const uint_type exponent_bias = 15; +}; + +enum round_direction { + kRoundToZero, + kRoundToNearestEven, + kRoundToPositiveInfinity, + kRoundToNegativeInfinity +}; + +// Template class that houses a floating pointer number. +// It exposes a number of constants based on the provided traits to +// assist in interpreting the bits of the value. +template > +class HexFloat { + public: + typedef typename Traits::uint_type uint_type; + typedef typename Traits::int_type int_type; + typedef typename Traits::underlying_type underlying_type; + typedef typename Traits::native_type native_type; + + explicit HexFloat(T f) : value_(f) {} + + T value() const { return value_; } + void set_value(T f) { value_ = f; } + + // These are all written like this because it is convenient to have + // compile-time constants for all of these values. + + // Pass-through values to save typing. + static const uint32_t num_used_bits = Traits::num_used_bits; + static const uint32_t exponent_bias = Traits::exponent_bias; + static const uint32_t num_exponent_bits = Traits::num_exponent_bits; + static const uint32_t num_fraction_bits = Traits::num_fraction_bits; + + // Number of bits to shift left to set the highest relevant bit. + static const uint32_t top_bit_left_shift = num_used_bits - 1; + // How many nibbles (hex characters) the fractional part takes up. + static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4; + // If the fractional part does not fit evenly into a hex character (4-bits) + // then we have to left-shift to get rid of leading 0s. This is the amount + // we have to shift (might be 0). + static const uint32_t num_overflow_bits = + fraction_nibbles * 4 - num_fraction_bits; + + // The representation of the fraction, not the actual bits. This + // includes the leading bit that is usually implicit. + static const uint_type fraction_represent_mask = + spvutils::SetBits::get; + + // The topmost bit in the nibble-aligned fraction. + static const uint_type fraction_top_bit = + uint_type(1) << (num_fraction_bits + num_overflow_bits - 1); + + // The least significant bit in the exponent, which is also the bit + // immediately to the left of the significand. + static const uint_type first_exponent_bit = uint_type(1) + << (num_fraction_bits); + + // The mask for the encoded fraction. It does not include the + // implicit bit. + static const uint_type fraction_encode_mask = + spvutils::SetBits::get; + + // The bit that is used as a sign. + static const uint_type sign_mask = uint_type(1) << top_bit_left_shift; + + // The bits that represent the exponent. + static const uint_type exponent_mask = + spvutils::SetBits::get; + + // How far left the exponent is shifted. 
+ static const uint32_t exponent_left_shift = num_fraction_bits; + + // How far from the right edge the fraction is shifted. + static const uint32_t fraction_right_shift = + static_cast(sizeof(uint_type) * 8) - num_fraction_bits; + + // The maximum representable unbiased exponent. + static const int_type max_exponent = + (exponent_mask >> num_fraction_bits) - exponent_bias; + // The minimum representable exponent for normalized numbers. + static const int_type min_exponent = -static_cast(exponent_bias); + + // Returns the bits associated with the value. + uint_type getBits() const { return spvutils::BitwiseCast(value_); } + + // Returns the bits associated with the value, without the leading sign bit. + uint_type getUnsignedBits() const { + return static_cast(spvutils::BitwiseCast(value_) & + ~sign_mask); + } + + // Returns the bits associated with the exponent, shifted to start at the + // lsb of the type. + const uint_type getExponentBits() const { + return static_cast((getBits() & exponent_mask) >> + num_fraction_bits); + } + + // Returns the exponent in unbiased form. This is the exponent in the + // human-friendly form. + const int_type getUnbiasedExponent() const { + return static_cast(getExponentBits() - exponent_bias); + } + + // Returns just the significand bits from the value. + const uint_type getSignificandBits() const { + return getBits() & fraction_encode_mask; + } + + // If the number was normalized, returns the unbiased exponent. + // If the number was denormal, normalize the exponent first. + const int_type getUnbiasedNormalizedExponent() const { + if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0 + return 0; + } + int_type exp = getUnbiasedExponent(); + if (exp == min_exponent) { // We are in denorm land. + uint_type significand_bits = getSignificandBits(); + while ((significand_bits & (first_exponent_bit >> 1)) == 0) { + significand_bits = static_cast(significand_bits << 1); + exp = static_cast(exp - 1); + } + significand_bits &= fraction_encode_mask; + } + return exp; + } + + // Returns the signficand after it has been normalized. + const uint_type getNormalizedSignificand() const { + int_type unbiased_exponent = getUnbiasedNormalizedExponent(); + uint_type significand = getSignificandBits(); + for (int_type i = unbiased_exponent; i <= min_exponent; ++i) { + significand = static_cast(significand << 1); + } + significand &= fraction_encode_mask; + return significand; + } + + // Returns true if this number represents a negative value. + bool isNegative() const { return (getBits() & sign_mask) != 0; } + + // Sets this HexFloat from the individual components. + // Note this assumes EVERY significand is normalized, and has an implicit + // leading one. This means that the only way that this method will set 0, + // is if you set a number so denormalized that it underflows. + // Do not use this method with raw bits extracted from a subnormal number, + // since subnormals do not have an implicit leading 1 in the significand. + // The significand is also expected to be in the + // lowest-most num_fraction_bits of the uint_type. + // The exponent is expected to be unbiased, meaning an exponent of + // 0 actually means 0. + // If underflow_round_up is set, then on underflow, if a number is non-0 + // and would underflow, we round up to the smallest denorm. 
+ void setFromSignUnbiasedExponentAndNormalizedSignificand( + bool negative, int_type exponent, uint_type significand, + bool round_denorm_up) { + bool significand_is_zero = significand == 0; + + if (exponent <= min_exponent) { + // If this was denormalized, then we have to shift the bit on, meaning + // the significand is not zero. + significand_is_zero = false; + significand |= first_exponent_bit; + significand = static_cast(significand >> 1); + } + + while (exponent < min_exponent) { + significand = static_cast(significand >> 1); + ++exponent; + } + + if (exponent == min_exponent) { + if (significand == 0 && !significand_is_zero && round_denorm_up) { + significand = static_cast(0x1); + } + } + + uint_type new_value = 0; + if (negative) { + new_value = static_cast(new_value | sign_mask); + } + exponent = static_cast(exponent + exponent_bias); + assert(exponent >= 0); + + // put it all together + exponent = static_cast((exponent << exponent_left_shift) & + exponent_mask); + significand = static_cast(significand & fraction_encode_mask); + new_value = static_cast(new_value | (exponent | significand)); + value_ = BitwiseCast(new_value); + } + + // Increments the significand of this number by the given amount. + // If this would spill the significand into the implicit bit, + // carry is set to true and the significand is shifted to fit into + // the correct location, otherwise carry is set to false. + // All significands and to_increment are assumed to be within the bounds + // for a valid significand. + static uint_type incrementSignificand(uint_type significand, + uint_type to_increment, bool* carry) { + significand = static_cast(significand + to_increment); + *carry = false; + if (significand & first_exponent_bit) { + *carry = true; + // The implicit 1-bit will have carried, so we should zero-out the + // top bit and shift back. + significand = static_cast(significand & ~first_exponent_bit); + significand = static_cast(significand >> 1); + } + return significand; + } + + // These exist because MSVC throws warnings on negative right-shifts + // even if they are not going to be executed. Eg: + // constant_number < 0? 0: constant_number + // These convert the negative left-shifts into right shifts. + + template + uint_type negatable_left_shift(int_type N, uint_type val) + { + if(N >= 0) + return val << N; + + return val >> -N; + } + + template + uint_type negatable_right_shift(int_type N, uint_type val) + { + if(N >= 0) + return val >> N; + + return val << -N; + } + + // Returns the significand, rounded to fit in a significand in + // other_T. This is shifted so that the most significant + // bit of the rounded number lines up with the most significant bit + // of the returned significand. + template + typename other_T::uint_type getRoundedNormalizedSignificand( + round_direction dir, bool* carry_bit) { + typedef typename other_T::uint_type other_uint_type; + static const int_type num_throwaway_bits = + static_cast(num_fraction_bits) - + static_cast(other_T::num_fraction_bits); + + static const uint_type last_significant_bit = + (num_throwaway_bits < 0) + ? 0 + : negatable_left_shift(num_throwaway_bits, 1u); + static const uint_type first_rounded_bit = + (num_throwaway_bits < 1) + ? 0 + : negatable_left_shift(num_throwaway_bits - 1, 1u); + + static const uint_type throwaway_mask_bits = + num_throwaway_bits > 0 ? 
num_throwaway_bits : 0; + static const uint_type throwaway_mask = + spvutils::SetBits::get; + + *carry_bit = false; + other_uint_type out_val = 0; + uint_type significand = getNormalizedSignificand(); + // If we are up-casting, then we just have to shift to the right location. + if (num_throwaway_bits <= 0) { + out_val = static_cast(significand); + uint_type shift_amount = static_cast(-num_throwaway_bits); + out_val = static_cast(out_val << shift_amount); + return out_val; + } + + // If every non-representable bit is 0, then we don't have any casting to + // do. + if ((significand & throwaway_mask) == 0) { + return static_cast( + negatable_right_shift(num_throwaway_bits, significand)); + } + + bool round_away_from_zero = false; + // We actually have to narrow the significand here, so we have to follow the + // rounding rules. + switch (dir) { + case kRoundToZero: + break; + case kRoundToPositiveInfinity: + round_away_from_zero = !isNegative(); + break; + case kRoundToNegativeInfinity: + round_away_from_zero = isNegative(); + break; + case kRoundToNearestEven: + // Have to round down, round bit is 0 + if ((first_rounded_bit & significand) == 0) { + break; + } + if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) { + // If any subsequent bit of the rounded portion is non-0 then we round + // up. + round_away_from_zero = true; + break; + } + // We are exactly half-way between 2 numbers, pick even. + if ((significand & last_significant_bit) != 0) { + // 1 for our last bit, round up. + round_away_from_zero = true; + break; + } + break; + } + + if (round_away_from_zero) { + return static_cast( + negatable_right_shift(num_throwaway_bits, incrementSignificand( + significand, last_significant_bit, carry_bit))); + } else { + return static_cast( + negatable_right_shift(num_throwaway_bits, significand)); + } + } + + // Casts this value to another HexFloat. If the cast is widening, + // then round_dir is ignored. If the cast is narrowing, then + // the result is rounded in the direction specified. + // This number will retain Nan and Inf values. + // It will also saturate to Inf if the number overflows, and + // underflow to (0 or min depending on rounding) if the number underflows. + template + void castTo(other_T& other, round_direction round_dir) { + other = other_T(static_cast(0)); + bool negate = isNegative(); + if (getUnsignedBits() == 0) { + if (negate) { + other.set_value(-other.value()); + } + return; + } + uint_type significand = getSignificandBits(); + bool carried = false; + typename other_T::uint_type rounded_significand = + getRoundedNormalizedSignificand(round_dir, &carried); + + int_type exponent = getUnbiasedExponent(); + if (exponent == min_exponent) { + // If we are denormal, normalize the exponent, so that we can encode + // easily. + exponent = static_cast(exponent + 1); + for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0; + check_bit = static_cast(check_bit >> 1)) { + exponent = static_cast(exponent - 1); + if (check_bit & significand) break; + } + } + + bool is_nan = + (getBits() & exponent_mask) == exponent_mask && significand != 0; + bool is_inf = + !is_nan && + ((exponent + carried) > static_cast(other_T::exponent_bias) || + (significand == 0 && (getBits() & exponent_mask) == exponent_mask)); + + // If we are Nan or Inf we should pass that through. + if (is_inf) { + other.set_value(BitwiseCast( + static_cast( + (negate ? 
other_T::sign_mask : 0) | other_T::exponent_mask))); + return; + } + if (is_nan) { + typename other_T::uint_type shifted_significand; + shifted_significand = static_cast( + negatable_left_shift( + static_cast(other_T::num_fraction_bits) - + static_cast(num_fraction_bits), significand)); + + // We are some sort of Nan. We try to keep the bit-pattern of the Nan + // as close as possible. If we had to shift off bits so we are 0, then we + // just set the last bit. + other.set_value(BitwiseCast( + static_cast( + (negate ? other_T::sign_mask : 0) | other_T::exponent_mask | + (shifted_significand == 0 ? 0x1 : shifted_significand)))); + return; + } + + bool round_underflow_up = + isNegative() ? round_dir == kRoundToNegativeInfinity + : round_dir == kRoundToPositiveInfinity; + typedef typename other_T::int_type other_int_type; + // setFromSignUnbiasedExponentAndNormalizedSignificand will + // zero out any underflowing value (but retain the sign). + other.setFromSignUnbiasedExponentAndNormalizedSignificand( + negate, static_cast(exponent), rounded_significand, + round_underflow_up); + return; + } + + private: + T value_; + + static_assert(num_used_bits == + Traits::num_exponent_bits + Traits::num_fraction_bits + 1, + "The number of bits do not fit"); + static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match"); +}; + +// Returns 4 bits represented by the hex character. +inline uint8_t get_nibble_from_character(int character) { + const char* dec = "0123456789"; + const char* lower = "abcdef"; + const char* upper = "ABCDEF"; + const char* p = nullptr; + if ((p = strchr(dec, character))) { + return static_cast(p - dec); + } else if ((p = strchr(lower, character))) { + return static_cast(p - lower + 0xa); + } else if ((p = strchr(upper, character))) { + return static_cast(p - upper + 0xa); + } + + assert(false && "This was called with a non-hex character"); + return 0; +} + +// Outputs the given HexFloat to the stream. +template +std::ostream& operator<<(std::ostream& os, const HexFloat& value) { + typedef HexFloat HF; + typedef typename HF::uint_type uint_type; + typedef typename HF::int_type int_type; + + static_assert(HF::num_used_bits != 0, + "num_used_bits must be non-zero for a valid float"); + static_assert(HF::num_exponent_bits != 0, + "num_exponent_bits must be non-zero for a valid float"); + static_assert(HF::num_fraction_bits != 0, + "num_fractin_bits must be non-zero for a valid float"); + + const uint_type bits = spvutils::BitwiseCast(value.value()); + const char* const sign = (bits & HF::sign_mask) ? "-" : ""; + const uint_type exponent = static_cast( + (bits & HF::exponent_mask) >> HF::num_fraction_bits); + + uint_type fraction = static_cast((bits & HF::fraction_encode_mask) + << HF::num_overflow_bits); + + const bool is_zero = exponent == 0 && fraction == 0; + const bool is_denorm = exponent == 0 && !is_zero; + + // exponent contains the biased exponent we have to convert it back into + // the normal range. + int_type int_exponent = static_cast(exponent - HF::exponent_bias); + // If the number is all zeros, then we actually have to NOT shift the + // exponent. + int_exponent = is_zero ? 0 : int_exponent; + + // If we are denorm, then start shifting, and decreasing the exponent until + // our leading bit is 1. 
+ + if (is_denorm) { + while ((fraction & HF::fraction_top_bit) == 0) { + fraction = static_cast(fraction << 1); + int_exponent = static_cast(int_exponent - 1); + } + // Since this is denormalized, we have to consume the leading 1 since it + // will end up being implicit. + fraction = static_cast(fraction << 1); // eat the leading 1 + fraction &= HF::fraction_represent_mask; + } + + uint_type fraction_nibbles = HF::fraction_nibbles; + // We do not have to display any trailing 0s, since this represents the + // fractional part. + while (fraction_nibbles > 0 && (fraction & 0xF) == 0) { + // Shift off any trailing values; + fraction = static_cast(fraction >> 4); + --fraction_nibbles; + } + + const auto saved_flags = os.flags(); + const auto saved_fill = os.fill(); + + os << sign << "0x" << (is_zero ? '0' : '1'); + if (fraction_nibbles) { + // Make sure to keep the leading 0s in place, since this is the fractional + // part. + os << "." << std::setw(static_cast(fraction_nibbles)) + << std::setfill('0') << std::hex << fraction; + } + os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent; + + os.flags(saved_flags); + os.fill(saved_fill); + + return os; +} + +// Returns true if negate_value is true and the next character on the +// input stream is a plus or minus sign. In that case we also set the fail bit +// on the stream and set the value to the zero value for its type. +template +inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value, + HexFloat& value) { + if (negate_value) { + auto next_char = is.peek(); + if (next_char == '-' || next_char == '+') { + // Fail the parse. Emulate standard behaviour by setting the value to + // the zero value, and set the fail bit on the stream. + value = HexFloat(typename HexFloat::uint_type(0)); + is.setstate(std::ios_base::failbit); + return true; + } + } + return false; +} + +// Parses a floating point number from the given stream and stores it into the +// value parameter. +// If negate_value is true then the number may not have a leading minus or +// plus, and if it successfully parses, then the number is negated before +// being stored into the value parameter. +// If the value cannot be correctly parsed or overflows the target floating +// point type, then set the fail bit on the stream. +// TODO(dneto): Promise C++11 standard behavior in how the value is set in +// the error case, but only after all target platforms implement it correctly. +// In particular, the Microsoft C++ runtime appears to be out of spec. +template +inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value, + HexFloat& value) { + if (RejectParseDueToLeadingSign(is, negate_value, value)) { + return is; + } + T val; + is >> val; + if (negate_value) { + val = -val; + } + value.set_value(val); + // In the failure case, map -0.0 to 0.0. + if (is.fail() && value.getUnsignedBits() == 0u) { + value = HexFloat(typename HexFloat::uint_type(0)); + } + if (val.isInfinity()) { + // Fail the parse. Emulate standard behaviour by setting the value to + // the closest normal value, and set the fail bit on the stream. + value.set_value((value.isNegative() || negate_value) ? T::lowest() + : T::max()); + is.setstate(std::ios_base::failbit); + } + return is; +} + +// Specialization of ParseNormalFloat for FloatProxy values. +// This will parse the float as it were a 32-bit floating point number, +// and then round it down to fit into a Float16 value. +// The number is rounded towards zero. 
+// If negate_value is true then the number may not have a leading minus or +// plus, and if it successfully parses, then the number is negated before +// being stored into the value parameter. +// If the value cannot be correctly parsed or overflows the target floating +// point type, then set the fail bit on the stream. +// TODO(dneto): Promise C++11 standard behavior in how the value is set in +// the error case, but only after all target platforms implement it correctly. +// In particular, the Microsoft C++ runtime appears to be out of spec. +template <> +inline std::istream& +ParseNormalFloat, HexFloatTraits>>( + std::istream& is, bool negate_value, + HexFloat, HexFloatTraits>>& value) { + // First parse as a 32-bit float. + HexFloat> float_val(0.0f); + ParseNormalFloat(is, negate_value, float_val); + + // Then convert to 16-bit float, saturating at infinities, and + // rounding toward zero. + float_val.castTo(value, kRoundToZero); + + // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the + // fail bit and set the lowest or highest value. + if (Float16::isInfinity(value.value().getAsFloat())) { + value.set_value(value.isNegative() ? Float16::lowest() : Float16::max()); + is.setstate(std::ios_base::failbit); + } + return is; +} + +// Reads a HexFloat from the given stream. +// If the float is not encoded as a hex-float then it will be parsed +// as a regular float. +// This may fail if your stream does not support at least one unget. +// Nan values can be encoded with "0x1.p+exponent_bias". +// This would normally overflow a float and round to +// infinity but this special pattern is the exact representation for a NaN, +// and therefore is actually encoded as the correct NaN. To encode inf, +// either 0x0p+exponent_bias can be specified or any exponent greater than +// exponent_bias. +// Examples using IEEE 32-bit float encoding. +// 0x1.0p+128 (+inf) +// -0x1.0p-128 (-inf) +// +// 0x1.1p+128 (+Nan) +// -0x1.1p+128 (-Nan) +// +// 0x1p+129 (+inf) +// -0x1p+129 (-inf) +template +std::istream& operator>>(std::istream& is, HexFloat& value) { + using HF = HexFloat; + using uint_type = typename HF::uint_type; + using int_type = typename HF::int_type; + + value.set_value(static_cast(0.f)); + + if (is.flags() & std::ios::skipws) { + // If the user wants to skip whitespace , then we should obey that. + while (std::isspace(is.peek())) { + is.get(); + } + } + + auto next_char = is.peek(); + bool negate_value = false; + + if (next_char != '-' && next_char != '0') { + return ParseNormalFloat(is, negate_value, value); + } + + if (next_char == '-') { + negate_value = true; + is.get(); + next_char = is.peek(); + } + + if (next_char == '0') { + is.get(); // We may have to unget this. + auto maybe_hex_start = is.peek(); + if (maybe_hex_start != 'x' && maybe_hex_start != 'X') { + is.unget(); + return ParseNormalFloat(is, negate_value, value); + } else { + is.get(); // Throw away the 'x'; + } + } else { + return ParseNormalFloat(is, negate_value, value); + } + + // This "looks" like a hex-float so treat it as one. + bool seen_p = false; + bool seen_dot = false; + uint_type fraction_index = 0; + + uint_type fraction = 0; + int_type exponent = HF::exponent_bias; + + // Strip off leading zeros so we don't have to special-case them later. + while ((next_char = is.peek()) == '0') { + is.get(); + } + + bool is_denorm = + true; // Assume denorm "representation" until we hear otherwise. + // NB: This does not mean the value is actually denorm, + // it just means that it was written 0. 
+ bool bits_written = false; // Stays false until we write a bit. + while (!seen_p && !seen_dot) { + // Handle characters that are left of the fractional part. + if (next_char == '.') { + seen_dot = true; + } else if (next_char == 'p') { + seen_p = true; + } else if (::isxdigit(next_char)) { + // We know this is not denormalized since we have stripped all leading + // zeroes and we are not a ".". + is_denorm = false; + int number = get_nibble_from_character(next_char); + for (int i = 0; i < 4; ++i, number <<= 1) { + uint_type write_bit = (number & 0x8) ? 0x1 : 0x0; + if (bits_written) { + // If we are here the bits represented belong in the fractional + // part of the float, and we have to adjust the exponent accordingly. + fraction = static_cast<uint_type>( + fraction | + static_cast<uint_type>( + write_bit << (HF::top_bit_left_shift - fraction_index++))); + exponent = static_cast<int_type>(exponent + 1); + } + bits_written |= write_bit != 0; + } + } else { + // We have not found our exponent yet, so we have to fail. + is.setstate(std::ios::failbit); + return is; + } + is.get(); + next_char = is.peek(); + } + bits_written = false; + while (seen_dot && !seen_p) { + // Handle only fractional parts now. + if (next_char == 'p') { + seen_p = true; + } else if (::isxdigit(next_char)) { + int number = get_nibble_from_character(next_char); + for (int i = 0; i < 4; ++i, number <<= 1) { + uint_type write_bit = (number & 0x8) ? 0x01 : 0x00; + bits_written |= write_bit != 0; + if (is_denorm && !bits_written) { + // Handle modifying the exponent here; this way we can handle + // an arbitrary number of hex values without overflowing our + // integer. + exponent = static_cast<int_type>(exponent - 1); + } else { + fraction = static_cast<uint_type>( + fraction | + static_cast<uint_type>( + write_bit << (HF::top_bit_left_shift - fraction_index++))); + } + } + } else { + // We still have not found our 'p' exponent yet, so this is not a valid + // hex-float. + is.setstate(std::ios::failbit); + return is; + } + is.get(); + next_char = is.peek(); + } + + bool seen_sign = false; + int8_t exponent_sign = 1; + int_type written_exponent = 0; + while (true) { + if ((next_char == '-' || next_char == '+')) { + if (seen_sign) { + is.setstate(std::ios::failbit); + return is; + } + seen_sign = true; + exponent_sign = (next_char == '-') ? -1 : 1; + } else if (::isdigit(next_char)) { + // Hex-floats express their exponent as decimal. + written_exponent = static_cast<int_type>(written_exponent * 10); + written_exponent = + static_cast<int_type>(written_exponent + (next_char - '0')); + } else { + break; + } + is.get(); + next_char = is.peek(); + } + + written_exponent = static_cast<int_type>(written_exponent * exponent_sign); + exponent = static_cast<int_type>(exponent + written_exponent); + + bool is_zero = is_denorm && (fraction == 0); + if (is_denorm && !is_zero) { + fraction = static_cast<uint_type>(fraction << 1); + exponent = static_cast<int_type>(exponent - 1); + } else if (is_zero) { + exponent = 0; + } + + if (exponent <= 0 && !is_zero) { + fraction = static_cast<uint_type>(fraction >> 1); + fraction |= static_cast<uint_type>(1) << HF::top_bit_left_shift; + } + + fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask; + + const int_type max_exponent = + SetBits<uint_type, 0, HF::num_exponent_bits>::get; + + // Handle actual denorm numbers. + while (exponent < 0 && !is_zero) { + fraction = static_cast<uint_type>(fraction >> 1); + exponent = static_cast<int_type>(exponent + 1); + + fraction &= HF::fraction_encode_mask; + if (fraction == 0) { + // We have underflowed our fraction. We should clamp to zero. + is_zero = true; + exponent = 0; + } + } 
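+ // Illustrative note (not part of the upstream header): parsing "0x1p-200" + // as a 32-bit float leaves the exponent far below zero, so the loop above + // shifts every fraction bit away, sets is_zero, and the parse quietly + // underflows to +0 instead of failing the stream.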
+ // We have overflowed so we should be inf/-inf. + if (exponent > max_exponent) { + exponent = max_exponent; + fraction = 0; + } + + uint_type output_bits = static_cast<uint_type>( + static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift); + output_bits |= fraction; + + uint_type shifted_exponent = static_cast<uint_type>( + static_cast<uint_type>(exponent << HF::exponent_left_shift) & + HF::exponent_mask); + output_bits |= shifted_exponent; + + T output_float = spvutils::BitwiseCast<T>(output_bits); + value.set_value(output_float); + + return is; +} + +// Writes a FloatProxy value to a stream. +// Zero and normal numbers are printed in the usual notation, but with +// enough digits to fully reproduce the value. Other values (subnormal, +// NaN, and infinity) are printed as a hex float. +template <typename T> +std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) { + auto float_val = value.getAsFloat(); + switch (std::fpclassify(float_val)) { + case FP_ZERO: + case FP_NORMAL: { + auto saved_precision = os.precision(); + os.precision(std::numeric_limits<T>::digits10); + os << float_val; + os.precision(saved_precision); + } break; + default: + os << HexFloat<FloatProxy<T>>(value); + break; + } + return os; +} + +template <> +inline std::ostream& operator<<(std::ostream& os, + const FloatProxy<Float16>& value) { + os << HexFloat<FloatProxy<Float16>>(value); + return os; +} +} // namespace spvutils + +#endif // LIBSPIRV_UTIL_HEX_FLOAT_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/spirv.hpp b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/spirv.hpp new file mode 100644 index 0000000..f85469d --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/spirv.hpp @@ -0,0 +1,2533 @@ +// Copyright (c) 2014-2020 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. 
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageMax = 0x7fffffff, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, + ExecutionModelTaskNV = 5267, + ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, + ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 5314, + ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, + ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, + ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, + ExecutionModelMissNV = 5317, + ExecutionModelCallableKHR = 5318, + ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, + ExecutionModelMax = 0x7fffffff, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, + AddressingModelPhysicalStorageBuffer64EXT = 5348, + AddressingModelMax = 0x7fffffff, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, + MemoryModelVulkanKHR = 3, + MemoryModelMax = 0x7fffffff, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + 
ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, + ExecutionModeInitializer = 33, + ExecutionModeFinalizer = 34, + ExecutionModeSubgroupSize = 35, + ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeSubgroupsPerWorkgroupId = 37, + ExecutionModeLocalSizeId = 38, + ExecutionModeLocalSizeHintId = 39, + ExecutionModeSubgroupUniformControlFlowKHR = 4421, + ExecutionModePostDepthCoverage = 4446, + ExecutionModeDenormPreserve = 4459, + ExecutionModeDenormFlushToZero = 4460, + ExecutionModeSignedZeroInfNanPreserve = 4461, + ExecutionModeRoundingModeRTE = 4462, + ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, + ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, + ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, + ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, + ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeSharedLocalMemorySizeINTEL = 5618, + ExecutionModeRoundingModeRTPINTEL = 5620, + ExecutionModeRoundingModeRTNINTEL = 5621, + ExecutionModeFloatingPointModeALTINTEL = 5622, + ExecutionModeFloatingPointModeIEEEINTEL = 5623, + ExecutionModeMaxWorkgroupSizeINTEL = 5893, + ExecutionModeMaxWorkDimINTEL = 5894, + ExecutionModeNoGlobalOffsetINTEL = 5895, + ExecutionModeNumSIMDWorkitemsINTEL = 5896, + ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeMax = 0x7fffffff, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, + StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, + StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, + StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, + StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, + StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, + StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, + StorageClassPhysicalStorageBufferEXT = 5349, + 
StorageClassTaskPayloadWorkgroupEXT = 5402, + StorageClassCodeSectionINTEL = 5605, + StorageClassDeviceOnlyINTEL = 5936, + StorageClassHostOnlyINTEL = 5937, + StorageClassMax = 0x7fffffff, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, + DimMax = 0x7fffffff, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, + ImageFormatR64ui = 40, + ImageFormatR64i = 41, + ImageFormatMax = 0x7fffffff, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + 
ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, + ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, + ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, + ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, + ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsNontemporalShift = 14, + ImageOperandsOffsetsShift = 16, + ImageOperandsMax = 0x7fffffff, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, + ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, + ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, + ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, + ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, + ImageOperandsNontemporalMask = 0x00004000, + ImageOperandsOffsetsMask = 0x00010000, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocINTELShift = 17, + FPFastMathModeMax = 0x7fffffff, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocINTELMask = 0x00020000, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, + LinkageTypeMax = 0x7fffffff, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 
16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationUniformId = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, + DecorationMaxByteOffset = 45, + DecorationAlignmentId = 46, + DecorationMaxByteOffsetId = 47, + DecorationNoSignedWrap = 4469, + DecorationNoUnsignedWrap = 4470, + DecorationExplicitInterpAMD = 4999, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, + DecorationPerPrimitiveNV = 5271, + DecorationPerViewNV = 5272, + DecorationPerTaskNV = 5273, + DecorationPerVertexKHR = 5285, + DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, + DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, + DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, + DecorationAliasedPointerEXT = 5356, + DecorationBindlessSamplerNV = 5398, + DecorationBindlessImageNV = 5399, + DecorationBoundSamplerNV = 5400, + DecorationBoundImageNV = 5401, + DecorationSIMTCallINTEL = 5599, + DecorationReferencedIndirectlyINTEL = 5602, + DecorationClobberINTEL = 5607, + DecorationSideEffectsINTEL = 5608, + DecorationVectorComputeVariableINTEL = 5624, + DecorationFuncParamIOKindINTEL = 5625, + DecorationVectorComputeFunctionINTEL = 5626, + DecorationStackCallINTEL = 5627, + DecorationGlobalVariableOffsetINTEL = 5628, + DecorationCounterBuffer = 5634, + DecorationHlslCounterBufferGOOGLE = 5634, + DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationFunctionRoundingModeINTEL = 5822, + DecorationFunctionDenormModeINTEL = 5823, + DecorationRegisterINTEL = 5825, + DecorationMemoryINTEL = 5826, + DecorationNumbanksINTEL = 5827, + DecorationBankwidthINTEL = 5828, + DecorationMaxPrivateCopiesINTEL = 5829, + DecorationSinglepumpINTEL = 5830, + DecorationDoublepumpINTEL = 5831, + DecorationMaxReplicatesINTEL = 5832, + DecorationSimpleDualPortINTEL = 5833, + DecorationMergeINTEL = 5834, + DecorationBankBitsINTEL = 5835, + DecorationForcePow2DepthINTEL = 5836, + DecorationBurstCoalesceINTEL = 5899, + DecorationCacheSizeINTEL = 5900, + DecorationDontStaticallyCoalesceINTEL = 5901, + DecorationPrefetchINTEL = 5902, + DecorationStallEnableINTEL = 5905, + DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationBufferLocationINTEL = 5921, + DecorationIOPipeStorageINTEL = 5944, + DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationSingleElementVectorINTEL = 6085, + DecorationVectorComputeCallableFunctionINTEL = 6087, + DecorationMediaBlockIOINTEL = 6140, + DecorationMax = 0x7fffffff, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + 
BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, + BuiltInSubgroupEqMask = 4416, + BuiltInSubgroupEqMaskKHR = 4416, + BuiltInSubgroupGeMask = 4417, + BuiltInSubgroupGeMaskKHR = 4417, + BuiltInSubgroupGtMask = 4418, + BuiltInSubgroupGtMaskKHR = 4418, + BuiltInSubgroupLeMask = 4419, + BuiltInSubgroupLeMaskKHR = 4419, + BuiltInSubgroupLtMask = 4420, + BuiltInSubgroupLtMaskKHR = 4420, + BuiltInBaseVertex = 4424, + BuiltInBaseInstance = 4425, + BuiltInDrawIndex = 4426, + BuiltInPrimitiveShadingRateKHR = 4432, + BuiltInDeviceIndex = 4438, + BuiltInViewIndex = 4440, + BuiltInShadingRateKHR = 4444, + BuiltInBaryCoordNoPerspAMD = 4992, + BuiltInBaryCoordNoPerspCentroidAMD = 4993, + BuiltInBaryCoordNoPerspSampleAMD = 4994, + BuiltInBaryCoordSmoothAMD = 4995, + BuiltInBaryCoordSmoothCentroidAMD = 4996, + BuiltInBaryCoordSmoothSampleAMD = 4997, + BuiltInBaryCoordPullModelAMD = 4998, + BuiltInFragStencilRefEXT = 5014, + BuiltInViewportMaskNV = 5253, + BuiltInSecondaryPositionNV = 5257, + BuiltInSecondaryViewportMaskNV = 5258, + BuiltInPositionPerViewNV = 5261, + BuiltInViewportMaskPerViewNV = 5262, + BuiltInFullyCoveredEXT = 5264, + BuiltInTaskCountNV = 5274, + BuiltInPrimitiveCountNV = 5275, + BuiltInPrimitiveIndicesNV = 5276, + BuiltInClipDistancePerViewNV = 5277, + BuiltInCullDistancePerViewNV = 5278, + BuiltInLayerPerViewNV = 5279, + BuiltInMeshViewCountNV = 5280, + BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordKHR = 5286, + BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspKHR = 5287, + BuiltInBaryCoordNoPerspNV = 5287, + BuiltInFragSizeEXT = 5292, + BuiltInFragmentSizeNV = 5292, + BuiltInFragInvocationCountEXT = 5293, + BuiltInInvocationsPerPixelNV = 5293, + BuiltInPrimitivePointIndicesEXT = 5294, + BuiltInPrimitiveLineIndicesEXT = 5295, + BuiltInPrimitiveTriangleIndicesEXT = 5296, + BuiltInCullPrimitiveEXT = 5299, + BuiltInLaunchIdKHR = 5319, + BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeKHR = 5320, + BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginKHR = 5321, + BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionKHR = 5322, + BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginKHR = 5323, + BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionKHR = 5324, + BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminKHR = 5325, + BuiltInRayTminNV = 5325, + BuiltInRayTmaxKHR = 5326, + BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexKHR = 5327, + BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldKHR = 5330, + BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectKHR = 5331, + BuiltInWorldToObjectNV = 5331, + BuiltInHitTNV = 5332, + 
BuiltInHitKindKHR = 5333, + BuiltInHitKindNV = 5333, + BuiltInCurrentRayTimeNV = 5334, + BuiltInIncomingRayFlagsKHR = 5351, + BuiltInIncomingRayFlagsNV = 5351, + BuiltInRayGeometryIndexKHR = 5352, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, + BuiltInMax = 0x7fffffff, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, + SelectionControlMax = 0x7fffffff, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, + LoopControlDependencyInfiniteShift = 2, + LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, + LoopControlInitiationIntervalINTELShift = 16, + LoopControlMaxConcurrencyINTELShift = 17, + LoopControlDependencyArrayINTELShift = 18, + LoopControlPipelineEnableINTELShift = 19, + LoopControlLoopCoalesceINTELShift = 20, + LoopControlMaxInterleavingINTELShift = 21, + LoopControlSpeculatedIterationsINTELShift = 22, + LoopControlNoFusionINTELShift = 23, + LoopControlMax = 0x7fffffff, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, + LoopControlDependencyInfiniteMask = 0x00000004, + LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, + LoopControlInitiationIntervalINTELMask = 0x00010000, + LoopControlMaxConcurrencyINTELMask = 0x00020000, + LoopControlDependencyArrayINTELMask = 0x00040000, + LoopControlPipelineEnableINTELMask = 0x00080000, + LoopControlLoopCoalesceINTELMask = 0x00100000, + LoopControlMaxInterleavingINTELMask = 0x00200000, + LoopControlSpeculatedIterationsINTELMask = 0x00400000, + LoopControlNoFusionINTELMask = 0x00800000, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, + FunctionControlOptNoneINTELShift = 16, + FunctionControlMax = 0x7fffffff, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, + FunctionControlOptNoneINTELMask = 0x00010000, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, + MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, + MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, + MemorySemanticsMakeVisibleKHRShift = 14, + 
MemorySemanticsVolatileShift = 15, + MemorySemanticsMax = 0x7fffffff, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, + MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, + MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, + MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, + MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, + MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, + MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessMax = 0x7fffffff, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, + MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, + MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, + MemoryAccessNonPrivatePointerKHRMask = 0x00000020, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, + ScopeQueueFamily = 5, + ScopeQueueFamilyKHR = 5, + ScopeShaderCallKHR = 6, + ScopeMax = 0x7fffffff, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, + GroupOperationClusteredReduce = 3, + GroupOperationPartitionedReduceNV = 6, + GroupOperationPartitionedInclusiveScanNV = 7, + GroupOperationPartitionedExclusiveScanNV = 8, + GroupOperationMax = 0x7fffffff, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, + KernelEnqueueFlagsMax = 0x7fffffff, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, + KernelProfilingInfoMax = 0x7fffffff, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + 
CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, + CapabilitySubgroupDispatch = 58, + CapabilityNamedBarrier = 59, + CapabilityPipeStorage = 60, + CapabilityGroupNonUniform = 61, + CapabilityGroupNonUniformVote = 62, + CapabilityGroupNonUniformArithmetic = 63, + CapabilityGroupNonUniformBallot = 64, + CapabilityGroupNonUniformShuffle = 65, + CapabilityGroupNonUniformShuffleRelative = 66, + CapabilityGroupNonUniformClustered = 67, + CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, + CapabilityUniformDecoration = 71, + CapabilityFragmentShadingRateKHR = 4422, + CapabilitySubgroupBallotKHR = 4423, + CapabilityDrawParameters = 4427, + CapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, + CapabilitySubgroupVoteKHR = 4431, + CapabilityStorageBuffer16BitAccess = 4433, + CapabilityStorageUniformBufferBlock16 = 4433, + CapabilityStorageUniform16 = 4434, + CapabilityUniformAndStorageBuffer16BitAccess = 4434, + CapabilityStoragePushConstant16 = 4435, + CapabilityStorageInputOutput16 = 4436, + CapabilityDeviceGroup = 4437, + CapabilityMultiView = 4439, + CapabilityVariablePointersStorageBuffer = 4441, + CapabilityVariablePointers = 4442, + CapabilityAtomicStorageOps = 4445, + CapabilitySampleMaskPostDepthCoverage = 4447, + CapabilityStorageBuffer8BitAccess = 4448, + CapabilityUniformAndStorageBuffer8BitAccess = 4449, + CapabilityStoragePushConstant8 = 4450, + CapabilityDenormPreserve = 4464, + CapabilityDenormFlushToZero = 4465, + CapabilitySignedZeroInfNanPreserve = 4466, + CapabilityRoundingModeRTE = 4467, + CapabilityRoundingModeRTZ = 4468, + CapabilityRayQueryProvisionalKHR = 4471, + CapabilityRayQueryKHR = 4472, + CapabilityRayTraversalPrimitiveCullingKHR = 4478, + CapabilityRayTracingKHR = 4479, + CapabilityFloat16ImageAMD = 5008, + CapabilityImageGatherBiasLodAMD = 5009, + CapabilityFragmentMaskAMD = 5010, + CapabilityStencilExportEXT = 5013, + CapabilityImageReadWriteLodAMD = 5015, + CapabilityInt64ImageEXT = 5016, + CapabilityShaderClockKHR = 5055, + CapabilitySampleMaskOverrideCoverageNV = 5249, + CapabilityGeometryShaderPassthroughNV = 5251, + CapabilityShaderViewportIndexLayerEXT = 5254, + CapabilityShaderViewportIndexLayerNV = 5254, + 
CapabilityShaderViewportMaskNV = 5255, + CapabilityShaderStereoViewNV = 5259, + CapabilityPerViewAttributesNV = 5260, + CapabilityFragmentFullyCoveredEXT = 5265, + CapabilityMeshShadingNV = 5266, + CapabilityImageFootprintNV = 5282, + CapabilityMeshShadingEXT = 5283, + CapabilityFragmentBarycentricKHR = 5284, + CapabilityFragmentBarycentricNV = 5284, + CapabilityComputeDerivativeGroupQuadsNV = 5288, + CapabilityFragmentDensityEXT = 5291, + CapabilityShadingRateNV = 5291, + CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, + CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, + CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, + CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, + CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, + CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, + CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, + CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, + CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + CapabilityRayTracingNV = 5340, + CapabilityRayTracingMotionBlurNV = 5341, + CapabilityVulkanMemoryModel = 5345, + CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, + CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, + CapabilityPhysicalStorageBufferAddressesEXT = 5347, + CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityRayTracingProvisionalKHR = 5353, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocation = 5379, + CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilityBindlessTextureNV = 5390, + CapabilitySubgroupShuffleINTEL = 5568, + CapabilitySubgroupBufferBlockIOINTEL = 5569, + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityRoundToInfinityINTEL = 5582, + CapabilityFloatingPointModeINTEL = 5583, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilityFunctionPointersINTEL = 5603, + CapabilityIndirectReferencesINTEL = 5604, + CapabilityAsmINTEL = 5606, + CapabilityAtomicFloat32MinMaxEXT = 5612, + CapabilityAtomicFloat64MinMaxEXT = 5613, + CapabilityAtomicFloat16MinMaxEXT = 5616, + CapabilityVectorComputeINTEL = 5617, + CapabilityVectorAnyINTEL = 5619, + CapabilityExpectAssumeKHR = 5629, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + 
CapabilityVariableLengthArrayINTEL = 5817, + CapabilityFunctionFloatControlINTEL = 5821, + CapabilityFPGAMemoryAttributesINTEL = 5824, + CapabilityFPFastMathModeINTEL = 5837, + CapabilityArbitraryPrecisionIntegersINTEL = 5844, + CapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + CapabilityUnstructuredLoopControlsINTEL = 5886, + CapabilityFPGALoopControlsINTEL = 5888, + CapabilityKernelAttributesINTEL = 5892, + CapabilityFPGAKernelAttributesINTEL = 5897, + CapabilityFPGAMemoryAccessesINTEL = 5898, + CapabilityFPGAClusterAttributesINTEL = 5904, + CapabilityLoopFuseINTEL = 5906, + CapabilityFPGABufferLocationINTEL = 5920, + CapabilityArbitraryPrecisionFixedPointINTEL = 5922, + CapabilityUSMStorageClassesINTEL = 5935, + CapabilityIOPipesINTEL = 5943, + CapabilityBlockingPipesINTEL = 5945, + CapabilityFPGARegINTEL = 5948, + CapabilityDotProductInputAll = 6016, + CapabilityDotProductInputAllKHR = 6016, + CapabilityDotProductInput4x8Bit = 6017, + CapabilityDotProductInput4x8BitKHR = 6017, + CapabilityDotProductInput4x8BitPacked = 6018, + CapabilityDotProductInput4x8BitPackedKHR = 6018, + CapabilityDotProduct = 6019, + CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, + CapabilityBitInstructions = 6025, + CapabilityAtomicFloat32AddEXT = 6033, + CapabilityAtomicFloat64AddEXT = 6034, + CapabilityLongConstantCompositeINTEL = 6089, + CapabilityOptNoneINTEL = 6094, + CapabilityAtomicFloat16AddEXT = 6095, + CapabilityDebugInfoModuleINTEL = 6114, + CapabilityMax = 0x7fffffff, +}; + +enum RayFlagsShift { + RayFlagsOpaqueKHRShift = 0, + RayFlagsNoOpaqueKHRShift = 1, + RayFlagsTerminateOnFirstHitKHRShift = 2, + RayFlagsSkipClosestHitShaderKHRShift = 3, + RayFlagsCullBackFacingTrianglesKHRShift = 4, + RayFlagsCullFrontFacingTrianglesKHRShift = 5, + RayFlagsCullOpaqueKHRShift = 6, + RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipTrianglesKHRShift = 8, + RayFlagsSkipAABBsKHRShift = 9, + RayFlagsMax = 0x7fffffff, +}; + +enum RayFlagsMask { + RayFlagsMaskNone = 0, + RayFlagsOpaqueKHRMask = 0x00000001, + RayFlagsNoOpaqueKHRMask = 0x00000002, + RayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + RayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + RayFlagsCullOpaqueKHRMask = 0x00000040, + RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipTrianglesKHRMask = 0x00000100, + RayFlagsSkipAABBsKHRMask = 0x00000200, +}; + +enum RayQueryIntersection { + RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + RayQueryIntersectionMax = 0x7fffffff, +}; + +enum RayQueryCommittedIntersectionType { + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + RayQueryCommittedIntersectionTypeMax = 0x7fffffff, +}; + +enum RayQueryCandidateIntersectionType { + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + RayQueryCandidateIntersectionTypeMax = 0x7fffffff, +}; + +enum FragmentShadingRateShift { + FragmentShadingRateVertical2PixelsShift = 0, + FragmentShadingRateVertical4PixelsShift = 1, + FragmentShadingRateHorizontal2PixelsShift = 2, + FragmentShadingRateHorizontal4PixelsShift = 3, + FragmentShadingRateMax = 0x7fffffff, +}; + +enum 
FragmentShadingRateMask { + FragmentShadingRateMaskNone = 0, + FragmentShadingRateVertical2PixelsMask = 0x00000001, + FragmentShadingRateVertical4PixelsMask = 0x00000002, + FragmentShadingRateHorizontal2PixelsMask = 0x00000004, + FragmentShadingRateHorizontal4PixelsMask = 0x00000008, +}; + +enum FPDenormMode { + FPDenormModePreserve = 0, + FPDenormModeFlushToZero = 1, + FPDenormModeMax = 0x7fffffff, +}; + +enum FPOperationMode { + FPOperationModeIEEE = 0, + FPOperationModeALT = 1, + FPOperationModeMax = 0x7fffffff, +}; + +enum QuantizationModes { + QuantizationModesTRN = 0, + QuantizationModesTRN_ZERO = 1, + QuantizationModesRND = 2, + QuantizationModesRND_ZERO = 3, + QuantizationModesRND_INF = 4, + QuantizationModesRND_MIN_INF = 5, + QuantizationModesRND_CONV = 6, + QuantizationModesRND_CONV_ODD = 7, + QuantizationModesMax = 0x7fffffff, +}; + +enum OverflowModes { + OverflowModesWRAP = 0, + OverflowModesSAT = 1, + OverflowModesSAT_ZERO = 2, + OverflowModesSAT_SYM = 3, + OverflowModesMax = 0x7fffffff, +}; + +enum PackedVectorFormat { + PackedVectorFormatPackedVectorFormat4x8Bit = 0, + PackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + PackedVectorFormatMax = 0x7fffffff, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + 
OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + 
OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpTerminateInvocation = 4416, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpSDot = 4450, + OpSDotKHR = 4450, + OpUDot = 4451, + OpUDotKHR = 4451, + OpSUDot = 4452, + OpSUDotKHR = 4452, + OpSDotAccSat = 4453, + OpSDotAccSatKHR = 
4453, + OpUDotAccSat = 4454, + OpUDotAccSatKHR = 4454, + OpSUDotAccSat = 4455, + OpSUDotAccSatKHR = 4455, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, + OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayNV = 5336, + OpTraceNV = 5337, + OpTraceMotionNV = 5338, + OpTraceRayMotionNV = 5339, + OpTypeAccelerationStructureKHR = 5341, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocation = 5380, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpConvertUToImageNV = 5391, + OpConvertUToSamplerNV = 5392, + OpConvertImageToUNV = 5393, + OpConvertSamplerToUNV = 5394, + OpConvertUToSampledImageNV = 5395, + OpConvertSampledImageToUNV = 5396, + OpSamplerImageAddressingModeNV = 5397, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpConstantFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 5601, + OpAsmTargetINTEL = 5609, + OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, + OpDecorateString = 5632, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, + OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + 
+    OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713,
+    OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714,
+    OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715,
+    OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716,
+    OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717,
+    OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718,
+    OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719,
+    OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720,
+    OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721,
+    OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722,
+    OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723,
+    OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724,
+    OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725,
+    OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726,
+    OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727,
+    OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728,
+    OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729,
+    OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730,
+    OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731,
+    OpSubgroupAvcMceConvertToImePayloadINTEL = 5732,
+    OpSubgroupAvcMceConvertToImeResultINTEL = 5733,
+    OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734,
+    OpSubgroupAvcMceConvertToRefResultINTEL = 5735,
+    OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736,
+    OpSubgroupAvcMceConvertToSicResultINTEL = 5737,
+    OpSubgroupAvcMceGetMotionVectorsINTEL = 5738,
+    OpSubgroupAvcMceGetInterDistortionsINTEL = 5739,
+    OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740,
+    OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741,
+    OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742,
+    OpSubgroupAvcMceGetInterDirectionsINTEL = 5743,
+    OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744,
+    OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745,
+    OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746,
+    OpSubgroupAvcImeInitializeINTEL = 5747,
+    OpSubgroupAvcImeSetSingleReferenceINTEL = 5748,
+    OpSubgroupAvcImeSetDualReferenceINTEL = 5749,
+    OpSubgroupAvcImeRefWindowSizeINTEL = 5750,
+    OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751,
+    OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752,
+    OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753,
+    OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754,
+    OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755,
+    OpSubgroupAvcImeSetWeightedSadINTEL = 5756,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757,
+    OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764,
+    OpSubgroupAvcImeConvertToMceResultINTEL = 5765,
+    OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766,
+    OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767,
+    OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768,
+    OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775,
+    OpSubgroupAvcImeGetBorderReachedINTEL = 5776,
+    OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777,
+    OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778,
+    OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779,
+    OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780,
+    OpSubgroupAvcFmeInitializeINTEL = 5781,
+    OpSubgroupAvcBmeInitializeINTEL = 5782,
+    OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783,
+    OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784,
+    OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785,
+    OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786,
+    OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787,
+    OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788,
+    OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789,
+    OpSubgroupAvcRefConvertToMceResultINTEL = 5790,
+    OpSubgroupAvcSicInitializeINTEL = 5791,
+    OpSubgroupAvcSicConfigureSkcINTEL = 5792,
+    OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793,
+    OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794,
+    OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795,
+    OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796,
+    OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797,
+    OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798,
+    OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799,
+    OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800,
+    OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801,
+    OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802,
+    OpSubgroupAvcSicEvaluateIpeINTEL = 5803,
+    OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804,
+    OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805,
+    OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806,
+    OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807,
+    OpSubgroupAvcSicConvertToMceResultINTEL = 5808,
+    OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809,
+    OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810,
+    OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811,
+    OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812,
+    OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813,
+    OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814,
+    OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815,
+    OpSubgroupAvcSicGetInterRawSadsINTEL = 5816,
+    OpVariableLengthArrayINTEL = 5818,
+    OpSaveMemoryINTEL = 5819,
+    OpRestoreMemoryINTEL = 5820,
+    OpArbitraryFloatSinCosPiINTEL = 5840,
+    OpArbitraryFloatCastINTEL = 5841,
+    OpArbitraryFloatCastFromIntINTEL = 5842,
+    OpArbitraryFloatCastToIntINTEL = 5843,
+    OpArbitraryFloatAddINTEL = 5846,
+    OpArbitraryFloatSubINTEL = 5847,
+    OpArbitraryFloatMulINTEL = 5848,
+    OpArbitraryFloatDivINTEL = 5849,
+    OpArbitraryFloatGTINTEL = 5850,
+    OpArbitraryFloatGEINTEL = 5851,
+    OpArbitraryFloatLTINTEL = 5852,
+    OpArbitraryFloatLEINTEL = 5853,
+    OpArbitraryFloatEQINTEL = 5854,
+    OpArbitraryFloatRecipINTEL = 5855,
+    OpArbitraryFloatRSqrtINTEL = 5856,
+    OpArbitraryFloatCbrtINTEL = 5857,
+    OpArbitraryFloatHypotINTEL = 5858,
+    OpArbitraryFloatSqrtINTEL = 5859,
+    OpArbitraryFloatLogINTEL = 5860,
+    OpArbitraryFloatLog2INTEL = 5861,
+    OpArbitraryFloatLog10INTEL = 5862,
+    OpArbitraryFloatLog1pINTEL = 5863,
+    OpArbitraryFloatExpINTEL = 5864,
+    OpArbitraryFloatExp2INTEL = 5865,
+    OpArbitraryFloatExp10INTEL = 5866,
+    OpArbitraryFloatExpm1INTEL = 5867,
+    OpArbitraryFloatSinINTEL = 5868,
+    OpArbitraryFloatCosINTEL = 5869,
+    OpArbitraryFloatSinCosINTEL = 5870,
+    OpArbitraryFloatSinPiINTEL = 5871,
+    OpArbitraryFloatCosPiINTEL = 5872,
+    OpArbitraryFloatASinINTEL = 5873,
+    OpArbitraryFloatASinPiINTEL = 5874,
+    OpArbitraryFloatACosINTEL = 5875,
+    OpArbitraryFloatACosPiINTEL = 5876,
+    OpArbitraryFloatATanINTEL = 5877,
+    OpArbitraryFloatATanPiINTEL = 5878,
+    OpArbitraryFloatATan2INTEL = 5879,
+    OpArbitraryFloatPowINTEL = 5880,
+    OpArbitraryFloatPowRINTEL = 5881,
+    OpArbitraryFloatPowNINTEL = 5882,
+    OpLoopControlINTEL = 5887,
+    OpFixedSqrtINTEL = 5923,
+    OpFixedRecipINTEL = 5924,
+    OpFixedRsqrtINTEL = 5925,
+    OpFixedSinINTEL = 5926,
+    OpFixedCosINTEL = 5927,
+    OpFixedSinCosINTEL = 5928,
+    OpFixedSinPiINTEL = 5929,
+    OpFixedCosPiINTEL = 5930,
+    OpFixedSinCosPiINTEL = 5931,
+    OpFixedLogINTEL = 5932,
+    OpFixedExpINTEL = 5933,
+    OpPtrCastToCrossWorkgroupINTEL = 5934,
+    OpCrossWorkgroupCastToPtrINTEL = 5938,
+    OpReadPipeBlockingINTEL = 5946,
+    OpWritePipeBlockingINTEL = 5947,
+    OpFPGARegINTEL = 5949,
+    OpRayQueryGetRayTMinKHR = 6016,
+    OpRayQueryGetRayFlagsKHR = 6017,
+    OpRayQueryGetIntersectionTKHR = 6018,
+    OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019,
+    OpRayQueryGetIntersectionInstanceIdKHR = 6020,
+    OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021,
+    OpRayQueryGetIntersectionGeometryIndexKHR = 6022,
+    OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023,
+    OpRayQueryGetIntersectionBarycentricsKHR = 6024,
+    OpRayQueryGetIntersectionFrontFaceKHR = 6025,
+    OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026,
+    OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027,
+    OpRayQueryGetIntersectionObjectRayOriginKHR = 6028,
+    OpRayQueryGetWorldRayDirectionKHR = 6029,
+    OpRayQueryGetWorldRayOriginKHR = 6030,
+    OpRayQueryGetIntersectionObjectToWorldKHR = 6031,
+    OpRayQueryGetIntersectionWorldToObjectKHR = 6032,
+    OpAtomicFAddEXT = 6035,
+    OpTypeBufferSurfaceINTEL = 6086,
+    OpTypeStructContinuedINTEL = 6090,
+    OpConstantCompositeContinuedINTEL = 6091,
+    OpSpecConstantCompositeContinuedINTEL = 6092,
+    OpMax = 0x7fffffff,
+};
+
+#ifdef SPV_ENABLE_UTILITY_CODE
+inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
+    *hasResult = *hasResultType = false;
+    switch (opcode) {
+    default: /* unknown opcode */ break;
+    case OpNop: *hasResult = false; *hasResultType = false; break;
+    case OpUndef: *hasResult = true; *hasResultType = true; break;
+    case OpSourceContinued: *hasResult = false; *hasResultType = false; break;
+    case OpSource: *hasResult = false; *hasResultType = false; break;
+    case OpSourceExtension: *hasResult = false; *hasResultType = false; break;
+    case OpName: *hasResult = false; *hasResultType = false; break;
+    case OpMemberName: *hasResult = false; *hasResultType = false; break;
+    case OpString: *hasResult = true; *hasResultType = false; break;
+    case OpLine: *hasResult = false; *hasResultType = false; break;
+    case OpExtension: *hasResult = false; *hasResultType = false; break;
+    case OpExtInstImport: *hasResult = true; *hasResultType = false; break;
+    case OpExtInst: *hasResult = true; *hasResultType = true; break;
+    case OpMemoryModel: *hasResult = false; *hasResultType = false; break;
+    case OpEntryPoint: *hasResult = false; *hasResultType = false; break;
+    case OpExecutionMode: *hasResult = false; *hasResultType = false; break;
+    case OpCapability: *hasResult = false; *hasResultType = false; break;
+    case OpTypeVoid: *hasResult = true; *hasResultType = false; break;
+    case OpTypeBool: *hasResult = true; *hasResultType = false; break;
+    case OpTypeInt: *hasResult = true; *hasResultType = false; break;
+    case OpTypeFloat: *hasResult = true; *hasResultType = false; break;
+    case OpTypeVector: *hasResult = true; *hasResultType = false; break;
+    case OpTypeMatrix: *hasResult = true; *hasResultType = false; break;
+    case OpTypeImage: *hasResult = true; *hasResultType = false; break;
+    case OpTypeSampler: *hasResult = true; *hasResultType = false; break;
+    case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break;
+    case OpTypeArray: *hasResult = true; *hasResultType = false; break;
+    case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break;
+    case OpTypeStruct: *hasResult = true; *hasResultType = false; break;
+    case OpTypeOpaque: *hasResult = true; *hasResultType = false; break;
+    case OpTypePointer: *hasResult = true; *hasResultType = false; break;
+    case OpTypeFunction: *hasResult = true; *hasResultType = false; break;
+    case OpTypeEvent: *hasResult = true; *hasResultType = false; break;
+    case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break;
+    case OpTypeReserveId: *hasResult = true; *hasResultType = false; break;
+    case OpTypeQueue: *hasResult = true; *hasResultType = false; break;
+    case OpTypePipe: *hasResult = true; *hasResultType = false; break;
+    case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break;
+    case OpConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case OpConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case OpConstant: *hasResult = true; *hasResultType = true; break;
+    case OpConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case OpConstantSampler: *hasResult = true; *hasResultType = true; break;
+    case OpConstantNull: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstant: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break;
+    case OpFunction: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionParameter: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionEnd: *hasResult = false; *hasResultType = false; break;
+    case OpFunctionCall: *hasResult = true; *hasResultType = true; break;
+    case OpVariable: *hasResult = true; *hasResultType = true; break;
+    case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break;
+    case OpLoad: *hasResult = true; *hasResultType = true; break;
+    case OpStore: *hasResult = false; *hasResultType = false; break;
+    case OpCopyMemory: *hasResult = false; *hasResultType = false; break;
+    case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break;
+    case OpAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpArrayLength: *hasResult = true; *hasResultType = true; break;
+    case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break;
+    case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpDecorationGroup: *hasResult = true; *hasResultType = false; break;
+    case OpGroupDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break;
+    case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break;
+    case OpVectorShuffle: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeExtract: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeInsert: *hasResult = true; *hasResultType = true; break;
+    case OpCopyObject: *hasResult = true; *hasResultType = true; break;
+    case OpTranspose: *hasResult = true; *hasResultType = true; break;
+    case OpSampledImage: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageFetch: *hasResult = true; *hasResultType = true; break;
+    case OpImageGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageDrefGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageRead: *hasResult = true; *hasResultType = true; break;
+    case OpImageWrite: *hasResult = false; *hasResultType = false; break;
+    case OpImage: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySize: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break;
+    case OpConvertFToU: *hasResult = true; *hasResultType = true; break;
+    case OpConvertFToS: *hasResult = true; *hasResultType = true; break;
+    case OpConvertSToF: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToF: *hasResult = true; *hasResultType = true; break;
+    case OpUConvert: *hasResult = true; *hasResultType = true; break;
+    case OpSConvert: *hasResult = true; *hasResultType = true; break;
+    case OpFConvert: *hasResult = true; *hasResultType = true; break;
+    case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break;
+    case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break;
+    case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break;
+    case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break;
+    case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break;
+    case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break;
+    case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break;
+    case OpBitcast: *hasResult = true; *hasResultType = true; break;
+    case OpSNegate: *hasResult = true; *hasResultType = true; break;
+    case OpFNegate: *hasResult = true; *hasResultType = true; break;
+    case OpIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpISub: *hasResult = true; *hasResultType = true; break;
+    case OpFSub: *hasResult = true; *hasResultType = true; break;
+    case OpIMul: *hasResult = true; *hasResultType = true; break;
+    case OpFMul: *hasResult = true; *hasResultType = true; break;
+    case OpUDiv: *hasResult = true; *hasResultType = true; break;
+    case OpSDiv: *hasResult = true; *hasResultType = true; break;
+    case OpFDiv: *hasResult = true; *hasResultType = true; break;
+    case OpUMod: *hasResult = true; *hasResultType = true; break;
+    case OpSRem: *hasResult = true; *hasResultType = true; break;
+    case OpSMod: *hasResult = true; *hasResultType = true; break;
+    case OpFRem: *hasResult = true; *hasResultType = true; break;
+    case OpFMod: *hasResult = true; *hasResultType = true; break;
+    case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case OpOuterProduct: *hasResult = true; *hasResultType = true; break;
+    case OpDot: *hasResult = true; *hasResultType = true; break;
+    case OpIAddCarry: *hasResult = true; *hasResultType = true; break;
+    case OpISubBorrow: *hasResult = true; *hasResultType = true; break;
+    case OpUMulExtended: *hasResult = true; *hasResultType = true; break;
+    case OpSMulExtended: *hasResult = true; *hasResultType = true; break;
+    case OpAny: *hasResult = true; *hasResultType = true; break;
+    case OpAll: *hasResult = true; *hasResultType = true; break;
+    case OpIsNan: *hasResult = true; *hasResultType = true; break;
+    case OpIsInf: *hasResult = true; *hasResultType = true; break;
+    case OpIsFinite: *hasResult = true; *hasResultType = true; break;
+    case OpIsNormal: *hasResult = true; *hasResultType = true; break;
+    case OpSignBitSet: *hasResult = true; *hasResultType = true; break;
+    case OpLessOrGreater: *hasResult = true; *hasResultType = true; break;
+    case OpOrdered: *hasResult = true; *hasResultType = true; break;
+    case OpUnordered: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalEqual: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalNot: *hasResult = true; *hasResultType = true; break;
+    case OpSelect: *hasResult = true; *hasResultType = true; break;
+    case OpIEqual: *hasResult = true; *hasResultType = true; break;
+    case OpINotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpUGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpSGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpULessThan: *hasResult = true; *hasResultType = true; break;
+    case OpSLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpULessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break;
+    case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break;
+    case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case OpNot: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break;
+    case OpBitReverse: *hasResult = true; *hasResultType = true; break;
+    case OpBitCount: *hasResult = true; *hasResultType = true; break;
+    case OpDPdx: *hasResult = true; *hasResultType = true; break;
+    case OpDPdy: *hasResult = true; *hasResultType = true; break;
+    case OpFwidth: *hasResult = true; *hasResultType = true; break;
+    case OpDPdxFine: *hasResult = true; *hasResultType = true; break;
+    case OpDPdyFine: *hasResult = true; *hasResultType = true; break;
+    case OpFwidthFine: *hasResult = true; *hasResultType = true; break;
+    case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpEmitVertex: *hasResult = false; *hasResultType = false; break;
+    case OpEndPrimitive: *hasResult = false; *hasResultType = false; break;
+    case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break;
+    case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break;
+    case OpControlBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicLoad: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicStore: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicExchange: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicISub: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicSMin: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicUMin: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicSMax: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicUMax: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicAnd: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicOr: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicXor: *hasResult = true; *hasResultType = true; break;
+    case OpPhi: *hasResult = true; *hasResultType = true; break;
+    case OpLoopMerge: *hasResult = false; *hasResultType = false; break;
+    case OpSelectionMerge: *hasResult = false; *hasResultType = false; break;
+    case OpLabel: *hasResult = true; *hasResultType = false; break;
+    case OpBranch: *hasResult = false; *hasResultType = false; break;
+    case OpBranchConditional: *hasResult = false; *hasResultType = false; break;
+    case OpSwitch: *hasResult = false; *hasResultType = false; break;
+    case OpKill: *hasResult = false; *hasResultType = false; break;
+    case OpReturn: *hasResult = false; *hasResultType = false; break;
+    case OpReturnValue: *hasResult = false; *hasResultType = false; break;
+    case OpUnreachable: *hasResult = false; *hasResultType = false; break;
+    case OpLifetimeStart: *hasResult = false; *hasResultType = false; break;
+    case OpLifetimeStop: *hasResult = false; *hasResultType = false; break;
+    case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break;
+    case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break;
+    case OpGroupAll: *hasResult = true; *hasResultType = true; break;
+    case OpGroupAny: *hasResult = true; *hasResultType = true; break;
+    case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break;
+    case OpGroupIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMax: *hasResult = true; *hasResultType = true; break;
+    case OpReadPipe: *hasResult = true; *hasResultType = true; break;
+    case OpWritePipe: *hasResult = true; *hasResultType = true; break;
+    case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break;
+    case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break;
+    case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+    case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+    case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break;
+    case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+    case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+    case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break;
+    case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break;
+    case OpRetainEvent: *hasResult = false; *hasResultType = false; break;
+    case OpReleaseEvent: *hasResult = false; *hasResultType = false; break;
+    case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break;
+    case OpIsValidEvent: *hasResult = true; *hasResultType = true; break;
+    case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break;
+    case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break;
+    case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break;
+    case OpBuildNDRange: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break;
+    case OpNoLine: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break;
+    case OpImageSparseRead: *hasResult = true; *hasResultType = true; break;
+    case OpSizeOf: *hasResult = true; *hasResultType = true; break;
+    case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break;
+    case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break;
+    case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break;
+    case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break;
+    case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break;
+    case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpModuleProcessed: *hasResult = false; *hasResultType = false; break;
+    case OpExecutionModeId: *hasResult = false; *hasResultType = false; break;
+    case OpDecorateId: *hasResult = false; *hasResultType = false; break;
+    case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break;
+    case OpCopyLogical: *hasResult = true; *hasResultType = true; break;
+    case OpPtrEqual: *hasResult = true; *hasResultType = true; break;
+    case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpPtrDiff: *hasResult = true; *hasResultType = true; break;
+    case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
+    case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break;
+    case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break;
+    case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break;
+    case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break;
+    case OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break;
+    case OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break;
+    case OpSDot: *hasResult = true; *hasResultType = true; break;
+    case OpUDot: *hasResult = true; *hasResultType = true; break;
+    case OpSUDot: *hasResult = true; *hasResultType = true; break;
+    case OpSDotAccSat: *hasResult = true; *hasResultType = true; break;
+    case OpUDotAccSat: *hasResult = true; *hasResultType = true; break;
+    case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break;
+    case OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break;
+    case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break;
+    case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break;
+    case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break;
+    case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break;
+    case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break;
+    case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break;
+    case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break;
+    case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
+    case OpReadClockKHR: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
+    case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break;
+    case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break;
+    case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
+    case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;
+    case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break;
+    case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break;
+    case OpTraceNV: *hasResult = false; *hasResultType = false; break;
+    case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break;
+    case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break;
+    case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break;
+    case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break;
+    case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break;
+    case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break;
+    case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break;
+    case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break;
+    case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break;
+    case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+    case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+    case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break;
+    case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break;
+    case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break;
+    case OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break;
+    case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break;
+    case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break;
+    case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAsmINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break;
+    case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break;
+    case OpExpectKHR: *hasResult = true; *hasResultType = true; break;
+    case OpDecorateString: *hasResult = false; *hasResultType = false; break;
+    case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
+    case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break;
+    case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break;
case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/spvIR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/spvIR.h
new file mode 100644
index 0000000..0969127
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/SPIRV/spvIR.h
@@ -0,0 +1,520 @@
+//
+// Copyright (C) 2014 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+// SPIRV-IR
+//
+// Simple in-memory representation (IR) of SPIRV. Just for holding
+// each function's CFG of blocks. Has this hierarchy:
+//  - Module, which is a list of
+//    - Function, which is a list of
+//      - Block, which is a list of
+//        - Instruction
+//
+
+#pragma once
+#ifndef spvIR_H
+#define spvIR_H
+
+#include "spirv.hpp"
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+namespace spv {
+
+class Block;
+class Function;
+class Module;
+
+const Id NoResult = 0;
+const Id NoType = 0;
+
+const Decoration NoPrecision = DecorationMax;
+
+#ifdef __GNUC__
+#   define POTENTIALLY_UNUSED __attribute__((unused))
+#else
+#   define POTENTIALLY_UNUSED
+#endif
+
+POTENTIALLY_UNUSED
+const MemorySemanticsMask MemorySemanticsAllMemory =
+    (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask |
+                          MemorySemanticsWorkgroupMemoryMask |
+                          MemorySemanticsAtomicCounterMemoryMask |
+                          MemorySemanticsImageMemoryMask);
+
+struct IdImmediate {
+    bool isId;      // true if word is an Id, false if word is an immediate
+    unsigned word;
+    IdImmediate(bool i, unsigned w) : isId(i), word(w) {}
+};
+
+//
+// SPIR-V IR instruction.
+//
+
+class Instruction {
+public:
+    Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { }
+    explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { }
+    virtual ~Instruction() {}
+    void addIdOperand(Id id) {
+        operands.push_back(id);
+        idOperand.push_back(true);
+    }
+    void addImmediateOperand(unsigned int immediate) {
+        operands.push_back(immediate);
+        idOperand.push_back(false);
+    }
+    void setImmediateOperand(unsigned idx, unsigned int immediate) {
+        assert(!idOperand[idx]);
+        operands[idx] = immediate;
+    }
+
+    void addStringOperand(const char* str)
+    {
+        unsigned int word = 0;
+        unsigned int shiftAmount = 0;
+        char c;
+
+        do {
+            c = *(str++);
+            word |= ((unsigned int)c) << shiftAmount;
+            shiftAmount += 8;
+            if (shiftAmount == 32) {
+                addImmediateOperand(word);
+                word = 0;
+                shiftAmount = 0;
+            }
+        } while (c != 0);
+
+        // deal with partial last word
+        if (shiftAmount > 0) {
+            addImmediateOperand(word);
+        }
+    }
+    bool isIdOperand(int op) const { return idOperand[op]; }
+    void setBlock(Block* b) { block = b; }
+    Block* getBlock() const { return block; }
+    Op getOpCode() const { return opCode; }
+    int getNumOperands() const
+    {
+        assert(operands.size() == idOperand.size());
+        return (int)operands.size();
+    }
+    Id getResultId() const { return resultId; }
+    Id getTypeId() const { return typeId; }
+    Id getIdOperand(int op) const {
+        assert(idOperand[op]);
+        return operands[op];
+    }
+    unsigned int getImmediateOperand(int op) const {
+        assert(!idOperand[op]);
+        return operands[op];
+    }
+
+    // Write out the binary form.
+    void dump(std::vector<unsigned int>& out) const
+    {
+        // Compute the wordCount
+        unsigned int wordCount = 1;
+        if (typeId)
+            ++wordCount;
+        if (resultId)
+            ++wordCount;
+        wordCount += (unsigned int)operands.size();
+
+        // Write out the beginning of the instruction
+        out.push_back(((wordCount) << WordCountShift) | opCode);
+        if (typeId)
+            out.push_back(typeId);
+        if (resultId)
+            out.push_back(resultId);
+
+        // Write out the operands
+        for (int op = 0; op < (int)operands.size(); ++op)
+            out.push_back(operands[op]);
+    }
+
+protected:
+    Instruction(const Instruction&);
+    Id resultId;
+    Id typeId;
+    Op opCode;
+    std::vector<Id> operands;     // operands, both <id> and immediates (both are unsigned int)
+    std::vector<bool> idOperand;  // true for operands that are <id>, false for immediates
+    Block* block;
+};
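For orientation, here is a minimal sketch of the operand encoding above; it is illustrative usage, not part of the vendored header, and it encodes `OpName %5 "tmp"` (a result-less instruction with one <id> operand plus a packed string):

    // Build the instruction: one <id> operand, then "tmp" packed 4 chars/word, NUL-terminated.
    spv::Instruction nameInst(spv::OpName);
    nameInst.addIdOperand(5);
    nameInst.addStringOperand("tmp");   // 't','m','p','\0' fit in a single word

    std::vector<unsigned int> words;
    nameInst.dump(words);
    // words[0] == (3 << spv::WordCountShift) | spv::OpName: opcode word, <id>, string word.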
+
+//
+// SPIR-V IR block.
+//
+
+class Block {
+public:
+    Block(Id id, Function& parent);
+    virtual ~Block()
+    {
+    }
+
+    Id getId() { return instructions.front()->getResultId(); }
+
+    Function& getParent() const { return parent; }
+    void addInstruction(std::unique_ptr<Instruction> inst);
+    void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
+    void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
+    const std::vector<Block*>& getPredecessors() const { return predecessors; }
+    const std::vector<Block*>& getSuccessors() const { return successors; }
+    const std::vector<std::unique_ptr<Instruction> >& getInstructions() const {
+        return instructions;
+    }
+    const std::vector<std::unique_ptr<Instruction> >& getLocalVariables() const { return localVariables; }
+    void setUnreachable() { unreachable = true; }
+    bool isUnreachable() const { return unreachable; }
+    // Returns the block's merge instruction, if one exists (otherwise null).
+    const Instruction* getMergeInstruction() const {
+        if (instructions.size() < 2) return nullptr;
+        const Instruction* nextToLast = (instructions.cend() - 2)->get();
+        switch (nextToLast->getOpCode()) {
+        case OpSelectionMerge:
+        case OpLoopMerge:
+            return nextToLast;
+        default:
+            return nullptr;
+        }
+        return nullptr;
+    }
+
+    // Change this block into a canonical dead merge block. Delete instructions
+    // as necessary. A canonical dead merge block has only an OpLabel and an
+    // OpUnreachable.
+    void rewriteAsCanonicalUnreachableMerge() {
+        assert(localVariables.empty());
+        // Delete all instructions except for the label.
+        assert(instructions.size() > 0);
+        instructions.resize(1);
+        successors.clear();
+        addInstruction(std::unique_ptr<Instruction>(new Instruction(OpUnreachable)));
+    }
+    // Change this block into a canonical dead continue target branching to the
+    // given header ID. Delete instructions as necessary. A canonical dead continue
+    // target has only an OpLabel and an unconditional branch back to the corresponding
+    // header.
+    void rewriteAsCanonicalUnreachableContinue(Block* header) {
+        assert(localVariables.empty());
+        // Delete all instructions except for the label.
+        assert(instructions.size() > 0);
+        instructions.resize(1);
+        successors.clear();
+        // Add OpBranch back to the header.
+        assert(header != nullptr);
+        Instruction* branch = new Instruction(OpBranch);
+        branch->addIdOperand(header->getId());
+        addInstruction(std::unique_ptr<Instruction>(branch));
+        successors.push_back(header);
+    }
+
+    bool isTerminated() const
+    {
+        switch (instructions.back()->getOpCode()) {
+        case OpBranch:
+        case OpBranchConditional:
+        case OpSwitch:
+        case OpKill:
+        case OpTerminateInvocation:
+        case OpReturn:
+        case OpReturnValue:
+        case OpUnreachable:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    void dump(std::vector<unsigned int>& out) const
+    {
+        instructions[0]->dump(out);
+        for (int i = 0; i < (int)localVariables.size(); ++i)
+            localVariables[i]->dump(out);
+        for (int i = 1; i < (int)instructions.size(); ++i)
+            instructions[i]->dump(out);
+    }
+
+protected:
+    Block(const Block&);
+    Block& operator=(Block&);
+
+    // To enforce keeping parent and ownership in sync:
+    friend Function;
+
+    std::vector<std::unique_ptr<Instruction> > instructions;
+    std::vector<Block*> predecessors, successors;
+    std::vector<std::unique_ptr<Instruction> > localVariables;
+    Function& parent;
+
+    // track whether this block is known to be unreachable (not necessarily
+    // true for all unreachable blocks, but should be set at least
+    // for the extraneous ones introduced by the builder).
+    bool unreachable;
+};
+
+// The different reasons for reaching a block in the inReadableOrder traversal.
+enum ReachReason {
+    // Reachable from the entry block via transfers of control, i.e. branches.
+    ReachViaControlFlow = 0,
+    // A continue target that is not reachable via control flow.
+    ReachDeadContinue,
+    // A merge block that is not reachable via control flow.
+    ReachDeadMerge
+};
+
+// Traverses the control-flow graph rooted at root in an order suited for
+// readable code generation. Invokes callback at every node in the traversal
+// order. The callback arguments are:
+// - the block,
+// - the reason we reached the block,
+// - if the reason was that block is an unreachable continue or unreachable merge block
+//   then the last parameter is the corresponding header block.
+void inReadableOrder(Block* root, std::function<void(Block*, ReachReason, Block* header)> callback);
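To make the callback contract concrete, a sketch of a client-side traversal (the definition of inReadableOrder is supplied elsewhere in glslang's SPIRV library; `countReachable` and the populated `fn` are hypothetical, not part of this header):

    // Count the blocks of a function that are reachable via control flow.
    int countReachable(spv::Function* fn)
    {
        int n = 0;
        spv::inReadableOrder(fn->getEntryBlock(),
            [&n](spv::Block* b, spv::ReachReason why, spv::Block* /*header*/) {
                if (why == spv::ReachViaControlFlow && !b->isUnreachable())
                    ++n;
            });
        return n;
    }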
+
+//
+// SPIR-V IR Function.
+//
+
+class Function {
+public:
+    Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent);
+    virtual ~Function()
+    {
+        for (int i = 0; i < (int)parameterInstructions.size(); ++i)
+            delete parameterInstructions[i];
+
+        for (int i = 0; i < (int)blocks.size(); ++i)
+            delete blocks[i];
+    }
+    Id getId() const { return functionInstruction.getResultId(); }
+    Id getParamId(int p) const { return parameterInstructions[p]->getResultId(); }
+    Id getParamType(int p) const { return parameterInstructions[p]->getTypeId(); }
+
+    void addBlock(Block* block) { blocks.push_back(block); }
+    void removeBlock(Block* block)
+    {
+        auto found = find(blocks.begin(), blocks.end(), block);
+        assert(found != blocks.end());
+        blocks.erase(found);
+        delete block;
+    }
+
+    Module& getParent() const { return parent; }
+    Block* getEntryBlock() const { return blocks.front(); }
+    Block* getLastBlock() const { return blocks.back(); }
+    const std::vector<Block*>& getBlocks() const { return blocks; }
+    void addLocalVariable(std::unique_ptr<Instruction> inst);
+    Id getReturnType() const { return functionInstruction.getTypeId(); }
+    Id getFuncId() const { return functionInstruction.getResultId(); }
+    void setReturnPrecision(Decoration precision)
+    {
+        if (precision == DecorationRelaxedPrecision)
+            reducedPrecisionReturn = true;
+    }
+    Decoration getReturnPrecision() const
+        { return reducedPrecisionReturn ? DecorationRelaxedPrecision : NoPrecision; }
+
+    void setDebugLineInfo(Id fileName, int line, int column) {
+        lineInstruction = std::unique_ptr<Instruction>{new Instruction(OpLine)};
+        lineInstruction->addIdOperand(fileName);
+        lineInstruction->addImmediateOperand(line);
+        lineInstruction->addImmediateOperand(column);
+    }
+    bool hasDebugLineInfo() const { return lineInstruction != nullptr; }
+
+    void setImplicitThis() { implicitThis = true; }
+    bool hasImplicitThis() const { return implicitThis; }
+
+    void addParamPrecision(unsigned param, Decoration precision)
+    {
+        if (precision == DecorationRelaxedPrecision)
+            reducedPrecisionParams.insert(param);
+    }
+    Decoration getParamPrecision(unsigned param) const
+    {
+        return reducedPrecisionParams.find(param) != reducedPrecisionParams.end() ?
+            DecorationRelaxedPrecision : NoPrecision;
+    }
+
+    void dump(std::vector<unsigned int>& out) const
+    {
+        // OpLine
+        if (lineInstruction != nullptr) {
+            lineInstruction->dump(out);
+        }
+
+        // OpFunction
+        functionInstruction.dump(out);
+
+        // OpFunctionParameter
+        for (int p = 0; p < (int)parameterInstructions.size(); ++p)
+            parameterInstructions[p]->dump(out);
+
+        // Blocks
+        inReadableOrder(blocks[0], [&out](const Block* b, ReachReason, Block*) { b->dump(out); });
+        Instruction end(0, 0, OpFunctionEnd);
+        end.dump(out);
+    }
+
+protected:
+    Function(const Function&);
+    Function& operator=(Function&);
+
+    Module& parent;
+    std::unique_ptr<Instruction> lineInstruction;
+    Instruction functionInstruction;
+    std::vector<Instruction*> parameterInstructions;
+    std::vector<Block*> blocks;
+    bool implicitThis;  // true if this is a member function expecting to be passed a 'this' as the first argument
+    bool reducedPrecisionReturn;
+    std::set<int> reducedPrecisionParams;  // list of parameter indexes that need a relaxed precision arg
+};
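Note that the precision helpers above only ever record RelaxedPrecision; any other decoration is ignored and reads back as NoPrecision. A short sketch, with `fn` an assumed `spv::Function*`:

    fn->addParamPrecision(0, spv::DecorationRelaxedPrecision); // param 0 was declared mediump
    fn->addParamPrecision(1, spv::DecorationMax);              // not RelaxedPrecision: not recorded
    assert(fn->getParamPrecision(0) == spv::DecorationRelaxedPrecision);
    assert(fn->getParamPrecision(1) == spv::NoPrecision);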
+
+//
+// SPIR-V IR Module.
+//
+
+class Module {
+public:
+    Module() {}
+    virtual ~Module()
+    {
+        // TODO delete things
+    }
+
+    void addFunction(Function *fun) { functions.push_back(fun); }
+
+    void mapInstruction(Instruction *instruction)
+    {
+        spv::Id resultId = instruction->getResultId();
+        // map the instruction's result id
+        if (resultId >= idToInstruction.size())
+            idToInstruction.resize(resultId + 16);
+        idToInstruction[resultId] = instruction;
+    }
+
+    Instruction* getInstruction(Id id) const { return idToInstruction[id]; }
+    const std::vector<Function*>& getFunctions() const { return functions; }
+    spv::Id getTypeId(Id resultId) const {
+        return idToInstruction[resultId] == nullptr ? NoType : idToInstruction[resultId]->getTypeId();
+    }
+    StorageClass getStorageClass(Id typeId) const
+    {
+        assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer);
+        return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0);
+    }
+
+    void dump(std::vector<unsigned int>& out) const
+    {
+        for (int f = 0; f < (int)functions.size(); ++f)
+            functions[f]->dump(out);
+    }
+
+protected:
+    Module(const Module&);
+    std::vector<Function*> functions;
+
+    // map from result id to instruction having that result id
+    std::vector<Instruction*> idToInstruction;
+
+    // map from a result id to its type id
+};
+
+//
+// Implementation (it's here due to circular type definitions).
+//
+
+// Add both
+// - the OpFunction instruction
+// - all the OpFunctionParameter instructions
+__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent)
+    : parent(parent), lineInstruction(nullptr),
+      functionInstruction(id, resultType, OpFunction), implicitThis(false),
+      reducedPrecisionReturn(false)
+{
+    // OpFunction
+    functionInstruction.addImmediateOperand(FunctionControlMaskNone);
+    functionInstruction.addIdOperand(functionType);
+    parent.mapInstruction(&functionInstruction);
+    parent.addFunction(this);
+
+    // OpFunctionParameter
+    Instruction* typeInst = parent.getInstruction(functionType);
+    int numParams = typeInst->getNumOperands() - 1;
+    for (int p = 0; p < numParams; ++p) {
+        Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter);
+        parent.mapInstruction(param);
+        parameterInstructions.push_back(param);
+    }
+}
+
+__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst)
+{
+    Instruction* raw_instruction = inst.get();
+    blocks[0]->addLocalVariable(std::move(inst));
+    parent.mapInstruction(raw_instruction);
+}
+
+__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false)
+{
+    instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel)));
+    instructions.back()->setBlock(this);
+    parent.getParent().mapInstruction(instructions.back().get());
+}
+
+__inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
+{
+    Instruction* raw_instruction = inst.get();
+    instructions.push_back(std::move(inst));
+    raw_instruction->setBlock(this);
+    if (raw_instruction->getResultId())
+        parent.getParent().mapInstruction(raw_instruction);
+}
+
+}  // end spv namespace
+
+#endif // spvIR_H
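To see how the Module/Function/Block/Instruction hierarchy composes, here is a minimal sketch that hand-rolls `void f()` directly on these classes and serializes it. It is not code from this diff: a real frontend would go through spv::Builder, the ids are chosen by hand, linking against glslang's SPIRV library is assumed (for inReadableOrder), and cleanup is elided.

    #include "spvIR.h"

    std::vector<unsigned int> emitTrivialFunction()
    {
        spv::Module module;

        // %1 = OpTypeVoid, %2 = OpTypeFunction %1 (no parameter <id>s follow the return type)
        spv::Instruction* voidType = new spv::Instruction(1, spv::NoType, spv::OpTypeVoid);
        module.mapInstruction(voidType);
        spv::Instruction* fnType = new spv::Instruction(2, spv::NoType, spv::OpTypeFunction);
        fnType->addIdOperand(1);
        module.mapInstruction(fnType);

        // The Function constructor registers itself with the module and
        // creates one OpFunctionParameter per extra operand of %2 (none here).
        spv::Function* fn = new spv::Function(3, 1, 2, /*firstParamId*/ 4, module);
        spv::Block* entry = new spv::Block(5, *fn);
        fn->addBlock(entry);
        entry->addInstruction(std::unique_ptr<spv::Instruction>(new spv::Instruction(spv::OpReturn)));

        std::vector<unsigned int> words;
        module.dump(words);   // function bodies only; no SPIR-V header or type section
        return words;
    }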
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/build_info.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/build_info.h
new file mode 100644
index 0000000..e711b20
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/glslang/build_info.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2020 The Khronos Group Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of The Khronos Group Inc. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef GLSLANG_BUILD_INFO
+#define GLSLANG_BUILD_INFO
+
+#define GLSLANG_VERSION_MAJOR 11
+#define GLSLANG_VERSION_MINOR 12
+#define GLSLANG_VERSION_PATCH 0
+#define GLSLANG_VERSION_FLAVOR ""
+
+#define GLSLANG_VERSION_GREATER_THAN(major, minor, patch) \
+    ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \
+    ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \
+     (GLSLANG_VERSION_PATCH) > (patch)))))
+
+#define GLSLANG_VERSION_GREATER_OR_EQUAL_TO(major, minor, patch) \
+    ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \
+    ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \
+     (GLSLANG_VERSION_PATCH >= (patch))))))
+
+#define GLSLANG_VERSION_LESS_THAN(major, minor, patch) \
+    ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \
+    ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \
+     (GLSLANG_VERSION_PATCH) < (patch)))))
+
+#define GLSLANG_VERSION_LESS_OR_EQUAL_TO(major, minor, patch) \
+    ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \
+    ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \
+     (GLSLANG_VERSION_PATCH <= (patch))))))
+
+#endif // GLSLANG_BUILD_INFO
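These macros let client code gate itself at compile time on the vendored glslang snapshot. A quick sketch (the include path assumes the x86/include directory above is on the header search path):

    #include "glslang/build_info.h"

    // This snapshot is 11.12.0, so the first branch is taken.
    #if GLSLANG_VERSION_GREATER_OR_EQUAL_TO(11, 12, 0)
    //  ... may rely on APIs present in glslang 11.12 ...
    #else
    #error "vendored glslang snapshot is too old"
    #endif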
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/allocator.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/allocator.h
new file mode 100644
index 0000000..3a5ebca
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/allocator.h
@@ -0,0 +1,448 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_ALLOCATOR_H
+#define NCNN_ALLOCATOR_H
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#include "platform.h"
+
+#include <stdlib.h>
+
+#if NCNN_VULKAN
+#include <vulkan/vulkan.h>
+#endif // NCNN_VULKAN
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+#include <android/hardware_buffer.h>
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+// the alignment of all the allocated buffers
+#if NCNN_AVX512
+#define NCNN_MALLOC_ALIGN 64
+#elif NCNN_AVX
+#define NCNN_MALLOC_ALIGN 32
+#else
+#define NCNN_MALLOC_ALIGN 16
+#endif
+
+// we have some optimized kernels that may overread buffer a bit in loop
+// it is common to interleave next-loop data load with arithmetic instructions
+// allocating more bytes keeps us safe from SEGV_ACCERR failure
+#define NCNN_MALLOC_OVERREAD 64
+
+// Aligns a pointer to the specified number of bytes
+// ptr Aligned pointer
+// n Alignment size that must be a power of two
+template<typename _Tp>
+static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
+{
+    return (_Tp*)(((size_t)ptr + n - 1) & -n);
+}
+
+// Aligns a buffer size to the specified number of bytes
+// The function returns the minimum number that is greater or equal to sz and is divisible by n
+// sz Buffer size to align
+// n Alignment size that must be a power of two
+// e.g. alignSize(17, 16) == 32 and alignSize(32, 16) == 32
+static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n)
+{
+    return (sz + n - 1) & -n;
+}
+
+static NCNN_FORCEINLINE void* fastMalloc(size_t size)
+{
+#if _MSC_VER
+    return _aligned_malloc(size, NCNN_MALLOC_ALIGN);
+#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
+    void* ptr = 0;
+    if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD))
+        ptr = 0;
+    return ptr;
+#elif __ANDROID__ && __ANDROID_API__ < 17
+    return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD);
+#else
+    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD);
+    if (!udata)
+        return 0;
+    unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN);
+    adata[-1] = udata;
+    return adata;
+#endif
+}
+
+static NCNN_FORCEINLINE void fastFree(void* ptr)
+{
+    if (ptr)
+    {
+#if _MSC_VER
+        _aligned_free(ptr);
+#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
+        free(ptr);
+#elif __ANDROID__ && __ANDROID_API__ < 17
+        free(ptr);
+#else
+        unsigned char* udata = ((unsigned char**)ptr)[-1];
+        free(udata);
+#endif
+    }
+}
+
+#if NCNN_THREADS
+// exchange-add operation for atomic operations on reference counters
+#if defined __riscv && !defined __riscv_atomic
+// riscv target without A extension
+static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
+{
+    int tmp = *addr;
+    *addr += delta;
+    return tmp;
+}
+#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
+// atomic increment on the linux version of the Intel(tm) compiler
+#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
+#elif defined __GNUC__
+#if defined __clang__ && __clang_major__ >= 3 &&
!defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) +#ifdef __ATOMIC_ACQ_REL +#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) +#endif +#else +#if defined __ATOMIC_ACQ_REL && !defined __clang__ +// version for gcc >= 4.7 +#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) +#endif +#endif +#elif defined _MSC_VER && !defined RC_INVOKED +#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) +#else +// thread-unsafe branch +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif +#else // NCNN_THREADS +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif // NCNN_THREADS + +class NCNN_EXPORT Allocator +{ +public: + virtual ~Allocator(); + virtual void* fastMalloc(size_t size) = 0; + virtual void fastFree(void* ptr) = 0; +}; + +class PoolAllocatorPrivate; +class NCNN_EXPORT PoolAllocator : public Allocator +{ +public: + PoolAllocator(); + ~PoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + PoolAllocator(const PoolAllocator&); + PoolAllocator& operator=(const PoolAllocator&); + +private: + PoolAllocatorPrivate* const d; +}; + +class UnlockedPoolAllocatorPrivate; +class NCNN_EXPORT UnlockedPoolAllocator : public Allocator +{ +public: + UnlockedPoolAllocator(); + ~UnlockedPoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + UnlockedPoolAllocator(const UnlockedPoolAllocator&); + UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&); + +private: + UnlockedPoolAllocatorPrivate* const d; +}; + +#if NCNN_VULKAN + +class VulkanDevice; + +class NCNN_EXPORT VkBufferMemory +{ +public: + VkBuffer buffer; + + // the base offset assigned by allocator + size_t offset; + size_t capacity; + + VkDeviceMemory memory; + void* mapped_ptr; + + // buffer state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkPipelineStageFlags stage_flags; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkImageMemory +{ +public: + VkImage image; + VkImageView imageview; + + // underlying info assigned by allocator + int width; + int height; + int depth; + VkFormat format; + + VkDeviceMemory memory; + void* mapped_ptr; + + // the base offset assigned by allocator + size_t bind_offset; + size_t bind_capacity; + + // image state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkImageLayout image_layout; + mutable VkPipelineStageFlags stage_flags; + + // in-execution state, modified by command functions internally + mutable int 
command_refcount; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkAllocator +{ +public: + explicit VkAllocator(const VulkanDevice* _vkdev); + virtual ~VkAllocator(); + + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size) = 0; + virtual void fastFree(VkBufferMemory* ptr) = 0; + virtual int flush(VkBufferMemory* ptr); + virtual int invalidate(VkBufferMemory* ptr); + + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; + virtual void fastFree(VkImageMemory* ptr) = 0; + +public: + const VulkanDevice* vkdev; + uint32_t buffer_memory_type_index; + uint32_t image_memory_type_index; + uint32_t reserved_type_index; + bool mappable; + bool coherent; + +protected: + VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); + VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); + VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); + + VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); + VkImageView create_imageview(VkImage image, VkFormat format); +}; + +class VkBlobAllocatorPrivate; +class NCNN_EXPORT VkBlobAllocator : public VkAllocator +{ +public: + explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M + virtual ~VkBlobAllocator(); + +public: + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkBlobAllocator(const VkBlobAllocator&); + VkBlobAllocator& operator=(const VkBlobAllocator&); + +private: + VkBlobAllocatorPrivate* const d; +}; + +class VkWeightAllocatorPrivate; +class NCNN_EXPORT VkWeightAllocator : public VkAllocator +{ +public: + explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M + virtual ~VkWeightAllocator(); + +public: + // release all blocks immediately + virtual void clear(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightAllocator(const VkWeightAllocator&); + VkWeightAllocator& operator=(const VkWeightAllocator&); + +private: + VkWeightAllocatorPrivate* const d; +}; + +class VkStagingAllocatorPrivate; +class NCNN_EXPORT VkStagingAllocator : public VkAllocator +{ +public: + explicit VkStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkStagingAllocator(); + +public: + // ratio range 0 ~ 1 + // default cr = 0.75 + void set_size_compare_ratio(float scr); + + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkStagingAllocator(const VkStagingAllocator&); + VkStagingAllocator& operator=(const VkStagingAllocator&); + +private: + VkStagingAllocatorPrivate* const d; +}; + +class VkWeightStagingAllocatorPrivate; +class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator +{ +public: + explicit 
VkWeightStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkWeightStagingAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightStagingAllocator(const VkWeightStagingAllocator&); + VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&); + +private: + VkWeightStagingAllocatorPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator +{ +public: + VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb); + virtual ~VkAndroidHardwareBufferImageAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&); + VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&); + +public: + int init(); + + int width() const; + int height() const; + uint64_t external_format() const; + +public: + AHardwareBuffer* hb; + AHardwareBuffer_Desc bufferDesc; + VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties; + VkAndroidHardwareBufferPropertiesANDROID bufferProperties; + VkSamplerYcbcrConversionKHR samplerYcbcrConversion; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_ALLOCATOR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/benchmark.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/benchmark.h new file mode 100644 index 0000000..3d5c0cd --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/benchmark.h @@ -0,0 +1,36 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef NCNN_BENCHMARK_H +#define NCNN_BENCHMARK_H + +#include "layer.h" +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +// get now timestamp in ms +NCNN_EXPORT double get_current_time(); + +#if NCNN_BENCHMARK + +NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); +NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); + +#endif // NCNN_BENCHMARK + +} // namespace ncnn + +#endif // NCNN_BENCHMARK_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/blob.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/blob.h new file mode 100644 index 0000000..c9f144f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/blob.h @@ -0,0 +1,44 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_BLOB_H +#define NCNN_BLOB_H + +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT Blob +{ +public: + // empty + Blob(); + +public: +#if NCNN_STRING + // blob name + std::string name; +#endif // NCNN_STRING + // layer index which produce this blob as output + int producer; + // layer index which need this blob as input + int consumer; + // shape hint + Mat shape; +}; + +} // namespace ncnn + +#endif // NCNN_BLOB_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/c_api.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/c_api.h new file mode 100644 index 0000000..ebf5db5 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/c_api.h @@ -0,0 +1,336 @@ +/* Tencent is pleased to support the open source community by making ncnn available. + * + * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. + * + * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */
+
+#ifndef NCNN_C_API_H
+#define NCNN_C_API_H
+
+#include "platform.h"
+
+#if NCNN_C_API
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NCNN_EXPORT const char* ncnn_version();
+
+/* allocator api */
+typedef struct __ncnn_allocator_t* ncnn_allocator_t;
+struct NCNN_EXPORT __ncnn_allocator_t
+{
+    void* pthis;
+
+    void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size);
+    void (*fast_free)(ncnn_allocator_t allocator, void* ptr);
+};
+
+NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator();
+NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator();
+NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator);
+
+/* option api */
+typedef struct __ncnn_option_t* ncnn_option_t;
+
+NCNN_EXPORT ncnn_option_t ncnn_option_create();
+NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt);
+
+NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads);
+
+NCNN_EXPORT int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int use_local_pool_allocator);
+
+NCNN_EXPORT void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t allocator);
+NCNN_EXPORT void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocator_t allocator);
+
+NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute);
+
+/* mat api */
+typedef struct __ncnn_mat_t* ncnn_mat_t;
+
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create();
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack,
ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_destroy(ncnn_mat_t mat); + +NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v); + +NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator); + +NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat); +NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat); + +NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c); + +#if NCNN_PIXEL + +/* mat pixel api */ +#define NCNN_MAT_PIXEL_RGB 1 +#define NCNN_MAT_PIXEL_BGR 2 +#define NCNN_MAT_PIXEL_GRAY 3 +#define NCNN_MAT_PIXEL_RGBA 4 +#define NCNN_MAT_PIXEL_BGRA 5 +#define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16)) +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride); +NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); + +#endif /* NCNN_PIXEL */ + +NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals); + +NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt); +NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt); + +/* blob api */ +typedef struct __ncnn_blob_t* ncnn_blob_t; + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob); +NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob); + +NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c); + +/* paramdict api */ +typedef struct __ncnn_paramdict_t* ncnn_paramdict_t; + +NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create(); +NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd); + +NCNN_EXPORT int ncnn_paramdict_get_type(const 
ncnn_paramdict_t pd, int id); + +NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def); +NCNN_EXPORT float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float def); +NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def); + +NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i); +NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f); +NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v); + +/* datareader api */ +typedef struct __ncnn_datareader_t* ncnn_datareader_t; +struct NCNN_EXPORT __ncnn_datareader_t +{ + void* pthis; + +#if NCNN_STRING + int (*scan)(ncnn_datareader_t dr, const char* format, void* p); +#endif /* NCNN_STRING */ + size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size); +}; + +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create(); +#if NCNN_STDIO +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp); +#endif /* NCNN_STDIO */ +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem); +NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr); + +/* modelbin api */ +typedef struct __ncnn_modelbin_t* ncnn_modelbin_t; +struct NCNN_EXPORT __ncnn_modelbin_t +{ + void* pthis; + + ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type); + ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type); + ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type); +}; + +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr); +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n); +NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb); + +/* layer api */ +typedef struct __ncnn_layer_t* ncnn_layer_t; +struct NCNN_EXPORT __ncnn_layer_t +{ + void* pthis; + + int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd); + int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb); + + int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + + int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt); + int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt); + + int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt); + int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt); +}; + +NCNN_EXPORT ncnn_layer_t ncnn_layer_create(); +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex); +#if NCNN_STRING +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer); + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer); +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t 
layer); +NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer); + +NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable); + +NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i); +NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i); + +NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); +NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); + +/* layer factory function */ +typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata); +typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata); + +typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t; +struct __ncnn_net_custom_layer_factory_t +{ + ncnn_layer_creator_t creator; + ncnn_layer_destroyer_t destroyer; + void* userdata; + ncnn_net_custom_layer_factory_t next; +}; + +/* net api */ +typedef struct __ncnn_net_t* ncnn_net_t; +struct __ncnn_net_t +{ + void* pthis; + + ncnn_net_custom_layer_factory_t custom_layer_factory; +}; + +NCNN_EXPORT ncnn_net_t ncnn_net_create(); +NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net); + +NCNN_EXPORT ncnn_option_t ncnn_net_get_option(ncnn_net_t net); +NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt); + +#if NCNN_STRING +NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path); +NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path); +#endif /* NCNN_STDIO */ + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem); +#endif /* NCNN_STRING */ +#endif /* NCNN_STDIO */ +NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem); +NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem); + +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +NCNN_EXPORT int 
ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
+
+NCNN_EXPORT void ncnn_net_clear(ncnn_net_t net);
+
+NCNN_EXPORT int ncnn_net_get_input_count(const ncnn_net_t net);
+NCNN_EXPORT int ncnn_net_get_output_count(const ncnn_net_t net);
+#if NCNN_STRING
+NCNN_EXPORT const char* ncnn_net_get_input_name(const ncnn_net_t net, int i);
+NCNN_EXPORT const char* ncnn_net_get_output_name(const ncnn_net_t net, int i);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_net_get_input_index(const ncnn_net_t net, int i);
+NCNN_EXPORT int ncnn_net_get_output_index(const ncnn_net_t net, int i);
+
+/* extractor api */
+typedef struct __ncnn_extractor_t* ncnn_extractor_t;
+
+NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net);
+NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex);
+
+NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt);
+
+#if NCNN_STRING
+NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* NCNN_C_API */
+
+#endif /* NCNN_C_API_H */
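Taken together, the C API above supports a create-net / load-param / load-model / extract flow. The following sketch shows one inference pass using only functions declared in this header; the file paths, blob names ("images"/"output"), and 640x640 target size are placeholders for this demo's model, and error handling is reduced to early returns:

    #include "ncnn/c_api.h"

    int run_once(const unsigned char* pixels, int w, int h)
    {
        ncnn_net_t net = ncnn_net_create();
        ncnn_option_t opt = ncnn_option_create();
        ncnn_option_set_num_threads(opt, 4);
        ncnn_net_set_option(net, opt);

        if (ncnn_net_load_param(net, "yolov8n.param") != 0) return -1;
        if (ncnn_net_load_model(net, "yolov8n.bin") != 0) return -1;

        /* letterbox/normalization omitted; stride is w*3 for packed BGR */
        ncnn_mat_t in = ncnn_mat_from_pixels_resize(pixels, NCNN_MAT_PIXEL_BGR, w, h, w * 3, 640, 640, NULL);

        ncnn_extractor_t ex = ncnn_extractor_create(net);
        ncnn_extractor_input(ex, "images", in);
        ncnn_mat_t out = NULL;
        int ret = ncnn_extractor_extract(ex, "output", &out);

        if (out) ncnn_mat_destroy(out);
        ncnn_mat_destroy(in);
        ncnn_extractor_destroy(ex);
        ncnn_option_destroy(opt);
        ncnn_net_destroy(net);
        return ret;
    }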
+ +#ifndef NCNN_COMMAND_H +#define NCNN_COMMAND_H + +#include "platform.h" + +#if NCNN_VULKAN + +#include "mat.h" + +#include <vulkan/vulkan.h> + +namespace ncnn { + +class Pipeline; +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class ImportAndroidHardwareBufferPipeline; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API +class VkComputePrivate; +class NCNN_EXPORT VkCompute +{ +public: + explicit VkCompute(const VulkanDevice* vkdev); + virtual ~VkCompute(); + +public: + void record_upload(const Mat& src, VkMat& dst, const Option& opt); + + void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); + + void record_download(const VkMat& src, Mat& dst, const Option& opt); + + void record_download(const VkImageMat& src, Mat& dst, const Option& opt); + + void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt); + + void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt); + + void record_clone(const Mat& src, VkMat& dst, const Option& opt); + + void record_clone(const Mat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkMat& src, Mat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, Mat& dst, const Option& opt); + + void record_clone(const VkMat& src, VkMat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher); + +#if NCNN_BENCHMARK + void record_write_timestamp(uint32_t query); +#endif // NCNN_BENCHMARK + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst); + + void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + int submit_and_wait(); + + int reset(); + +#if NCNN_BENCHMARK + int create_query_pool(uint32_t query_count); + + int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results); +#endif // NCNN_BENCHMARK + +protected: + const VulkanDevice* vkdev; + + void barrier_readwrite(const VkMat& binding); + void barrier_readwrite(const VkImageMat& binding); + void barrier_readonly(const VkImageMat& binding); + +private: + VkComputePrivate* const d; +};
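For orientation, a minimal sketch of driving the VkCompute recorder declared above: borrow allocators from the device, record an upload and a download, then submit. The roundtrip wrapper itself is illustrative; only the record_*/submit_and_wait calls and the Option allocator fields come from these vendored headers, and create_gpu_instance() is assumed to have succeeded already.

    #include "command.h"
    #include "gpu.h"

    int roundtrip(const ncnn::Mat& in, ncnn::Mat& out)
    {
        ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

        ncnn::Option opt;
        opt.blob_vkallocator = vkdev->acquire_blob_allocator();
        opt.staging_vkallocator = vkdev->acquire_staging_allocator();

        {
            ncnn::VkCompute cmd(vkdev);
            ncnn::VkMat in_gpu;
            cmd.record_upload(in, in_gpu, opt);    // host -> device, staged copy
            cmd.record_download(in_gpu, out, opt); // device -> host
            cmd.submit_and_wait();                 // nothing executes until here
        }

        vkdev->reclaim_blob_allocator(opt.blob_vkallocator);
        vkdev->reclaim_staging_allocator(opt.staging_vkallocator);
        return 0;
    }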
+ +class VkTransferPrivate; +class NCNN_EXPORT VkTransfer +{ +public: + explicit VkTransfer(const VulkanDevice* vkdev); + virtual ~VkTransfer(); + +public: + void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true); + + void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); + + int submit_and_wait(); + +protected: + const VulkanDevice* vkdev; + +private: + VkTransferPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_COMMAND_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/cpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/cpu.h new file mode 100644 index 0000000..d03e7e8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/cpu.h @@ -0,0 +1,173 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_CPU_H +#define NCNN_CPU_H + +#include <stddef.h> + +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif +#if defined __ANDROID__ || defined __linux__ +#include <sched.h> // cpu_set_t +#endif + +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT CpuSet +{ +public: + CpuSet(); + void enable(int cpu); + void disable(int cpu); + void disable_all(); + bool is_enabled(int cpu) const; + int num_enabled() const; + +public: +#if (defined _WIN32 && !(defined __MINGW32__)) + ULONG_PTR mask; +#endif +#if defined __ANDROID__ || defined __linux__ + cpu_set_t cpu_set; +#endif +#if __APPLE__ + unsigned int policy; +#endif +}; + +// test optional cpu features +// edsp = armv7 edsp +NCNN_EXPORT int cpu_support_arm_edsp(); +// neon = armv7 neon or aarch64 asimd +NCNN_EXPORT int cpu_support_arm_neon(); +// vfpv4 = armv7 fp16 + fma +NCNN_EXPORT int cpu_support_arm_vfpv4(); +// asimdhp = aarch64 asimd half precision +NCNN_EXPORT int cpu_support_arm_asimdhp(); +// asimddp = aarch64 asimd dot product +NCNN_EXPORT int cpu_support_arm_asimddp(); +// asimdfhm = aarch64 asimd fhm +NCNN_EXPORT int cpu_support_arm_asimdfhm(); +// bf16 = aarch64 bf16 +NCNN_EXPORT int cpu_support_arm_bf16(); +// i8mm = aarch64 i8mm +NCNN_EXPORT int cpu_support_arm_i8mm(); +// sve = aarch64 sve +NCNN_EXPORT int cpu_support_arm_sve(); +// sve2 = aarch64 sve2 +NCNN_EXPORT int cpu_support_arm_sve2(); +// svebf16 = aarch64 svebf16 +NCNN_EXPORT int cpu_support_arm_svebf16(); +// svei8mm = aarch64 svei8mm +NCNN_EXPORT int cpu_support_arm_svei8mm(); +// svef32mm = aarch64 svef32mm +NCNN_EXPORT int cpu_support_arm_svef32mm(); + +// avx = x86 avx +NCNN_EXPORT int cpu_support_x86_avx(); +// fma = x86 fma +NCNN_EXPORT int cpu_support_x86_fma(); +// xop = x86 xop +NCNN_EXPORT int cpu_support_x86_xop(); +// f16c = x86 f16c +NCNN_EXPORT int cpu_support_x86_f16c(); +// avx2 = x86 avx2 + fma + f16c +NCNN_EXPORT int cpu_support_x86_avx2(); +// avx_vnni = x86 avx vnni +NCNN_EXPORT int cpu_support_x86_avx_vnni(); +// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl +NCNN_EXPORT int cpu_support_x86_avx512(); +// avx512_vnni = x86 avx512 vnni +NCNN_EXPORT int 
cpu_support_x86_avx512_vnni(); +// avx512_bf16 = x86 avx512 bf16 +NCNN_EXPORT int cpu_support_x86_avx512_bf16(); +// avx512_fp16 = x86 avx512 fp16 +NCNN_EXPORT int cpu_support_x86_avx512_fp16(); + +// lsx = loongarch lsx +NCNN_EXPORT int cpu_support_loongarch_lsx(); +// lasx = loongarch lasx +NCNN_EXPORT int cpu_support_loongarch_lasx(); + +// msa = mips msa +NCNN_EXPORT int cpu_support_mips_msa(); +// mmi = loongson mmi +NCNN_EXPORT int cpu_support_loongson_mmi(); + +// v = riscv vector +NCNN_EXPORT int cpu_support_riscv_v(); +// zfh = riscv half-precision float +NCNN_EXPORT int cpu_support_riscv_zfh(); +// vlenb = riscv vector length in bytes +NCNN_EXPORT int cpu_riscv_vlenb(); + +// cpu info +NCNN_EXPORT int get_cpu_count(); +NCNN_EXPORT int get_little_cpu_count(); +NCNN_EXPORT int get_big_cpu_count(); + +NCNN_EXPORT int get_physical_cpu_count(); +NCNN_EXPORT int get_physical_little_cpu_count(); +NCNN_EXPORT int get_physical_big_cpu_count(); + +// cpu l2 varies from 64k to 1M, but l3 can be zero +NCNN_EXPORT int get_cpu_level2_cache_size(); +NCNN_EXPORT int get_cpu_level3_cache_size(); + +// bind all threads on little clusters if powersave enabled +// affects HMP arch cpu like ARM big.LITTLE +// only implemented on android at the moment +// switching powersave is expensive and not thread-safe +// 0 = all cores enabled(default) +// 1 = only little clusters enabled +// 2 = only big clusters enabled +// return 0 if success for setter function +NCNN_EXPORT int get_cpu_powersave(); +NCNN_EXPORT int set_cpu_powersave(int powersave); + +// convenient wrapper +NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); + +// set explicit thread affinity +NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); + +// misc function wrapper for openmp routines +NCNN_EXPORT int get_omp_num_threads(); +NCNN_EXPORT void set_omp_num_threads(int num_threads); + +NCNN_EXPORT int get_omp_dynamic(); +NCNN_EXPORT void set_omp_dynamic(int dynamic); + +NCNN_EXPORT int get_omp_thread_num(); + +NCNN_EXPORT int get_kmp_blocktime(); +NCNN_EXPORT void set_kmp_blocktime(int time_ms); + +// need to flush denormals on Intel Chipset. +// Other architectures such as ARM can be added as needed. +// 0 = DAZ OFF, FTZ OFF +// 1 = DAZ ON , FTZ OFF +// 2 = DAZ OFF, FTZ ON +// 3 = DAZ ON, FTZ ON +NCNN_EXPORT int get_flush_denormals(); +NCNN_EXPORT int set_flush_denormals(int flush_denormals); + +} // namespace ncnn + +#endif // NCNN_CPU_H
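A short sketch of how the cpu.h helpers above are commonly combined on Android: pin work to the big cluster, size the OpenMP team to match, and flush denormals. The tuning policy is illustrative, not something the header prescribes.

    #include "cpu.h"

    void tune_threads_for_inference()
    {
        // 2 = only big clusters enabled; effectively a no-op on non-HMP hardware
        ncnn::set_cpu_powersave(2);

        // one OpenMP worker per big core, falling back to all cores
        int big = ncnn::get_big_cpu_count();
        ncnn::set_omp_num_threads(big > 0 ? big : ncnn::get_cpu_count());

        // 3 = DAZ ON, FTZ ON: avoid slow subnormal math on x86
        ncnn::set_flush_denormals(3);
    }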
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/datareader.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/datareader.h new file mode 100644 index 0000000..ed2aba3 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/datareader.h @@ -0,0 +1,122 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_DATAREADER_H +#define NCNN_DATAREADER_H + +#include "platform.h" +#if NCNN_STDIO +#include <stdio.h> +#endif + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/asset_manager.h> +#endif +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +// data read wrapper +class NCNN_EXPORT DataReader +{ +public: + DataReader(); + virtual ~DataReader(); + +#if NCNN_STRING + // parse plain param text + // return 1 if scan success + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + + // read binary param and model data + // return bytes read + virtual size_t read(void* buf, size_t size) const; + + // get model data reference + // return bytes referenced + virtual size_t reference(size_t size, const void** buf) const; +}; + +#if NCNN_STDIO +class DataReaderFromStdioPrivate; +class NCNN_EXPORT DataReaderFromStdio : public DataReader +{ +public: + explicit DataReaderFromStdio(FILE* fp); + virtual ~DataReaderFromStdio(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + +private: + DataReaderFromStdio(const DataReaderFromStdio&); + DataReaderFromStdio& operator=(const DataReaderFromStdio&); + +private: + DataReaderFromStdioPrivate* const d; +}; +#endif // NCNN_STDIO + +class DataReaderFromMemoryPrivate; +class NCNN_EXPORT DataReaderFromMemory : public DataReader +{ +public: + explicit DataReaderFromMemory(const unsigned char*& mem); + virtual ~DataReaderFromMemory(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + virtual size_t reference(size_t size, const void** buf) const; + +private: + DataReaderFromMemory(const DataReaderFromMemory&); + DataReaderFromMemory& operator=(const DataReaderFromMemory&); + +private: + DataReaderFromMemoryPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +class DataReaderFromAndroidAssetPrivate; +class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader +{ +public: + explicit DataReaderFromAndroidAsset(AAsset* asset); + virtual ~DataReaderFromAndroidAsset(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + +private: + DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&); + DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&); + +private: + DataReaderFromAndroidAssetPrivate* const d; +}; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API + +} // namespace ncnn + +#endif // NCNN_DATAREADER_H
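To make the DataReader family above concrete, a sketch of loading a network from caller-owned buffers. ncnn::Net and its load_param_bin/load_model overloads taking a DataReader live in net.h, outside this excerpt; the buffers here are hypothetical.

    #include "datareader.h"
    #include "net.h"

    int load_from_buffers(ncnn::Net& net, const unsigned char* param_bin, const unsigned char* weights)
    {
        // DataReaderFromMemory holds a reference and advances the pointer as it reads
        const unsigned char* p = param_bin;
        ncnn::DataReaderFromMemory dr_param(p);
        if (net.load_param_bin(dr_param) != 0)
            return -1;

        const unsigned char* w = weights;
        ncnn::DataReaderFromMemory dr_model(w);
        return net.load_model(dr_model);
    }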
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/gpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/gpu.h new file mode 100644 index 0000000..1cda182 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/gpu.h @@ -0,0 +1,360 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_GPU_H +#define NCNN_GPU_H + +#include "platform.h" + +#if NCNN_VULKAN + +#include "mat.h" + +#include <vulkan/vulkan.h> + +#include "vulkan_header_fix.h" + +namespace ncnn { + +// instance +NCNN_EXPORT int create_gpu_instance(); +NCNN_EXPORT void destroy_gpu_instance(); + +// instance extension capability +extern int support_VK_KHR_external_memory_capabilities; +extern int support_VK_KHR_get_physical_device_properties2; +extern int support_VK_KHR_get_surface_capabilities2; +extern int support_VK_KHR_surface; +extern int support_VK_EXT_debug_utils; +#if __ANDROID_API__ >= 26 +extern int support_VK_KHR_android_surface; +#endif // __ANDROID_API__ >= 26 + +// VK_KHR_external_memory_capabilities +extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR; + +// VK_KHR_get_physical_device_properties2 +extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; +extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; +extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR; +extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR; +extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR; +extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR; +extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR; + +// VK_KHR_get_surface_capabilities2 +extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; +extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; + +// VK_KHR_surface +extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; +extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; +extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR; +extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; +extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; + +#if __ANDROID_API__ >= 26 +// VK_KHR_android_surface +extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; +#endif // __ANDROID_API__ >= 26 + +// VK_NV_cooperative_matrix +extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV; + +// get info +NCNN_EXPORT int get_gpu_count(); +NCNN_EXPORT int get_default_gpu_index(); + +class GpuInfoPrivate; +class NCNN_EXPORT GpuInfo +{ +public: + explicit GpuInfo(); + virtual ~GpuInfo(); + + // vulkan physical device + VkPhysicalDevice physical_device() const; + + // memory properties + const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const; + + // info + uint32_t api_version() const; + uint32_t driver_version() const; + uint32_t vendor_id() const; + uint32_t device_id() const; + const char* device_name() const; + uint8_t* pipeline_cache_uuid() const; + + // 0 = discrete gpu + // 1 = integrated gpu + // 2 = 
virtual gpu + // 3 = cpu + int type() const; + + // hardware limit + uint32_t max_shared_memory_size() const; + uint32_t max_workgroup_count_x() const; + uint32_t max_workgroup_count_y() const; + uint32_t max_workgroup_count_z() const; + uint32_t max_workgroup_invocations() const; + uint32_t max_workgroup_size_x() const; + uint32_t max_workgroup_size_y() const; + uint32_t max_workgroup_size_z() const; + size_t memory_map_alignment() const; + size_t buffer_offset_alignment() const; + size_t non_coherent_atom_size() const; + size_t buffer_image_granularity() const; + uint32_t max_image_dimension_1d() const; + uint32_t max_image_dimension_2d() const; + uint32_t max_image_dimension_3d() const; + float timestamp_period() const; + + // runtime + uint32_t compute_queue_family_index() const; + uint32_t graphics_queue_family_index() const; + uint32_t transfer_queue_family_index() const; + + uint32_t compute_queue_count() const; + uint32_t graphics_queue_count() const; + uint32_t transfer_queue_count() const; + + // property + bool unified_compute_transfer_queue() const; + + // subgroup + uint32_t subgroup_size() const; + bool support_subgroup_basic() const; + bool support_subgroup_vote() const; + bool support_subgroup_ballot() const; + bool support_subgroup_shuffle() const; + + // bug is not feature + bool bug_storage_buffer_no_l1() const; + bool bug_corrupted_online_pipeline_cache() const; + bool bug_buffer_image_load_zero() const; + + // but sometimes bug is a feature + bool bug_implicit_fp16_arithmetic() const; + + // fp16 and int8 feature + bool support_fp16_packed() const; + bool support_fp16_storage() const; + bool support_fp16_arithmetic() const; + bool support_int8_packed() const; + bool support_int8_storage() const; + bool support_int8_arithmetic() const; + + // ycbcr conversion feature + bool support_ycbcr_conversion() const; + + // cooperative matrix feature + bool support_cooperative_matrix() const; + bool support_cooperative_matrix_16_8_8() const; + + // extension capability + int support_VK_KHR_8bit_storage() const; + int support_VK_KHR_16bit_storage() const; + int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_create_renderpass2() const; + int support_VK_KHR_dedicated_allocation() const; + int support_VK_KHR_descriptor_update_template() const; + int support_VK_KHR_external_memory() const; + int support_VK_KHR_get_memory_requirements2() const; + int support_VK_KHR_maintenance1() const; + int support_VK_KHR_maintenance2() const; + int support_VK_KHR_maintenance3() const; + int support_VK_KHR_multiview() const; + int support_VK_KHR_portability_subset() const; + int support_VK_KHR_push_descriptor() const; + int support_VK_KHR_sampler_ycbcr_conversion() const; + int support_VK_KHR_shader_float16_int8() const; + int support_VK_KHR_shader_float_controls() const; + int support_VK_KHR_storage_buffer_storage_class() const; + int support_VK_KHR_swapchain() const; + int support_VK_EXT_descriptor_indexing() const; + int support_VK_EXT_memory_budget() const; + int support_VK_EXT_queue_family_foreign() const; +#if __ANDROID_API__ >= 26 + int support_VK_ANDROID_external_memory_android_hardware_buffer() const; +#endif // __ANDROID_API__ >= 26 + int support_VK_NV_cooperative_matrix() const; + +private: + GpuInfo(const GpuInfo&); + GpuInfo& operator=(const GpuInfo&); + +private: + friend int create_gpu_instance(); + GpuInfoPrivate* const d; +}; + +NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); + +class VkAllocator; +class VkCompute; +class Option; +class 
PipelineCache; +class VulkanDevicePrivate; +class NCNN_EXPORT VulkanDevice +{ +public: + VulkanDevice(int device_index = get_default_gpu_index()); + ~VulkanDevice(); + + const GpuInfo& info; + + VkDevice vkdevice() const; + + VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const; + + // with fixed workgroup size + VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const; + + // helper for creating pipeline + int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const; + int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const; + int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const; + int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + + uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const; + bool is_mappable(uint32_t memory_type_index) const; + bool is_coherent(uint32_t memory_type_index) const; + + VkQueue acquire_queue(uint32_t queue_family_index) const; + void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const; + + // allocator on this device + VkAllocator* acquire_blob_allocator() const; + void reclaim_blob_allocator(VkAllocator* allocator) const; + + VkAllocator* acquire_staging_allocator() const; + void reclaim_staging_allocator(VkAllocator* allocator) const; + + // immutable sampler for texelfetch + const VkSampler* immutable_texelfetch_sampler() const; + + // dummy buffer image + VkMat get_dummy_buffer() const; + VkImageMat get_dummy_image() const; + VkImageMat get_dummy_image_readonly() const; + + // pipeline cache on this device + const PipelineCache* get_pipeline_cache() const; + + // test image allocation + bool shape_support_image_storage(const Mat& shape) const; + + // current gpu heap memory budget in MB + uint32_t get_heap_budget() const; + + // utility operator + void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + + // VK_KHR_bind_memory2 + PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; + PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + + // VK_KHR_create_renderpass2 + PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; + PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; + PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; + PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; + + // VK_KHR_descriptor_update_template + PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; + PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; + PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; + + // VK_KHR_get_memory_requirements2 + PFN_vkGetImageMemoryRequirements2KHR 
vkGetImageMemoryRequirements2KHR; + PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; + PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; + + // VK_KHR_maintenance1 + PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; + + // VK_KHR_maintenance3 + PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; + + // VK_KHR_push_descriptor + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; + PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; + + // VK_KHR_sampler_ycbcr_conversion + PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; + PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; + + // VK_KHR_swapchain + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; + +#if __ANDROID_API__ >= 26 + // VK_ANDROID_external_memory_android_hardware_buffer + PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; + PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; +#endif // __ANDROID_API__ >= 26 + +protected: + // device extension + int init_device_extension(); + +private: + VulkanDevice(const VulkanDevice&); + VulkanDevice& operator=(const VulkanDevice&); + +private: + VulkanDevicePrivate* const d; +}; + +NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); + +// online spirv compilation +NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv); + +// info from spirv +class NCNN_EXPORT ShaderInfo +{ +public: + int specialization_count; + int binding_count; + int push_constant_count; + + // 0 = null + // 1 = storage buffer + // 2 = storage image + // 3 = combined image sampler + int binding_types[16]; // 16 is large enough I think ... + + int reserved_0; + int reserved_1; + int reserved_2; + int reserved_3; +}; + +NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_GPU_H
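A sketch of the usual lifecycle around gpu.h: create the global Vulkan instance once, enumerate devices through GpuInfo, and tear the instance down only after every Vulkan-enabled net is gone. NCNN_LOGE comes from platform.h; the describe_gpus wrapper is illustrative.

    #include "gpu.h"

    void describe_gpus()
    {
        if (ncnn::create_gpu_instance() != 0)
            return; // no usable Vulkan driver

        int count = ncnn::get_gpu_count();
        for (int i = 0; i < count; i++)
        {
            const ncnn::GpuInfo& info = ncnn::get_gpu_info(i);
            NCNN_LOGE("gpu %d: %s type=%d fp16_storage=%d", i, info.device_name(), info.type(), (int)info.support_fp16_storage());
        }

        ncnn::destroy_gpu_instance();
    }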
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer.h new file mode 100644 index 0000000..d02f65b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer.h @@ -0,0 +1,214 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_H +#define NCNN_LAYER_H + +#include "mat.h" +#include "modelbin.h" +#include "option.h" +#include "paramdict.h" +#include "platform.h" + +#include <vector> + +#if NCNN_VULKAN +#include "command.h" +#include "pipeline.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +class NCNN_EXPORT Layer +{ +public: + // empty + Layer(); + // virtual destructor + virtual ~Layer(); + + // load layer specific parameter from parsed dict + // return 0 if success + virtual int load_param(const ParamDict& pd); + + // load layer specific weight data from model binary + // return 0 if success + virtual int load_model(const ModelBin& mb); + + // layer implementation specific setup + // return 0 if success + virtual int create_pipeline(const Option& opt); + + // layer implementation specific clean + // return 0 if success + virtual int destroy_pipeline(const Option& opt); + +public: + // one input and one output blob + bool one_blob_only; + + // support inplace inference + bool support_inplace; + + // support vulkan compute + bool support_vulkan; + + // accept input blob with packed storage + bool support_packing; + + // accept bf16 + bool support_bf16_storage; + + // accept fp16 + bool support_fp16_storage; + + // accept int8 + bool support_int8_storage; + + // shader image storage + bool support_image_storage; + + // shader tensor storage + bool support_tensor_storage; + + bool support_reserved_00; + + bool support_reserved_0; + bool support_reserved_1; + bool support_reserved_2; + bool support_reserved_3; + bool support_reserved_4; + bool support_reserved_5; + bool support_reserved_6; + bool support_reserved_7; + bool support_reserved_8; + bool support_reserved_9; + + // feature disabled set + int featmask; + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const; + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const; + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + +#if NCNN_VULKAN +public: + // upload weight blob from host to device + virtual int upload_model(VkTransfer& cmd, const Option& opt); + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) 
const; + +public: + // assigned immediately after creating this layer + const VulkanDevice* vkdev; +#endif // NCNN_VULKAN + +public: + // custom user data + void* userdata; + // layer type index + int typeindex; +#if NCNN_STRING + // layer type name + std::string type; + // layer name + std::string name; +#endif // NCNN_STRING + // blob index which this layer needs as input + std::vector<int> bottoms; + // blob index which this layer produces as output + std::vector<int> tops; + // shape hint + std::vector<Mat> bottom_shapes; + std::vector<Mat> top_shapes; +}; + +// layer factory function +typedef Layer* (*layer_creator_func)(void*); +typedef void (*layer_destroyer_func)(Layer*, void*); + +struct layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; +}; + +struct custom_layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + +#if NCNN_STRING +// get layer type from type name +NCNN_EXPORT int layer_to_index(const char* type); +// create layer from type name +NCNN_EXPORT Layer* create_layer(const char* type); +#endif // NCNN_STRING +// create layer from layer type +NCNN_EXPORT Layer* create_layer(int index); + +#define DEFINE_LAYER_CREATOR(name) \ + ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \ + { \ + return new name; \ + } + +#define DEFINE_LAYER_DESTROYER(name) \ + void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \ + { \ + delete layer; \ + } + +} // namespace ncnn + +#endif // NCNN_LAYER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_shader_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_shader_type.h new file mode 100644 index 0000000..c143e7d --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_shader_type.h @@ -0,0 +1,29 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
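Closing out layer.h: the factory macros above are what a custom layer plugs into. A minimal in-place ReLU as it would be written against this interface; MyRelu is illustrative, registration goes through Net::register_custom_layer in net.h outside this excerpt, and packed/fp16 storage is ignored for brevity.

    #include "layer.h"

    class MyRelu : public ncnn::Layer
    {
    public:
        MyRelu()
        {
            one_blob_only = true;   // exactly one input blob and one output blob
            support_inplace = true; // the net may call forward_inplace
        }

        virtual int forward_inplace(ncnn::Mat& bottom_top_blob, const ncnn::Option& /*opt*/) const
        {
            const int size = bottom_top_blob.w * bottom_top_blob.h * bottom_top_blob.d;
            for (int q = 0; q < bottom_top_blob.c; q++)
            {
                float* ptr = bottom_top_blob.channel(q); // per-channel plane
                for (int i = 0; i < size; i++)
                {
                    if (ptr[i] < 0.f)
                        ptr[i] = 0.f;
                }
            }
            return 0;
        }
    };

    DEFINE_LAYER_CREATOR(MyRelu)
    // later: net.register_custom_layer("MyRelu", MyRelu_layer_creator);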
+ +#ifndef NCNN_LAYER_SHADER_TYPE_H +#define NCNN_LAYER_SHADER_TYPE_H + +namespace ncnn { + +namespace LayerShaderType { +enum LayerShaderType +{ +#include "layer_shader_type_enum.h" +}; +} // namespace LayerShaderType + +} // namespace ncnn + +#endif // NCNN_LAYER_SHADER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_shader_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_shader_type_enum.h new file mode 100644 index 0000000..bad5605 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_shader_type_enum.h @@ -0,0 +1,369 @@ +// Layer Shader Enum header +// +// This file is auto-generated by cmake, don't edit it. + +absval = 0, +absval_pack4 = 1, +absval_pack8 = 2, +batchnorm = 3, +batchnorm_pack4 = 4, +batchnorm_pack8 = 5, +concat = 6, +concat_pack4 = 7, +concat_pack4to1 = 8, +concat_pack8 = 9, +concat_pack8to1 = 10, +concat_pack8to4 = 11, +convolution = 12, +convolution_1x1s1d1 = 13, +convolution_3x3s1d1_winograd23_transform_input = 14, +convolution_3x3s1d1_winograd23_transform_output = 15, +convolution_3x3s1d1_winograd43_transform_input = 16, +convolution_3x3s1d1_winograd43_transform_output = 17, +convolution_3x3s1d1_winograd_gemm = 18, +convolution_gemm = 19, +convolution_pack1to4 = 20, +convolution_pack1to4_1x1s1d1 = 21, +convolution_pack1to4_3x3s1d1_winograd_gemm = 22, +convolution_pack1to4_gemm = 23, +convolution_pack1to8 = 24, +convolution_pack1to8_1x1s1d1 = 25, +convolution_pack1to8_3x3s1d1_winograd_gemm = 26, +convolution_pack1to8_gemm = 27, +convolution_pack4 = 28, +convolution_pack4_1x1s1d1 = 29, +convolution_pack4_1x1s1d1_cm_16_8_8 = 30, +convolution_pack4_3x3s1d1_winograd23_transform_input = 31, +convolution_pack4_3x3s1d1_winograd23_transform_output = 32, +convolution_pack4_3x3s1d1_winograd43_transform_input = 33, +convolution_pack4_3x3s1d1_winograd43_transform_output = 34, +convolution_pack4_3x3s1d1_winograd_gemm = 35, +convolution_pack4_3x3s1d1_winograd_gemm_cm_16_8_8 = 36, +convolution_pack4_gemm = 37, +convolution_pack4_gemm_cm_16_8_8 = 38, +convolution_pack4to1 = 39, +convolution_pack4to1_1x1s1d1 = 40, +convolution_pack4to1_3x3s1d1_winograd_gemm = 41, +convolution_pack4to1_gemm = 42, +convolution_pack4to8 = 43, +convolution_pack4to8_1x1s1d1 = 44, +convolution_pack4to8_3x3s1d1_winograd_gemm = 45, +convolution_pack4to8_gemm = 46, +convolution_pack8 = 47, +convolution_pack8_1x1s1d1 = 48, +convolution_pack8_3x3s1d1_winograd23_transform_input = 49, +convolution_pack8_3x3s1d1_winograd23_transform_output = 50, +convolution_pack8_3x3s1d1_winograd43_transform_input = 51, +convolution_pack8_3x3s1d1_winograd43_transform_output = 52, +convolution_pack8_3x3s1d1_winograd_gemm = 53, +convolution_pack8_gemm = 54, +convolution_pack8to1 = 55, +convolution_pack8to1_1x1s1d1 = 56, +convolution_pack8to1_3x3s1d1_winograd_gemm = 57, +convolution_pack8to1_gemm = 58, +convolution_pack8to4 = 59, +convolution_pack8to4_1x1s1d1 = 60, +convolution_pack8to4_3x3s1d1_winograd_gemm = 61, +convolution_pack8to4_gemm = 62, +crop = 63, +crop_pack1to4 = 64, +crop_pack1to8 = 65, +crop_pack4 = 66, +crop_pack4to1 = 67, +crop_pack4to8 = 68, +crop_pack8 = 69, +crop_pack8to1 = 70, +crop_pack8to4 = 71, +deconvolution = 72, +deconvolution_col2im = 73, +deconvolution_gemm = 74, +deconvolution_pack1to4 = 75, +deconvolution_pack1to4_gemm = 76, +deconvolution_pack1to8 = 77, +deconvolution_pack1to8_gemm = 78, +deconvolution_pack4 = 79, +deconvolution_pack4_col2im = 80, 
+deconvolution_pack4_gemm = 81, +deconvolution_pack4_gemm_cm_16_8_8 = 82, +deconvolution_pack4to1 = 83, +deconvolution_pack4to1_gemm = 84, +deconvolution_pack4to8 = 85, +deconvolution_pack4to8_gemm = 86, +deconvolution_pack8 = 87, +deconvolution_pack8_col2im = 88, +deconvolution_pack8_gemm = 89, +deconvolution_pack8to1 = 90, +deconvolution_pack8to1_gemm = 91, +deconvolution_pack8to4 = 92, +deconvolution_pack8to4_gemm = 93, +dropout = 94, +dropout_pack4 = 95, +dropout_pack8 = 96, +eltwise = 97, +eltwise_pack4 = 98, +eltwise_pack8 = 99, +elu = 100, +elu_pack4 = 101, +elu_pack8 = 102, +flatten = 103, +flatten_pack1to4 = 104, +flatten_pack1to8 = 105, +flatten_pack4 = 106, +flatten_pack4to8 = 107, +flatten_pack8 = 108, +innerproduct = 109, +innerproduct_gemm = 110, +innerproduct_gemm_wp1to4 = 111, +innerproduct_gemm_wp1to8 = 112, +innerproduct_gemm_wp4 = 113, +innerproduct_gemm_wp4to1 = 114, +innerproduct_gemm_wp4to8 = 115, +innerproduct_gemm_wp8 = 116, +innerproduct_gemm_wp8to1 = 117, +innerproduct_gemm_wp8to4 = 118, +innerproduct_pack1to4 = 119, +innerproduct_pack1to8 = 120, +innerproduct_pack4 = 121, +innerproduct_pack4to1 = 122, +innerproduct_pack4to8 = 123, +innerproduct_pack8 = 124, +innerproduct_pack8to1 = 125, +innerproduct_pack8to4 = 126, +innerproduct_reduce_sum8 = 127, +innerproduct_reduce_sum8_pack4 = 128, +innerproduct_reduce_sum8_pack8 = 129, +innerproduct_sum8 = 130, +innerproduct_sum8_pack1to4 = 131, +innerproduct_sum8_pack1to8 = 132, +innerproduct_sum8_pack4 = 133, +innerproduct_sum8_pack4to1 = 134, +innerproduct_sum8_pack4to8 = 135, +innerproduct_sum8_pack8 = 136, +innerproduct_sum8_pack8to1 = 137, +innerproduct_sum8_pack8to4 = 138, +lrn_norm = 139, +lrn_norm_across_channel_pack4 = 140, +lrn_norm_across_channel_pack8 = 141, +lrn_norm_within_channel_pack4 = 142, +lrn_norm_within_channel_pack8 = 143, +lrn_square_pad = 144, +lrn_square_pad_across_channel_pack4 = 145, +lrn_square_pad_across_channel_pack8 = 146, +lrn_square_pad_within_channel_pack4 = 147, +lrn_square_pad_within_channel_pack8 = 148, +pooling = 149, +pooling_adaptive = 150, +pooling_adaptive_pack4 = 151, +pooling_adaptive_pack8 = 152, +pooling_global = 153, +pooling_global_pack4 = 154, +pooling_global_pack8 = 155, +pooling_pack4 = 156, +pooling_pack8 = 157, +prelu = 158, +prelu_pack4 = 159, +prelu_pack8 = 160, +relu = 161, +relu_pack4 = 162, +relu_pack8 = 163, +reshape = 164, +reshape_pack1to4 = 165, +reshape_pack1to8 = 166, +reshape_pack4 = 167, +reshape_pack4to1 = 168, +reshape_pack4to8 = 169, +reshape_pack8 = 170, +reshape_pack8to1 = 171, +reshape_pack8to4 = 172, +scale = 173, +scale_pack4 = 174, +scale_pack8 = 175, +sigmoid = 176, +sigmoid_pack4 = 177, +sigmoid_pack8 = 178, +slice = 179, +slice_pack1to4 = 180, +slice_pack1to8 = 181, +slice_pack4 = 182, +slice_pack4to8 = 183, +slice_pack8 = 184, +softmax_div_sum = 185, +softmax_div_sum_pack4 = 186, +softmax_div_sum_pack8 = 187, +softmax_exp_sub_max = 188, +softmax_exp_sub_max_pack4 = 189, +softmax_exp_sub_max_pack8 = 190, +softmax_reduce_max = 191, +softmax_reduce_max_pack4 = 192, +softmax_reduce_max_pack8 = 193, +softmax_reduce_sum = 194, +softmax_reduce_sum_pack4 = 195, +softmax_reduce_sum_pack8 = 196, +tanh = 197, +tanh_pack4 = 198, +tanh_pack8 = 199, +binaryop = 200, +binaryop_broadcast_inner = 201, +binaryop_broadcast_inner_pack4 = 202, +binaryop_broadcast_inner_pack8 = 203, +binaryop_broadcast_outer = 204, +binaryop_broadcast_outer_pack4 = 205, +binaryop_broadcast_outer_pack8 = 206, +binaryop_pack4 = 207, +binaryop_pack8 = 208, +unaryop = 209, +unaryop_pack4 
= 210, +unaryop_pack8 = 211, +convolutiondepthwise = 212, +convolutiondepthwise_group = 213, +convolutiondepthwise_group_pack1to4 = 214, +convolutiondepthwise_group_pack1to8 = 215, +convolutiondepthwise_group_pack4 = 216, +convolutiondepthwise_group_pack4to1 = 217, +convolutiondepthwise_group_pack4to8 = 218, +convolutiondepthwise_group_pack8 = 219, +convolutiondepthwise_group_pack8to1 = 220, +convolutiondepthwise_group_pack8to4 = 221, +convolutiondepthwise_pack4 = 222, +convolutiondepthwise_pack8 = 223, +padding = 224, +padding_3d = 225, +padding_3d_pack4 = 226, +padding_3d_pack8 = 227, +padding_pack1to4 = 228, +padding_pack1to8 = 229, +padding_pack4 = 230, +padding_pack4to1 = 231, +padding_pack4to8 = 232, +padding_pack8 = 233, +padding_pack8to1 = 234, +padding_pack8to4 = 235, +normalize_coeffs = 236, +normalize_coeffs_pack4 = 237, +normalize_coeffs_pack8 = 238, +normalize_norm = 239, +normalize_norm_pack4 = 240, +normalize_norm_pack8 = 241, +normalize_reduce_sum4_fp16_to_fp32 = 242, +normalize_reduce_sum4_fp16_to_fp32_pack4 = 243, +normalize_reduce_sum4_fp16_to_fp32_pack8 = 244, +normalize_reduce_sum4_fp32 = 245, +normalize_reduce_sum4_fp32_pack4 = 246, +normalize_reduce_sum4_fp32_pack8 = 247, +permute = 248, +permute_pack1to4 = 249, +permute_pack1to8 = 250, +permute_pack4 = 251, +permute_pack4to1 = 252, +permute_pack4to8 = 253, +permute_pack8 = 254, +permute_pack8to1 = 255, +permute_pack8to4 = 256, +priorbox = 257, +priorbox_mxnet = 258, +interp = 259, +interp_bicubic = 260, +interp_bicubic_coeffs = 261, +interp_bicubic_pack4 = 262, +interp_bicubic_pack8 = 263, +interp_pack4 = 264, +interp_pack8 = 265, +deconvolutiondepthwise = 266, +deconvolutiondepthwise_group = 267, +deconvolutiondepthwise_group_pack1to4 = 268, +deconvolutiondepthwise_group_pack1to8 = 269, +deconvolutiondepthwise_group_pack4 = 270, +deconvolutiondepthwise_group_pack4to1 = 271, +deconvolutiondepthwise_group_pack4to8 = 272, +deconvolutiondepthwise_group_pack8 = 273, +deconvolutiondepthwise_group_pack8to1 = 274, +deconvolutiondepthwise_group_pack8to4 = 275, +deconvolutiondepthwise_pack4 = 276, +deconvolutiondepthwise_pack8 = 277, +shufflechannel = 278, +shufflechannel_pack4 = 279, +shufflechannel_pack8 = 280, +instancenorm_coeffs = 281, +instancenorm_coeffs_pack4 = 282, +instancenorm_coeffs_pack8 = 283, +instancenorm_norm = 284, +instancenorm_norm_pack4 = 285, +instancenorm_norm_pack8 = 286, +instancenorm_reduce_mean = 287, +instancenorm_reduce_mean_pack4 = 288, +instancenorm_reduce_mean_pack8 = 289, +instancenorm_reduce_sum4_fp16_to_fp32 = 290, +instancenorm_reduce_sum4_fp16_to_fp32_pack4 = 291, +instancenorm_reduce_sum4_fp16_to_fp32_pack8 = 292, +instancenorm_reduce_sum4_fp32 = 293, +instancenorm_reduce_sum4_fp32_pack4 = 294, +instancenorm_reduce_sum4_fp32_pack8 = 295, +instancenorm_sub_mean_square = 296, +instancenorm_sub_mean_square_pack4 = 297, +instancenorm_sub_mean_square_pack8 = 298, +clip = 299, +clip_pack4 = 300, +clip_pack8 = 301, +reorg = 302, +reorg_pack1to4 = 303, +reorg_pack1to8 = 304, +reorg_pack4 = 305, +reorg_pack4to8 = 306, +reorg_pack8 = 307, +packing = 308, +packing_fp16_to_fp32 = 309, +packing_fp32_to_fp16 = 310, +packing_pack1to4 = 311, +packing_pack1to4_fp16_to_fp32 = 312, +packing_pack1to4_fp32_to_fp16 = 313, +packing_pack1to8 = 314, +packing_pack1to8_fp16_to_fp32 = 315, +packing_pack1to8_fp32_to_fp16 = 316, +packing_pack4 = 317, +packing_pack4_fp16_to_fp32 = 318, +packing_pack4_fp32_to_fp16 = 319, +packing_pack4to1 = 320, +packing_pack4to1_fp16_to_fp32 = 321, +packing_pack4to1_fp32_to_fp16 = 
322, +packing_pack4to8 = 323, +packing_pack4to8_fp16_to_fp32 = 324, +packing_pack4to8_fp32_to_fp16 = 325, +packing_pack8 = 326, +packing_pack8_fp16_to_fp32 = 327, +packing_pack8_fp32_to_fp16 = 328, +packing_pack8to1 = 329, +packing_pack8to1_fp16_to_fp32 = 330, +packing_pack8to1_fp32_to_fp16 = 331, +packing_pack8to4 = 332, +packing_pack8to4_fp16_to_fp32 = 333, +packing_pack8to4_fp32_to_fp16 = 334, +cast_fp16_to_fp32 = 335, +cast_fp16_to_fp32_pack4 = 336, +cast_fp16_to_fp32_pack8 = 337, +cast_fp32_to_fp16 = 338, +cast_fp32_to_fp16_pack4 = 339, +cast_fp32_to_fp16_pack8 = 340, +hardsigmoid = 341, +hardsigmoid_pack4 = 342, +hardsigmoid_pack8 = 343, +hardswish = 344, +hardswish_pack4 = 345, +hardswish_pack8 = 346, +pixelshuffle = 347, +pixelshuffle_pack4 = 348, +pixelshuffle_pack4to1 = 349, +pixelshuffle_pack8 = 350, +pixelshuffle_pack8to1 = 351, +pixelshuffle_pack8to4 = 352, +deepcopy = 353, +deepcopy_pack4 = 354, +deepcopy_pack8 = 355, +mish = 356, +mish_pack4 = 357, +mish_pack8 = 358, +swish = 359, +swish_pack4 = 360, +swish_pack8 = 361, +convert_ycbcr = 362, +vulkan_activation = 363, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_type.h new file mode 100644 index 0000000..511c714 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_type.h @@ -0,0 +1,30 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_TYPE_H +#define NCNN_LAYER_TYPE_H + +namespace ncnn { + +namespace LayerType { +enum LayerType +{ +#include "layer_type_enum.h" + CustomBit = (1 << 8), +}; +} // namespace LayerType + +} // namespace ncnn + +#endif // NCNN_LAYER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_type_enum.h new file mode 100644 index 0000000..9c1be52 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/layer_type_enum.h @@ -0,0 +1,105 @@ +// Layer Type Enum header +// +// This file is auto-generated by cmake, don't edit it. 
+ +AbsVal = 0, +ArgMax = 1, +BatchNorm = 2, +Bias = 3, +BNLL = 4, +Concat = 5, +Convolution = 6, +Crop = 7, +Deconvolution = 8, +Dropout = 9, +Eltwise = 10, +ELU = 11, +Embed = 12, +Exp = 13, +Flatten = 14, +InnerProduct = 15, +Input = 16, +Log = 17, +LRN = 18, +MemoryData = 19, +MVN = 20, +Pooling = 21, +Power = 22, +PReLU = 23, +Proposal = 24, +Reduction = 25, +ReLU = 26, +Reshape = 27, +ROIPooling = 28, +Scale = 29, +Sigmoid = 30, +Slice = 31, +Softmax = 32, +Split = 33, +SPP = 34, +TanH = 35, +Threshold = 36, +Tile = 37, +RNN = 38, +LSTM = 39, +BinaryOp = 40, +UnaryOp = 41, +ConvolutionDepthWise = 42, +Padding = 43, +Squeeze = 44, +ExpandDims = 45, +Normalize = 46, +Permute = 47, +PriorBox = 48, +DetectionOutput = 49, +Interp = 50, +DeconvolutionDepthWise = 51, +ShuffleChannel = 52, +InstanceNorm = 53, +Clip = 54, +Reorg = 55, +YoloDetectionOutput = 56, +Quantize = 57, +Dequantize = 58, +Yolov3DetectionOutput = 59, +PSROIPooling = 60, +ROIAlign = 61, +Packing = 62, +Requantize = 63, +Cast = 64, +HardSigmoid = 65, +SELU = 66, +HardSwish = 67, +Noop = 68, +PixelShuffle = 69, +DeepCopy = 70, +Mish = 71, +StatisticsPooling = 72, +Swish = 73, +Gemm = 74, +GroupNorm = 75, +LayerNorm = 76, +Softplus = 77, +GRU = 78, +MultiHeadAttention = 79, +GELU = 80, +Convolution1D = 81, +Pooling1D = 82, +ConvolutionDepthWise1D = 83, +Convolution3D = 84, +ConvolutionDepthWise3D = 85, +Pooling3D = 86, +MatMul = 87, +Deconvolution1D = 88, +DeconvolutionDepthWise1D = 89, +Deconvolution3D = 90, +DeconvolutionDepthWise3D = 91, +Einsum = 92, +DeformableConv2D = 93, +GLU = 94, +Fold = 95, +Unfold = 96, +GridSample = 97, +CumulativeSum = 98, +CopyTo = 99, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/mat.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/mat.h new file mode 100644 index 0000000..c6f59ef --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/mat.h @@ -0,0 +1,1843 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
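mat.h below declares ncnn::Mat, the container every header above passes around. As orientation, a sketch of typical construction and pixel import; the 224x224 target size is illustrative, and substract_mean_normalize (ncnn's spelling) is declared further down in mat.h, past the end of this excerpt.

    #include "mat.h"

    void mat_basics(const unsigned char* rgb, int w, int h)
    {
        // w=224, h=224, c=3 float tensor with 4-byte elements, default allocator
        ncnn::Mat blob(224, 224, 3);
        blob.fill(0.f);

        // decode packed RGB bytes into planar float channels, resizing on the fly
        ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb, ncnn::Mat::PIXEL_RGB, w, h, 224, 224);

        const float mean[3] = {0.f, 0.f, 0.f};
        const float norm[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
        in.substract_mean_normalize(mean, norm);
    }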
+ +#ifndef NCNN_MAT_H +#define NCNN_MAT_H + +#include <stdlib.h> +#include <string.h> +#if __ARM_NEON +#include <arm_neon.h> +#endif +#if __SSE2__ +#include <emmintrin.h> +#if __AVX__ +#include <immintrin.h> +#endif +#endif +#if __mips_msa +#include <msa.h> +#endif +#if __loongarch_sx +#include <lsxintrin.h> +#endif +#if __riscv_vector +#include <riscv_vector.h> +#include "cpu.h" // cpu_riscv_vlenb() +#endif + +#include "allocator.h" +#include "option.h" +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PIXEL +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/bitmap.h> +#include <jni.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + +namespace ncnn { + +#if NCNN_VULKAN +class VkMat; +class VkImageMat; +#endif // NCNN_VULKAN + +// the three dimension matrix +class NCNN_EXPORT Mat +{ +public: + // empty + Mat(); + // vec + Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // image + Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // dim + Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // cube + Mat(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // packed vec + Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed image + Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed dim + Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed cube + Mat(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // copy + Mat(const Mat& m); + // external vec + Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external image + Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external dim + Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external packed vec + Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed image + Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed dim + Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // release + ~Mat(); + // assign + Mat& operator=(const Mat& m); + // set all + void fill(float v); + void fill(int v); +#if __ARM_NEON + void fill(float32x4_t _v); + void fill(uint16x4_t _v); + void fill(int32x4_t _v); + void fill(int32x4_t _v0, int32x4_t _v1); +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + void fill(float16x4_t _v); + void fill(float16x8_t _v); +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ + void fill(__m512 _v); +#endif // __AVX512F__ + void fill(__m256 _v, int i = 0); +#endif // __AVX__ + void fill(__m128 _v); + void fill(__m128i _v); +#endif // __SSE2__ +#if __mips_msa + void fill(v4f32 _v); +#endif // __mips_msa +#if __loongarch_sx + void fill(__m128 _v); +#endif //__loongarch_sx +#if __riscv_vector + void fill(vfloat32m1_t _v); + void fill(vuint16m1_t _v); + void fill(vint8m1_t _v); +#if __riscv_zfh + void fill(vfloat16m1_t _v); +#endif // __riscv_zfh +#endif // __riscv_vector + template<typename T> + void fill(T v); + // deep copy + Mat clone(Allocator* allocator = 0) const; + // deep copy from other mat, inplace + void 
clone_from(const ncnn::Mat& mat, Allocator* allocator = 0); + // reshape vec + Mat reshape(int w, Allocator* allocator = 0) const; + // reshape image + Mat reshape(int w, int h, Allocator* allocator = 0) const; + // reshape dim + Mat reshape(int w, int h, int c, Allocator* allocator = 0) const; + // reshape cube + Mat reshape(int w, int h, int d, int c, Allocator* allocator = 0) const; + // allocate vec + void create(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate image + void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate dim + void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate like + void create_like(const Mat& m, Allocator* allocator = 0); +#if NCNN_VULKAN + // allocate like + void create_like(const VkMat& m, Allocator* allocator = 0); + // allocate like + void create_like(const VkImageMat& im, Allocator* allocator = 0); +#endif // NCNN_VULKAN + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // data reference + Mat channel(int c); + const Mat channel(int c) const; + Mat depth(int z); + const Mat depth(int z) const; + float* row(int y); + const float* row(int y) const; + template<typename T> + T* row(int y); + template<typename T> + const T* row(int y) const; + + // range reference + Mat channel_range(int c, int channels); + const Mat channel_range(int c, int channels) const; + Mat depth_range(int z, int depths); + const Mat depth_range(int z, int depths) const; + Mat row_range(int y, int rows); + const Mat row_range(int y, int rows) const; + Mat range(int x, int n); + const Mat range(int x, int n) const; + + // access raw data + template<typename T> + operator T*(); + template<typename T> + operator const T*() const; + + // convenient access float vec element + float& operator[](size_t i); + const float& operator[](size_t i) const; + +#if NCNN_PIXEL + enum PixelType + { + PIXEL_CONVERT_SHIFT = 16, + PIXEL_FORMAT_MASK = 0x0000ffff, + PIXEL_CONVERT_MASK = 0xffff0000, + + PIXEL_RGB = 1, + PIXEL_BGR = 2, + PIXEL_GRAY = 3, + PIXEL_RGBA = 4, + PIXEL_BGRA = 5, + + PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGRA = 
PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + }; + // convenient construct from pixel data + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0); + // convenient construct from pixel data with stride(bytes-per-row) parameter + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi with stride(bytes-per-row) parameter + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + + // convenient export to pixel data + void to_pixels(unsigned char* pixels, int type) const; + // convenient export to pixel data with stride(bytes-per-row) parameter + void to_pixels(unsigned char* pixels, int type, int stride) const; + // convenient export to pixel data and resize to specific size + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const; + // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 + // convenient construct from android Bitmap + static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0); + // convenient construct from android Bitmap and resize to specific size + static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from android Bitmap 
roi + static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from android Bitmap roi and resize to specific size + static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient export to android Bitmap and resize to the android Bitmap size + void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + + // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip + void substract_mean_normalize(const float* mean_vals, const float* norm_vals); + + // convenient construct from half precision floating point data + static Mat from_float16(const unsigned short* data, int size); + + // pointer to the data + void* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + Allocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +#if NCNN_VULKAN + +// the three dimension matrix, vulkan version +class NCNN_EXPORT VkMat +{ +public: + // empty + VkMat(); + // vec + VkMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkMat(const VkMat& m); + // external vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkMat(); + // assign + VkMat& operator=(const VkMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); 
+ // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkBuffer buffer() const; + size_t buffer_offset() const; + size_t buffer_capacity() const; + + // device buffer + VkBufferMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +class NCNN_EXPORT VkImageMat +{ +public: + // empty + VkImageMat(); + // vec + VkImageMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkImageMat(const VkImageMat& m); + // external vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* 
allocator); + // external packed dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkImageMat(); + // assign + VkImageMat& operator=(const VkImageMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); + // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkImage image() const; + VkImageView imageview() const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + // convenient construct from android hardware buffer + static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + // device image + VkImageMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; +}; + +// type for vulkan specialization constant and push constant +union vk_specialization_type +{ + int i; + float f; + uint32_t u32; +}; +union vk_constant_type +{ + int i; + float f; +}; +#endif // NCNN_VULKAN + +// misc function +#if NCNN_PIXEL +// convert yuv420sp(nv21) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv12) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv21) to rgb with half resize, the faster approximate version +NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// image pixel bilinear resize +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, 
unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +// image pixel bilinear resize with stride(bytes-per-row) parameter +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +// image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +#endif // NCNN_PIXEL +#if NCNN_PIXEL_ROTATE +// type is the from type, 6 means rotating from 6 to 1 +// +// 1 2 3 4 5 6 7 8 +// +// 888888 888888 88 88 8888888888 88 88 8888888888 +// 88 88 88 88 88 88 88 88 88 88 88 88 +// 8888 8888 8888 8888 88 8888888888 8888888888 88 +// 88 88 88 88 +// 88 88 888888 888888 +// +// ref http://sylvana.net/jpegcrop/exif_orientation.html +// image pixel kanna rotate +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +// image pixel kanna rotate with stride(bytes-per-row) parameter +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +// image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +#endif // NCNN_PIXEL_ROTATE +#if NCNN_PIXEL_AFFINE +// resolve affine transform matrix from rotation angle, scale factor and x y offset +NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm); +// resolve affine transform matrix from two set of points, num_point must be >= 2 +NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm); +// resolve the inversion affine transform matrix +NCNN_EXPORT void invert_affine_transform(const 
float* tm, float* tm_inv); +// image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +#endif // NCNN_PIXEL_AFFINE +#if NCNN_PIXEL_DRAWING +// draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int 
stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw line, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// resolve text bounding box size 
+NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h); +// draw ascii printables and newline, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +#endif // NCNN_PIXEL_DRAWING + +// type conversion +// convert float to half precision floating point +NCNN_EXPORT unsigned short float32_to_float16(float value); +// convert half precision floating point to float +NCNN_EXPORT float float16_to_float32(unsigned short value); +// convert float to brain half +NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.f = value; + return tmp.u >> 16; +} +// convert brain half to float +NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.u = value << 16; + return tmp.f; +} + +// mat process +enum BorderType +{ + BORDER_CONSTANT = 0, + BORDER_REPLICATE = 1, + BORDER_REFLECT = 2, + BORDER_TRANSPARENT = -233, +}; +NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt = Option()); +NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void 
convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option()); +NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option()); +NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option()); +NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option()); + +NCNN_FORCEINLINE Mat::Mat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(const Mat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), 
h(m.h), d(m.d), c(m.c), cstep(m.cstep) +{ + addref(); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::~Mat() +{ + release(); +} + +NCNN_FORCEINLINE void Mat::fill(float _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + + int i = 0; +#if __ARM_NEON + float32x4_t _c = vdupq_n_f32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_f32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +NCNN_FORCEINLINE void Mat::fill(int _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + + int i = 0; +#if __ARM_NEON + int32x4_t _c = vdupq_n_s32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_s32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +#if __ARM_NEON +NCNN_FORCEINLINE void Mat::fill(float32x4_t _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vst1q_f32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vst1_u16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + 
vst1q_s32(ptr, _v);
+        ptr += 4;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1)
+{
+    int size = (int)total();
+    int* ptr = (int*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1q_s32(ptr, _v0);
+        vst1q_s32(ptr + 4, _v1);
+        ptr += 8;
+    }
+}
+#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+NCNN_FORCEINLINE void Mat::fill(float16x4_t _v)
+{
+    int size = (int)total();
+    __fp16* ptr = (__fp16*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1_f16(ptr, _v);
+        ptr += 4;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(float16x8_t _v)
+{
+    int size = (int)total();
+    __fp16* ptr = (__fp16*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1q_f16(ptr, _v);
+        ptr += 8;
+    }
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // __ARM_NEON
+
+#if __SSE2__
+#if __AVX__
+#if __AVX512F__
+NCNN_FORCEINLINE void Mat::fill(__m512 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm512_storeu_ps(ptr, _v);
+        ptr += 16;
+    }
+}
+#endif // __AVX512F__
+NCNN_FORCEINLINE void Mat::fill(__m256 _v, int _i)
+{
+    // old gcc cannot overload __m128 and __m256 type
+    // add a dummy int parameter for different mangled function symbol
+    (void)_i;
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm256_storeu_ps(ptr, _v);
+        ptr += 8;
+    }
+}
+#endif // __AVX__
+NCNN_FORCEINLINE void Mat::fill(__m128 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm_storeu_ps(ptr, _v);
+        ptr += 4;
+    }
+}
+NCNN_FORCEINLINE void Mat::fill(__m128i _v)
+{
+    int size = (int)total();
+    unsigned short* ptr = (unsigned short*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm_store_si128((__m128i*)ptr, _v);
+        ptr += 8;
+    }
+}
+#endif // __SSE2__
+
+#if __mips_msa
+NCNN_FORCEINLINE void Mat::fill(v4f32 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        __msa_st_w((v4i32)_v, ptr, 0);
+        ptr += 4;
+    }
+}
+#endif // __mips_msa
+
+#if __loongarch_sx
+NCNN_FORCEINLINE void Mat::fill(__m128 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        __lsx_vst(_v, ptr, 0);
+        ptr += 4;
+    }
+}
+#endif // __loongarch_sx
+#if __riscv_vector
+NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 4;
+    const size_t vl = vsetvl_e32m1(packn);
+
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse32_v_f32m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 2;
+    const size_t vl = vsetvl_e16m1(packn);
+
+    int size = (int)total();
+    unsigned short* ptr = (unsigned short*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse16_v_u16m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 1;
+    const size_t vl = vsetvl_e8m1(packn);
+
+    int size = (int)total();
+    signed char* ptr = (signed char*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse8_v_i8m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+#if __riscv_zfh
+NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 2;
+    const size_t vl = vsetvl_e16m1(packn);
+
+    int size = (int)total();
+    __fp16* ptr = (__fp16*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse16_v_f16m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+#endif // __riscv_zfh
+#endif // __riscv_vector
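+
+// A minimal usage sketch for the packed fill() overloads above (illustrative,
+// not from the upstream sources; assumes an ARM NEON build). The Mat should be
+// created with an elempack matching the register width, here 4 floats:
+//
+//     ncnn::Mat m(8, 8, 4u * 4, 4);      // w=8, h=8, elemsize=16u, elempack=4
+//     float32x4_t v = vdupq_n_f32(0.5f); // broadcast 0.5f into all four lanes
+//     m.fill(v);                         // one vst1q_f32 store per packed element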
+
+template<typename T>
+NCNN_FORCEINLINE void Mat::fill(T _v)
+{
+    int size = (int)total();
+    T* ptr = (T*)data;
+    for (int i = 0; i < size; i++)
+    {
+        ptr[i] = _v;
+    }
+}
+
+NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m)
+{
+    if (this == &m)
+        return *this;
+
+    if (m.refcount)
+        NCNN_XADD(m.refcount, 1);
+
+    release();
+
+    data = m.data;
+    refcount = m.refcount;
+    elemsize = m.elemsize;
+    elempack = m.elempack;
+    allocator = m.allocator;
+
+    dims = m.dims;
+    w = m.w;
+    h = m.h;
+    d = m.d;
+    c = m.c;
+
+    cstep = m.cstep;
+
+    return *this;
+}
+
+NCNN_FORCEINLINE void Mat::addref()
+{
+    if (refcount)
+        NCNN_XADD(refcount, 1);
+}
+
+NCNN_FORCEINLINE void Mat::release()
+{
+    if (refcount && NCNN_XADD(refcount, -1) == 1)
+    {
+        if (allocator)
+            allocator->fastFree(data);
+        else
+            fastFree(data);
+    }
+
+    data = 0;
+
+    elemsize = 0;
+    elempack = 0;
+
+    dims = 0;
+    w = 0;
+    h = 0;
+    d = 0;
+    c = 0;
+
+    cstep = 0;
+
+    refcount = 0;
+}
+
+NCNN_FORCEINLINE bool Mat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+NCNN_FORCEINLINE size_t Mat::total() const
+{
+    return cstep * c;
+}
+
+NCNN_FORCEINLINE int Mat::elembits() const
+{
+    return elempack ? static_cast<int>(elemsize * 8) / elempack : 0;
+}
+
+NCNN_FORCEINLINE Mat Mat::shape() const
+{
+    if (dims == 1)
+        return Mat(w * elempack, (void*)0);
+    if (dims == 2)
+        return Mat(w, h * elempack, (void*)0);
+    if (dims == 3)
+        return Mat(w, h, c * elempack, (void*)0);
+    if (dims == 4)
+        return Mat(w, h, d, c * elempack, (void*)0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE Mat Mat::channel(int _c)
+{
+    Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims - 1;
+    if (dims == 4)
+        m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE const Mat Mat::channel(int _c) const
+{
+    Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims - 1;
+    if (dims == 4)
+        m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE Mat Mat::depth(int z)
+{
+    return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE const Mat Mat::depth(int z) const
+{
+    return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE float* Mat::row(int y)
+{
+    return (float*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+NCNN_FORCEINLINE const float* Mat::row(int y) const
+{
+    return (const float*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+template<typename T>
+NCNN_FORCEINLINE T* Mat::row(int y)
+{
+    return (T*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+template<typename T>
+NCNN_FORCEINLINE const T* Mat::row(int y) const
+{
+    return (const T*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels)
+{
+    Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims;
+    return m;
+}
+
+NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const
+{
+    Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims;
+    return m;
+}
+
+NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths)
+{
+    Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+    m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const
+{
+    Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+    m.cstep = (size_t)w * h;
+    return m;
+}
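+
+// A short sketch of the accessor arithmetic defined above (illustrative, not
+// from the upstream sources): channel() advances the data pointer by
+// cstep * c * elemsize bytes and row() by w * y * elemsize bytes, so elements
+// can be addressed without copying:
+//
+//     ncnn::Mat blob(640, 640, 3);              // float32, elempack=1
+//     const float* r2 = blob.channel(1).row(2); // start of row y=2 in channel c=1
+//     float v = r2[3];                          // element at x=3, y=2, c=1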
+
+NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows)
+{
+    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const
+{
+    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE Mat Mat::range(int x, int n)
+{
+    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const
+{
+    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
+}
+
+template<typename T>
+NCNN_FORCEINLINE Mat::operator T*()
+{
+    return (T*)data;
+}
+
+template<typename T>
+NCNN_FORCEINLINE Mat::operator const T*() const
+{
+    return (const T*)data;
+}
+
+NCNN_FORCEINLINE float& Mat::operator[](size_t i)
+{
+    return ((float*)data)[i];
+}
+
+NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const
+{
+    return ((const float*)data)[i];
+}
+
+#if NCNN_VULKAN
+
+NCNN_FORCEINLINE VkMat::VkMat()
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m)
+    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
+{
+    addref();
+
+    cstep
= m.cstep; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::~VkMat() +{ + release(); +} + +NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE Mat VkMat::mapped() const +{ + if (!allocator->mappable) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkMat::mapped_ptr() const +{ + if (!allocator->mappable) + return 0; + + return (unsigned char*)data->mapped_ptr + data->offset; +} + +NCNN_FORCEINLINE void VkMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkMat::release() +{ + if (refcount && 
NCNN_XADD(refcount, -1) == 1)
+    {
+        if (allocator && data)
+        {
+            allocator->fastFree(data);
+        }
+    }
+
+    data = 0;
+
+    elemsize = 0;
+    elempack = 0;
+
+    dims = 0;
+    w = 0;
+    h = 0;
+    d = 0;
+    c = 0;
+
+    cstep = 0;
+
+    refcount = 0;
+}
+
+NCNN_FORCEINLINE bool VkMat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+NCNN_FORCEINLINE size_t VkMat::total() const
+{
+    return cstep * c;
+}
+
+NCNN_FORCEINLINE int VkMat::elembits() const
+{
+    return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
+}
+
+NCNN_FORCEINLINE Mat VkMat::shape() const
+{
+    if (dims == 1)
+        return Mat(w * elempack, (void*)0);
+    if (dims == 2)
+        return Mat(w, h * elempack, (void*)0);
+    if (dims == 3)
+        return Mat(w, h, c * elempack, (void*)0);
+    if (dims == 4)
+        return Mat(w, h, d, c * elempack, (void*)0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE VkBuffer VkMat::buffer() const
+{
+    return data->buffer;
+}
+
+NCNN_FORCEINLINE size_t VkMat::buffer_offset() const
+{
+    return data->offset;
+}
+
+NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const
+{
+    return data->capacity;
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat()
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m)
+    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h),
d(m.d), c(m.c) +{ + addref(); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::~VkImageMat() +{ + release(); +} + +NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + return *this; +} + +NCNN_FORCEINLINE Mat VkImageMat::mapped() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return 0; + + return (unsigned char*)data->mapped_ptr + data->bind_offset; +} + +NCNN_FORCEINLINE void VkImageMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkImageMat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + 
elempack = 0;
+
+    dims = 0;
+    w = 0;
+    h = 0;
+    d = 0;
+    c = 0;
+
+    refcount = 0;
+}
+
+NCNN_FORCEINLINE bool VkImageMat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+NCNN_FORCEINLINE size_t VkImageMat::total() const
+{
+    return w * h * d * c;
+}
+
+NCNN_FORCEINLINE int VkImageMat::elembits() const
+{
+    return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
+}
+
+NCNN_FORCEINLINE Mat VkImageMat::shape() const
+{
+    if (dims == 1)
+        return Mat(w * elempack, (void*)0);
+    if (dims == 2)
+        return Mat(w, h * elempack, (void*)0);
+    if (dims == 3)
+        return Mat(w, h, c * elempack, (void*)0);
+    if (dims == 4)
+        return Mat(w, h, d, c * elempack, (void*)0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE VkImage VkImageMat::image() const
+{
+    return data->image;
+}
+
+NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const
+{
+    return data->imageview;
+}
+
+#endif // NCNN_VULKAN
+
+} // namespace ncnn
+
+#endif // NCNN_MAT_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/modelbin.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/modelbin.h
new file mode 100644
index 0000000..15d2b9c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/modelbin.h
@@ -0,0 +1,80 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +#ifndef NCNN_MODELBIN_H +#define NCNN_MODELBIN_H + +#include "mat.h" + +namespace ncnn { + +class DataReader; +class NCNN_EXPORT ModelBin +{ +public: + ModelBin(); + virtual ~ModelBin(); + // element type + // 0 = auto + // 1 = float32 + // 2 = float16 + // 3 = int8 + // load vec + virtual Mat load(int w, int type) const = 0; + // load image + virtual Mat load(int w, int h, int type) const; + // load dim + virtual Mat load(int w, int h, int c, int type) const; + // load cube + virtual Mat load(int w, int h, int d, int c, int type) const; +}; + +class ModelBinFromDataReaderPrivate; +class NCNN_EXPORT ModelBinFromDataReader : public ModelBin +{ +public: + explicit ModelBinFromDataReader(const DataReader& dr); + virtual ~ModelBinFromDataReader(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromDataReader(const ModelBinFromDataReader&); + ModelBinFromDataReader& operator=(const ModelBinFromDataReader&); + +private: + ModelBinFromDataReaderPrivate* const d; +}; + +class ModelBinFromMatArrayPrivate; +class NCNN_EXPORT ModelBinFromMatArray : public ModelBin +{ +public: + // construct from weight blob array + explicit ModelBinFromMatArray(const Mat* weights); + virtual ~ModelBinFromMatArray(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromMatArray(const ModelBinFromMatArray&); + ModelBinFromMatArray& operator=(const ModelBinFromMatArray&); + +private: + ModelBinFromMatArrayPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_MODELBIN_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/ncnn_export.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/ncnn_export.h new file mode 100644 index 0000000..7343310 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/ncnn_export.h @@ -0,0 +1,42 @@ + +#ifndef NCNN_EXPORT_H +#define NCNN_EXPORT_H + +#ifdef NCNN_STATIC_DEFINE +# define NCNN_EXPORT +# define NCNN_NO_EXPORT +#else +# ifndef NCNN_EXPORT +# ifdef ncnn_EXPORTS + /* We are building this library */ +# define NCNN_EXPORT +# else + /* We are using this library */ +# define NCNN_EXPORT +# endif +# endif + +# ifndef NCNN_NO_EXPORT +# define NCNN_NO_EXPORT +# endif +#endif + +#ifndef NCNN_DEPRECATED +# define NCNN_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef NCNN_DEPRECATED_EXPORT +# define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED +#endif + +#ifndef NCNN_DEPRECATED_NO_EXPORT +# define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef NCNN_NO_DEPRECATED +# define NCNN_NO_DEPRECATED +# endif +#endif + +#endif /* NCNN_EXPORT_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/net.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/net.h new file mode 100644 index 0000000..9407042 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/net.h @@ -0,0 +1,272 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_NET_H
+#define NCNN_NET_H
+
+#include "blob.h"
+#include "layer.h"
+#include "mat.h"
+#include "option.h"
+#include "platform.h"
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+#include <android/asset_manager.h>
+#endif // __ANDROID_API__ >= 9
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+#if NCNN_VULKAN
+class VkCompute;
+#endif // NCNN_VULKAN
+class DataReader;
+class Extractor;
+class NetPrivate;
+class NCNN_EXPORT Net
+{
+public:
+    // empty init
+    Net();
+    // clear and destroy
+    virtual ~Net();
+
+public:
+    // option can be changed before loading
+    Option opt;
+
+#if NCNN_VULKAN
+    // set gpu device by index
+    void set_vulkan_device(int device_index);
+
+    // set gpu device by device handle, no owner transfer
+    void set_vulkan_device(const VulkanDevice* vkdev);
+
+    const VulkanDevice* vulkan_device() const;
+#endif // NCNN_VULKAN
+
+#if NCNN_STRING
+    // register custom layer by layer type name
+    // return 0 if success
+    int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0);
+    virtual int custom_layer_to_index(const char* type);
+#endif // NCNN_STRING
+    // register custom layer by layer type
+    // return 0 if success
+    int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0);
+
+#if NCNN_STRING
+    int load_param(const DataReader& dr);
+#endif // NCNN_STRING
+
+    int load_param_bin(const DataReader& dr);
+
+    int load_model(const DataReader& dr);
+
+#if NCNN_STDIO
+#if NCNN_STRING
+    // load network structure from plain param file
+    // return 0 if success
+    int load_param(FILE* fp);
+    int load_param(const char* protopath);
+    int load_param_mem(const char* mem);
+#endif // NCNN_STRING
+    // load network structure from binary param file
+    // return 0 if success
+    int load_param_bin(FILE* fp);
+    int load_param_bin(const char* protopath);
+
+    // load network weight data from model file
+    // return 0 if success
+    int load_model(FILE* fp);
+    int load_model(const char* modelpath);
+#endif // NCNN_STDIO
+
+    // load network structure from external memory
+    // memory pointer must be 32-bit aligned
+    // return bytes consumed
+    int load_param(const unsigned char* mem);
+
+    // reference network weight data from external memory
+    // weight data is not copied but referenced
+    // so external memory should be retained when used
+    // memory pointer must be 32-bit aligned
+    // return bytes consumed
+    int load_model(const unsigned char* mem);
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+#if NCNN_STRING
+    // convenient load network structure from android asset plain param file
+    int load_param(AAsset* asset);
+    int load_param(AAssetManager* mgr, const char* assetpath);
+#endif // NCNN_STRING
+    // convenient load network structure from android asset binary param file
+    int load_param_bin(AAsset* asset);
+    int load_param_bin(AAssetManager* mgr, const char* assetpath);
+
+    // convenient load network weight data from android asset model file
+    int load_model(AAsset* asset);
+    int load_model(AAssetManager* mgr, const char* assetpath);
+#endif // __ANDROID_API__ >= 9
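+    // usage sketch for the asset-based loaders above (illustrative only;
+    // assumes <android/asset_manager_jni.h> on the JNI side and model files
+    // shipped in the apk assets -- file names here are placeholders):
+    //
+    //     AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
+    //     ncnn::Net net;
+    //     if (net.load_param(mgr, "model.param") != 0)
+    //         return -1; // param must load before the weights
+    //     if (net.load_model(mgr, "model.bin") != 0)
+    //         return -1;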
+#endif // NCNN_PLATFORM_API
+
+    // unload network structure and weight data
+    void clear();
+
+    // construct an Extractor from network
+    Extractor create_extractor() const;
+
+    // get input/output indexes/names
+    const std::vector<int>& input_indexes() const;
+    const std::vector<int>& output_indexes() const;
+#if NCNN_STRING
+    const std::vector<const char*>& input_names() const;
+    const std::vector<const char*>& output_names() const;
+#endif
+
+    const std::vector<Blob>& blobs() const;
+    const std::vector<Layer*>& layers() const;
+
+    std::vector<Blob>& mutable_blobs();
+    std::vector<Layer*>& mutable_layers();
+
+protected:
+    friend class Extractor;
+#if NCNN_STRING
+    int find_blob_index_by_name(const char* name) const;
+    int find_layer_index_by_name(const char* name) const;
+    virtual Layer* create_custom_layer(const char* type);
+#endif // NCNN_STRING
+    virtual Layer* create_custom_layer(int index);
+
+private:
+    Net(const Net&);
+    Net& operator=(const Net&);
+
+private:
+    NetPrivate* const d;
+};
+
+class ExtractorPrivate;
+class NCNN_EXPORT Extractor
+{
+public:
+    virtual ~Extractor();
+
+    // copy
+    Extractor(const Extractor&);
+
+    // assign
+    Extractor& operator=(const Extractor&);
+
+    // clear blob mats and allocators
+    void clear();
+
+    // enable light mode
+    // intermediate blob will be recycled when enabled
+    // enabled by default
+    void set_light_mode(bool enable);
+
+    // set thread count for this extractor
+    // this will overwrite the global setting
+    // default count is system dependent
+    void set_num_threads(int num_threads);
+
+    // set blob memory allocator
+    void set_blob_allocator(Allocator* allocator);
+
+    // set workspace memory allocator
+    void set_workspace_allocator(Allocator* allocator);
+
+#if NCNN_VULKAN
+    void set_vulkan_compute(bool enable);
+
+    void set_blob_vkallocator(VkAllocator* allocator);
+
+    void set_workspace_vkallocator(VkAllocator* allocator);
+
+    void set_staging_vkallocator(VkAllocator* allocator);
+#endif // NCNN_VULKAN
+
+#if NCNN_STRING
+    // set input by blob name
+    // return 0 if success
+    int input(const char* blob_name, const Mat& in);
+
+    // get result by blob name
+    // return 0 if success
+    // type = 0, default
+    // type = 1, do not convert fp16/bf16 or/and packing
+    int extract(const char* blob_name, Mat& feat, int type = 0);
+#endif // NCNN_STRING
+
+    // set input by blob index
+    // return 0 if success
+    int input(int blob_index, const Mat& in);
+
+    // get result by blob index
+    // return 0 if success
+    // type = 0, default
+    // type = 1, do not convert fp16/bf16 or/and packing
+    int extract(int blob_index, Mat& feat, int type = 0);
+
+#if NCNN_VULKAN
+#if NCNN_STRING
+    // set input by blob name
+    // return 0 if success
+    int input(const char* blob_name, const VkMat& in);
+
+    // get result by blob name
+    // return 0 if success
+    int extract(const char* blob_name, VkMat& feat, VkCompute& cmd);
+
+    // set input by blob name
+    // return 0 if success
+    int input(const char* blob_name, const VkImageMat& in);
+
+    // get result by blob name
+    // return 0 if success
+    int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd);
+#endif // NCNN_STRING
+
+    // set input by blob index
+    // return 0 if success
+    int input(int blob_index, const VkMat& in);
+
+    // get result by blob index
+    // return 0 if success
+    int extract(int blob_index, VkMat& feat, VkCompute& cmd);
+
+    // set input by blob index
+    // return 0 if success
+    int input(int blob_index, const VkImageMat& in);
+
+    // get result by blob index
+    // return 0 if success
+    int extract(int blob_index, VkImageMat& feat, VkCompute& cmd);
+#endif //
NCNN_VULKAN + +protected: + friend Extractor Net::create_extractor() const; + Extractor(const Net* net, size_t blob_count); + +private: + ExtractorPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_NET_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/option.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/option.h new file mode 100644 index 0000000..3fda808 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/option.h @@ -0,0 +1,153 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_OPTION_H +#define NCNN_OPTION_H + +#include "platform.h" + +namespace ncnn { + +#if NCNN_VULKAN +class VkAllocator; +class PipelineCache; +#endif // NCNN_VULKAN + +class Allocator; +class NCNN_EXPORT Option +{ +public: + // default option + Option(); + +public: + // light mode + // intermediate blob will be recycled when enabled + // enabled by default + bool lightmode; + + // thread count + // default value is the one returned by get_cpu_count() + int num_threads; + + // blob memory allocator + Allocator* blob_allocator; + + // workspace memory allocator + Allocator* workspace_allocator; + +#if NCNN_VULKAN + // blob memory allocator + VkAllocator* blob_vkallocator; + + // workspace memory allocator + VkAllocator* workspace_vkallocator; + + // staging memory allocator + VkAllocator* staging_vkallocator; + + // pipeline cache + PipelineCache* pipeline_cache; +#endif // NCNN_VULKAN + + // the time openmp threads busy-wait for more work before going to sleep + // default value is 20ms to keep the cores enabled + // without too much extra power consumption afterwards + int openmp_blocktime; + + // enable winograd convolution optimization + // improve convolution 3x3 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_winograd_convolution; + + // enable sgemm convolution optimization + // improve convolution 1x1 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_sgemm_convolution; + + // enable quantized int8 inference + // use low-precision int8 path for quantized model + // changes should be applied before loading network structure and weight + // enabled by default + bool use_int8_inference; + + // enable vulkan compute + bool use_vulkan_compute; + + // enable bf16 data type for storage + // improve most operator performance on all arm devices, may consume more memory + bool use_bf16_storage; + + // enable options for gpu inference + bool use_fp16_packed; + bool use_fp16_storage; + bool use_fp16_arithmetic; + bool use_int8_packed; + bool use_int8_storage; + bool use_int8_arithmetic; + + // enable 
simd-friendly packed memory layout + // improve all operator performance on all arm devices, will consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_packing_layout; + + bool use_shader_pack8; + + // subgroup option + bool use_subgroup_basic; + bool use_subgroup_vote; + bool use_subgroup_ballot; + bool use_subgroup_shuffle; + + // turn on for adreno + bool use_image_storage; + bool use_tensor_storage; + + bool use_reserved_0; + + // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero) + // default value is 3 + // 0 = DAZ OFF, FTZ OFF + // 1 = DAZ ON , FTZ OFF + // 2 = DAZ OFF, FTZ ON + // 3 = DAZ ON, FTZ ON + int flush_denormals; + + bool use_local_pool_allocator; + + // enable local memory optimization for gpu inference + bool use_shader_local_memory; + + // enable cooperative matrix optimization for gpu inference + bool use_cooperative_matrix; + + // more fine-grained control of winograd convolution + bool use_winograd23_convolution; + bool use_winograd43_convolution; + bool use_winograd63_convolution; + + bool use_reserved_6; + bool use_reserved_7; + bool use_reserved_8; + bool use_reserved_9; + bool use_reserved_10; + bool use_reserved_11; +}; + +} // namespace ncnn + +#endif // NCNN_OPTION_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/paramdict.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/paramdict.h new file mode 100644 index 0000000..c2ef160 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/paramdict.h @@ -0,0 +1,73 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
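+// The Option block above (option.h) is consulted when a network is loaded, so
+// flags must be set first. A minimal setup sketch ("model.param"/"model.bin"
+// are placeholder paths):
+//
+//     ncnn::Net net;
+//     net.opt.use_vulkan_compute = true; // take the GPU path when available
+//     net.opt.num_threads = 4;           // CPU thread count otherwise
+//     net.opt.use_fp16_storage = true;   // fp16 weights on capable devices
+//
+//     net.load_param("model.param");     // options applied from here on
+//     net.load_model("model.bin");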
+ +#ifndef NCNN_PARAMDICT_H +#define NCNN_PARAMDICT_H + +#include "mat.h" + +// at most 32 parameters +#define NCNN_MAX_PARAM_COUNT 32 + +namespace ncnn { + +class DataReader; +class Net; +class ParamDictPrivate; +class NCNN_EXPORT ParamDict +{ +public: + // empty + ParamDict(); + + virtual ~ParamDict(); + + // copy + ParamDict(const ParamDict&); + + // assign + ParamDict& operator=(const ParamDict&); + + // get type + int type(int id) const; + + // get int + int get(int id, int def) const; + // get float + float get(int id, float def) const; + // get array + Mat get(int id, const Mat& def) const; + + // set int + void set(int id, int i); + // set float + void set(int id, float f); + // set array + void set(int id, const Mat& v); + +protected: + friend class Net; + + void clear(); + + int load_param(const DataReader& dr); + int load_param_bin(const DataReader& dr); + +private: + ParamDictPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_PARAMDICT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/pipeline.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/pipeline.h new file mode 100644 index 0000000..c284a14 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/pipeline.h @@ -0,0 +1,113 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
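+// The ParamDict declared above is how a custom layer reads its typed
+// parameters during load_param; get() returns the supplied default when an
+// id is absent. A hypothetical sketch (MyLayer and the ids are illustrative):
+//
+//     int MyLayer::load_param(const ncnn::ParamDict& pd)
+//     {
+//         num_output = pd.get(0, 0);        // int with default
+//         eps = pd.get(1, 1e-5f);           // float with default
+//         anchors = pd.get(2, ncnn::Mat()); // optional array parameter
+//         return 0;
+//     }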
+ +#ifndef NCNN_PIPELINE_H +#define NCNN_PIPELINE_H + +#include "mat.h" +#include "platform.h" +#if NCNN_VULKAN +#include "gpu.h" + +#include +#endif // NCNN_VULKAN + +namespace ncnn { + +#if NCNN_VULKAN +class Option; +class PipelinePrivate; +class NCNN_EXPORT Pipeline +{ +public: + explicit Pipeline(const VulkanDevice* vkdev); + virtual ~Pipeline(); + +public: + void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); + void set_optimal_local_size_xyz(const Mat& local_size_xyz); + void set_local_size_xyz(int w, int h, int c); + + int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations); + + int create(int shader_type_index, const Option& opt, const std::vector& specializations); + +public: + VkShaderModule shader_module() const; + VkDescriptorSetLayout descriptorset_layout() const; + VkPipelineLayout pipeline_layout() const; + VkPipeline pipeline() const; + VkDescriptorUpdateTemplateKHR descriptor_update_template() const; + + const ShaderInfo& shader_info() const; + + uint32_t local_size_x() const; + uint32_t local_size_y() const; + uint32_t local_size_z() const; + +protected: + void set_shader_module(VkShaderModule shader_module); + void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout); + void set_pipeline_layout(VkPipelineLayout pipeline_layout); + void set_pipeline(VkPipeline pipeline); + void set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template); + + void set_shader_info(const ShaderInfo& shader_info); + +public: + const VulkanDevice* vkdev; + +private: + Pipeline(const Pipeline&); + Pipeline& operator=(const Pipeline&); + +private: + PipelinePrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class VkCompute; +class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline +{ +public: + explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev); + virtual ~ImportAndroidHardwareBufferPipeline(); + + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt); + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); + void destroy(); + + friend class VkCompute; + +protected: + int create_shader_module(const Option& opt); + int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator); + int create_descriptorset_layout(); + +public: + int type_to; + int rotate_from; + bool need_resize; + + VkSampler sampler; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/pipelinecache.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/pipelinecache.h new file mode 100644 index 0000000..bb6b8fb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/pipelinecache.h @@ -0,0 +1,85 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_PIPELINECACHE_H +#define NCNN_PIPELINECACHE_H + +#include "platform.h" + +#if NCNN_VULKAN +#include +#endif // NCNN_VULKAN + +#include "mat.h" +#include "gpu.h" + +namespace ncnn { + +#if NCNN_VULKAN + +class VulkanDevice; +class PipelineCachePrivate; +class NCNN_EXPORT PipelineCache +{ +public: + explicit PipelineCache(const VulkanDevice* _vkdev); + + virtual ~PipelineCache(); + + void clear(); + + int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + + int get_pipeline(int shader_type_index, const Option& opt, const std::vector& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + +protected: + int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* _shader_module, ShaderInfo& si) const; + + int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector& specializations, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + +protected: + const VulkanDevice* vkdev; + +private: + PipelineCache(const PipelineCache&); + PipelineCache& operator=(const PipelineCache&); + +private: + PipelineCachePrivate* const d; +}; + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINECACHE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/platform.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/platform.h new file mode 100644 index 0000000..d2ff629 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/platform.h @@ -0,0 +1,286 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
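+// The PipelineCache declared above compiles and memoizes Vulkan compute
+// pipelines per device; sharing one instance between nets avoids rebuilding
+// identical shaders. A sketch, assuming ncnn::get_gpu_device() from gpu.h
+// (not shown in this part of the diff) and a single GPU at index 0:
+//
+//     const ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
+//     ncnn::PipelineCache cache(vkdev);
+//
+//     ncnn::Net net;
+//     net.opt.use_vulkan_compute = true;
+//     net.opt.pipeline_cache = &cache; // reuse pipelines across nets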
+ +#ifndef NCNN_PLATFORM_H +#define NCNN_PLATFORM_H + +#define NCNN_STDIO 1 +#define NCNN_STRING 1 +#define NCNN_SIMPLEOCV 0 +#define NCNN_SIMPLEOMP 0 +#define NCNN_SIMPLESTL 0 +#define NCNN_THREADS 1 +#define NCNN_BENCHMARK 0 +#define NCNN_C_API 1 +#define NCNN_PLATFORM_API 1 +#define NCNN_PIXEL 1 +#define NCNN_PIXEL_ROTATE 1 +#define NCNN_PIXEL_AFFINE 1 +#define NCNN_PIXEL_DRAWING 1 +#define NCNN_VULKAN 1 +#define NCNN_SYSTEM_GLSLANG 0 +#define NCNN_RUNTIME_CPU 1 +#define NCNN_GNU_INLINE_ASM 1 +#define NCNN_AVX 1 +#define NCNN_XOP 1 +#define NCNN_FMA 1 +#define NCNN_F16C 1 +#define NCNN_AVX2 1 +#define NCNN_AVXVNNI 1 +#define NCNN_AVX512 1 +#define NCNN_AVX512VNNI 1 +#define NCNN_AVX512BF16 1 +#define NCNN_AVX512FP16 1 +#define NCNN_VFPV4 0 +#if __aarch64__ +#define NCNN_ARM82 0 +#define NCNN_ARM82DOT 0 +#define NCNN_ARM82FP16FML 0 +#define NCNN_ARM84BF16 0 +#define NCNN_ARM84I8MM 0 +#define NCNN_ARM86SVE 0 +#define NCNN_ARM86SVE2 0 +#define NCNN_ARM86SVEBF16 0 +#define NCNN_ARM86SVEI8MM 0 +#define NCNN_ARM86SVEF32MM 0 +#endif // __aarch64__ +#define NCNN_MSA 0 +#define NCNN_LSX 0 +#define NCNN_MMI 0 +#define NCNN_RVV 0 +#define NCNN_INT8 1 +#define NCNN_BF16 1 +#define NCNN_FORCE_INLINE 1 + +#define NCNN_VERSION_STRING "1.0.20230223" + +#include "ncnn_export.h" + +#ifdef __cplusplus + +#if NCNN_THREADS +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include +#include +#else +#include +#endif +#endif // NCNN_THREADS + +#if __ANDROID_API__ >= 26 +#define VK_USE_PLATFORM_ANDROID_KHR +#endif // __ANDROID_API__ >= 26 + +namespace ncnn { + +#if NCNN_THREADS +#if (defined _WIN32 && !(defined __MINGW32__)) +class NCNN_EXPORT Mutex +{ +public: + Mutex() { InitializeSRWLock(&srwlock); } + ~Mutex() {} + void lock_() { AcquireSRWLockExclusive(&srwlock); } + void unlock() { ReleaseSRWLockExclusive(&srwlock); } +private: + friend class ConditionVariable; + // NOTE SRWLock is available from windows vista + SRWLOCK srwlock; +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() { InitializeConditionVariable(&condvar); } + ~ConditionVariable() {} + void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); } + void broadcast() { WakeAllConditionVariable(&condvar); } + void signal() { WakeConditionVariable(&condvar); } +private: + CONDITION_VARIABLE condvar; +}; + +static unsigned __stdcall start_wrapper(void* args); +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); } + ~Thread() {} + void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); } +private: + friend unsigned __stdcall start_wrapper(void* args) + { + Thread* t = (Thread*)args; + t->_start(t->_args); + return 0; + } + HANDLE handle; + void* (*_start)(void*); + void* _args; +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { key = TlsAlloc(); } + ~ThreadLocalStorage() { TlsFree(key); } + void set(void* value) { TlsSetValue(key, (LPVOID)value); } + void* get() { return (void*)TlsGetValue(key); } +private: + DWORD key; +}; +#else // (defined _WIN32 && !(defined __MINGW32__)) +class NCNN_EXPORT Mutex +{ +public: + Mutex() { pthread_mutex_init(&mutex, 0); } + ~Mutex() { pthread_mutex_destroy(&mutex); } + void lock() { pthread_mutex_lock(&mutex); } + void unlock() { pthread_mutex_unlock(&mutex); } +private: + friend class ConditionVariable; + pthread_mutex_t mutex; +}; + +class NCNN_EXPORT 
ConditionVariable +{ +public: + ConditionVariable() { pthread_cond_init(&cond, 0); } + ~ConditionVariable() { pthread_cond_destroy(&cond); } + void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); } + void broadcast() { pthread_cond_broadcast(&cond); } + void signal() { pthread_cond_signal(&cond); } +private: + pthread_cond_t cond; +}; + +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); } + ~Thread() {} + void join() { pthread_join(t, 0); } +private: + pthread_t t; +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { pthread_key_create(&key, 0); } + ~ThreadLocalStorage() { pthread_key_delete(key); } + void set(void* value) { pthread_setspecific(key, value); } + void* get() { return pthread_getspecific(key); } +private: + pthread_key_t key; +}; +#endif // (defined _WIN32 && !(defined __MINGW32__)) +#else // NCNN_THREADS +class NCNN_EXPORT Mutex +{ +public: + Mutex() {} + ~Mutex() {} + void lock_() {} + void unlock() {} +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() {} + ~ConditionVariable() {} + void wait(Mutex& /*mutex*/) {} + void broadcast() {} + void signal() {} +}; + +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {} + ~Thread() {} + void join() {} +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { data = 0; } + ~ThreadLocalStorage() {} + void set(void* value) { data = value; } + void* get() { return data; } +private: + void* data; +}; +#endif // NCNN_THREADS + +class NCNN_EXPORT MutexLockGuard +{ +public: + MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock(); } + ~MutexLockGuard() { mutex.unlock(); } +private: + Mutex& mutex; +}; + +} // namespace ncnn + +#if NCNN_SIMPLESTL +#include "simplestl.h" +#else +#include +#include +#include +#include +#endif + +#endif // __cplusplus + +#if NCNN_STDIO +#if NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#include +#define NCNN_LOGE(...) do { \ + fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \ + __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0) +#else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#include +#define NCNN_LOGE(...) do { \ + fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0) +#endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#else +#define NCNN_LOGE(...) +#endif + + +#if NCNN_FORCE_INLINE +#ifdef _MSC_VER + #define NCNN_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) +#elif defined(__CLANG__) + #if __has_attribute(__always_inline__) + #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) + #else + #define NCNN_FORCEINLINE inline + #endif +#else + #define NCNN_FORCEINLINE inline +#endif +#else + #define NCNN_FORCEINLINE inline +#endif + +#endif // NCNN_PLATFORM_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simpleocv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simpleocv.h new file mode 100644 index 0000000..55ede15 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simpleocv.h @@ -0,0 +1,501 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLEOCV_H +#define NCNN_SIMPLEOCV_H + +#include "platform.h" + +#if NCNN_SIMPLEOCV + +#include +#include +#include "allocator.h" +#include "mat.h" + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif + +#ifndef NCNN_XADD +using ncnn::NCNN_XADD; +#endif + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +enum +{ + CV_LOAD_IMAGE_UNCHANGED = -1, + CV_LOAD_IMAGE_GRAYSCALE = 0, + CV_LOAD_IMAGE_COLOR = 1, +}; + +enum +{ + CV_IMWRITE_JPEG_QUALITY = 1 +}; + +// minimal opencv style data structure implementation +namespace cv { + +template +static inline _Tp saturate_cast(int v) +{ + return _Tp(v); +} +template<> +inline uchar saturate_cast(int v) +{ + return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); +} + +template +struct Scalar_ +{ + Scalar_() + { + v[0] = 0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0) + { + v[0] = _v0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = _v3; + } + + const _Tp operator[](const int i) const + { + return v[i]; + } + + _Tp operator[](const int i) + { + return v[i]; + } + + _Tp v[4]; +}; + +typedef Scalar_ Scalar; + +template +struct Point_ +{ + Point_() + : x(0), y(0) + { + } + Point_(_Tp _x, _Tp _y) + : x(_x), y(_y) + { + } + + template + operator Point_<_Tp2>() const + { + return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); + } + + _Tp x; + _Tp y; +}; + +typedef Point_ Point; +typedef Point_ Point2f; + +template +struct Size_ +{ + Size_() + : width(0), height(0) + { + } + Size_(_Tp _w, _Tp _h) + : width(_w), height(_h) + { + } + + template + operator Size_<_Tp2>() const + { + return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp width; + _Tp height; +}; + +typedef Size_ Size; +typedef Size_ Size2f; + +template +struct Rect_ +{ + Rect_() + : x(0), y(0), width(0), height(0) + { + } + Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) + : x(_x), y(_y), width(_w), height(_h) + { + } + Rect_(Point_<_Tp> _p, Size_<_Tp> _size) + : x(_p.x), y(_p.y), width(_size.width), height(_size.height) + { + } + + template + operator Rect_<_Tp2>() const + { + return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp x; + _Tp y; + _Tp width; + _Tp height; + + // area + _Tp area() const + { + return width * height; + } +}; + +template +static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); + a.width = std::min(a.x + a.width, b.x + b.width) - x1; + a.height = std::min(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + if (a.width <= 0 || a.height <= 0) + a = Rect_<_Tp>(); + return a; 
+} + +template +static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y); + a.width = std::max(a.x + a.width, b.x + b.width) - x1; + a.height = std::max(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + return a; +} + +template +static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c &= b; +} + +template +static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c |= b; +} + +typedef Rect_ Rect; +typedef Rect_ Rect2f; + +#define CV_8UC1 1 +#define CV_8UC3 3 +#define CV_8UC4 4 +#define CV_32FC1 4 + +struct NCNN_EXPORT Mat +{ + Mat() + : data(0), refcount(0), rows(0), cols(0), c(0) + { + } + + Mat(int _rows, int _cols, int flags) + : data(0), refcount(0) + { + create(_rows, _cols, flags); + } + + // copy + Mat(const Mat& m) + : data(m.data), refcount(m.refcount) + { + if (refcount) + NCNN_XADD(refcount, 1); + + rows = m.rows; + cols = m.cols; + c = m.c; + } + + Mat(int _rows, int _cols, int flags, void* _data) + : data((unsigned char*)_data), refcount(0) + { + rows = _rows; + cols = _cols; + c = flags; + } + + ~Mat() + { + release(); + } + + // assign + Mat& operator=(const Mat& m) + { + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + + rows = m.rows; + cols = m.cols; + c = m.c; + + return *this; + } + + Mat& operator=(const Scalar& s) + { + if (total() > 0) + { + uchar* p = data; + for (int i = 0; i < cols * rows; i++) + { + for (int j = 0; j < c; j++) + { + *p++ = s[j]; + } + } + } + + return *this; + } + + void create(int _rows, int _cols, int flags) + { + release(); + + rows = _rows; + cols = _cols; + c = flags; + + if (total() > 0) + { + // refcount address must be aligned, so we expand totalsize here + size_t totalsize = (total() + 3) >> 2 << 2; + data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((uchar*)data) + totalsize); + *refcount = 1; + } + } + + void release() + { + if (refcount && NCNN_XADD(refcount, -1) == 1) + ncnn::fastFree(data); + + data = 0; + + rows = 0; + cols = 0; + c = 0; + + refcount = 0; + } + + Mat clone() const + { + if (empty()) + return Mat(); + + Mat m(rows, cols, c); + + if (total() > 0) + { + memcpy(m.data, data, total()); + } + + return m; + } + + bool empty() const + { + return data == 0 || total() == 0; + } + + int channels() const + { + return c; + } + + int type() const + { + return c; + } + + size_t total() const + { + return cols * rows * c; + } + + const uchar* ptr(int y) const + { + return data + y * cols * c; + } + + uchar* ptr(int y) + { + return data + y * cols * c; + } + + template + const _Tp* ptr(int y) const + { + return (const _Tp*)data + y * cols * c; + } + + template + _Tp* ptr(int y) + { + return (_Tp*)data + y * cols * c; + } + + // roi + Mat operator()(const Rect& roi) const + { + if (empty()) + return Mat(); + + Mat m(roi.height, roi.width, c); + + int sy = roi.y; + for (int y = 0; y < roi.height; y++) + { + const uchar* sptr = ptr(sy) + roi.x * c; + uchar* dptr = m.ptr(y); + memcpy(dptr, sptr, roi.width * c); + sy++; + } + + return m; + } + + uchar* data; + + // pointer to the reference counter; + // when points to user-allocated data, the pointer is NULL + int* refcount; + + int rows; + int cols; + + int c; +}; + +enum ImreadModes +{ + IMREAD_UNCHANGED = -1, + IMREAD_GRAYSCALE = 0, + IMREAD_COLOR = 1 +}; + 
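+// usage sketch for the declarations below (illustrative; file names are
+// placeholders, and note this x86 build sets NCNN_SIMPLEOCV to 0 in
+// platform.h, so the real OpenCV would normally be used instead):
+//
+//     cv::Mat img = cv::imread("input.jpg", cv::IMREAD_COLOR);
+//     cv::rectangle(img, cv::Rect(10, 10, 80, 40), cv::Scalar(0, 255, 0), 2);
+//     cv::imwrite("out.jpg", img);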
+NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); + +enum ImwriteFlags +{ + IMWRITE_JPEG_QUALITY = 1 +}; + +NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector& params = std::vector()); + +NCNN_EXPORT void imshow(const std::string& name, const Mat& m); + +NCNN_EXPORT int waitKey(int delay = 0); + +#if NCNN_PIXEL +NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0); +#endif // NCNN_PIXEL + +#if NCNN_PIXEL_DRAWING + +enum +{ + FILLED = -1 +}; + +NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1); + +enum +{ + FONT_HERSHEY_SIMPLEX = 0 +}; + +NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1); + +NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine); + +#endif // NCNN_PIXEL_DRAWING + +} // namespace cv + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma pop_macro("min") +#pragma pop_macro("max") +#endif + +#endif // NCNN_SIMPLEOCV + +#endif // NCNN_SIMPLEOCV_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simpleomp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simpleomp.h new file mode 100644 index 0000000..13e2452 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simpleomp.h @@ -0,0 +1,53 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
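+// The simpleomp.h header that follows implements just enough of the llvm
+// OpenMP ABI for the single pragma form its comment names. Illustratively
+// (n and process() are placeholders):
+//
+//     #pragma omp parallel for num_threads(4)
+//     for (int i = 0; i < n; i++)
+//     {
+//         // workers can query omp_get_thread_num() / omp_get_num_threads()
+//         process(i);
+//     }
+//
+// This x86 build sets NCNN_SIMPLEOMP to 0 in platform.h, so the toolchain's
+// own OpenMP runtime is used instead.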
+ +#ifndef NCNN_SIMPLEOMP_H +#define NCNN_SIMPLEOMP_H + +#include "platform.h" + +#if NCNN_SIMPLEOMP + +#include + +// This minimal openmp runtime implementation only supports the llvm openmp abi +// and only supports #pragma omp parallel for num_threads(X) + +#ifdef __cplusplus +extern "C" { +#endif + +NCNN_EXPORT int omp_get_max_threads(); + +NCNN_EXPORT void omp_set_num_threads(int num_threads); + +NCNN_EXPORT int omp_get_dynamic(); + +NCNN_EXPORT void omp_set_dynamic(int dynamic); + +NCNN_EXPORT int omp_get_num_threads(); + +NCNN_EXPORT int omp_get_thread_num(); + +NCNN_EXPORT int kmp_get_blocktime(); + +NCNN_EXPORT void kmp_set_blocktime(int blocktime); + +#ifdef __cplusplus +} +#endif + +#endif // NCNN_SIMPLEOMP + +#endif // NCNN_SIMPLEOMP_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simplestl.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simplestl.h new file mode 100644 index 0000000..00ff468 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/simplestl.h @@ -0,0 +1,565 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLESTL_H +#define NCNN_SIMPLESTL_H + +#include +#include +#include + +#if !NCNN_SIMPLESTL + +#include + +#else + +// allocation functions +NCNN_EXPORT void* operator new(size_t size); +NCNN_EXPORT void* operator new[](size_t size); +// placement allocation functions +NCNN_EXPORT void* operator new(size_t size, void* ptr); +NCNN_EXPORT void* operator new[](size_t size, void* ptr); +// deallocation functions +NCNN_EXPORT void operator delete(void* ptr); +NCNN_EXPORT void operator delete[](void* ptr); +// deallocation functions since c++14 +#if __cplusplus >= 201402L +NCNN_EXPORT void operator delete(void* ptr, size_t sz); +NCNN_EXPORT void operator delete[](void* ptr, size_t sz); +#endif +// placement deallocation functions +NCNN_EXPORT void operator delete(void* ptr, void* voidptr2); +NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2); + +#endif + +// minimal stl data structure implementation +namespace std { + +template +const T& max(const T& a, const T& b) +{ + return (a < b) ? b : a; +} + +template +const T& min(const T& a, const T& b) +{ + return (a > b) ? 
b : a; +} + +template +void swap(T& a, T& b) +{ + T temp(a); + a = b; + b = temp; +} + +template +struct pair +{ + pair() + : first(), second() + { + } + pair(const T1& t1, const T2& t2) + : first(t1), second(t2) + { + } + + T1 first; + T2 second; +}; + +template +bool operator==(const pair& x, const pair& y) +{ + return (x.first == y.first && x.second == y.second); +} +template +bool operator<(const pair& x, const pair& y) +{ + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +bool operator!=(const pair& x, const pair& y) +{ + return !(x == y); +} +template +bool operator>(const pair& x, const pair& y) +{ + return y < x; +} +template +bool operator<=(const pair& x, const pair& y) +{ + return !(y < x); +} +template +bool operator>=(const pair& x, const pair& y) +{ + return !(x < y); +} + +template +pair make_pair(const T1& t1, const T2& t2) +{ + return pair(t1, t2); +} + +template +struct node +{ + node* prev_; + node* next_; + T data_; + + node() + : prev_(0), next_(0), data_() + { + } + node(const T& t) + : prev_(0), next_(0), data_(t) + { + } +}; + +template +struct iter_list +{ + iter_list() + : curr_(0) + { + } + iter_list(node* n) + : curr_(n) + { + } + iter_list(const iter_list& i) + : curr_(i.curr_) + { + } + ~iter_list() + { + } + + iter_list& operator=(const iter_list& i) + { + curr_ = i.curr_; + return *this; + } + + T& operator*() + { + return curr_->data_; + } + T* operator->() + { + return &(curr_->data_); + } + + bool operator==(const iter_list& i) + { + return curr_ == i.curr_; + } + bool operator!=(const iter_list& i) + { + return curr_ != i.curr_; + } + + iter_list& operator++() + { + curr_ = curr_->next_; + return *this; + } + iter_list& operator--() + { + curr_ = curr_->prev_; + return *this; + } + + node* curr_; +}; + +template +struct list +{ + typedef iter_list iterator; + + list() + { + head_ = new node(); + tail_ = head_; + count_ = 0; + } + ~list() + { + clear(); + delete head_; + } + list(const list& l) + { + head_ = new node(); + tail_ = head_; + count_ = 0; + + for (iter_list i = l.begin(); i != l.end(); ++i) + { + push_back(*i); + } + } + + list& operator=(const list& l) + { + if (this == &l) + { + return *this; + } + clear(); + + for (iter_list i = l.begin(); i != l.end(); ++i) + { + push_back(*i); + } + return *this; + } + + void clear() + { + while (count_ > 0) + { + pop_front(); + } + } + + void pop_front() + { + if (count_ > 0) + { + head_ = head_->next_; + delete head_->prev_; + head_->prev_ = 0; + --count_; + } + } + + size_t size() const + { + return count_; + } + iter_list begin() const + { + return iter_list(head_); + } + iter_list end() const + { + return iter_list(tail_); + } + bool empty() const + { + return count_ == 0; + } + + void push_back(const T& t) + { + if (count_ == 0) + { + head_ = new node(t); + head_->prev_ = 0; + head_->next_ = tail_; + tail_->prev_ = head_; + count_ = 1; + } + else + { + node* temp = new node(t); + temp->prev_ = tail_->prev_; + temp->next_ = tail_; + tail_->prev_->next_ = temp; + tail_->prev_ = temp; + ++count_; + } + } + + iter_list erase(iter_list pos) + { + if (pos != end()) + { + node* temp = pos.curr_; + if (temp == head_) + { + ++pos; + temp->next_->prev_ = 0; + head_ = temp->next_; + } + else + { + --pos; + temp->next_->prev_ = temp->prev_; + temp->prev_->next_ = temp->next_; + ++pos; + } + delete temp; + --count_; + } + return pos; + } + +protected: + node* head_; + node* tail_; + size_t count_; +}; + +template +struct greater +{ + bool operator()(const T& x, const T& y) 
const + { + return (x > y); + } +}; + +template +struct less +{ + bool operator()(const T& x, const T& y) const + { + return (x < y); + } +}; + +template +void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp) +{ + // [TODO] heap sort should be used here, but we simply use bubble sort now + for (RandomAccessIter i = first; i < middle; ++i) + { + // bubble sort + for (RandomAccessIter j = last - 1; j > first; --j) + { + if (comp(*j, *(j - 1))) + { + swap(*j, *(j - 1)); + } + } + } +} + +template +struct vector +{ + vector() + : data_(0), size_(0), capacity_(0) + { + } + vector(const size_t new_size, const T& value = T()) + : data_(0), size_(0), capacity_(0) + { + resize(new_size, value); + } + ~vector() + { + clear(); + } + vector(const vector& v) + : data_(0), size_(0), capacity_(0) + { + resize(v.size()); + for (size_t i = 0; i < size_; i++) + { + data_[i] = v.data_[i]; + } + } + + vector& operator=(const vector& v) + { + if (this == &v) + { + return *this; + } + resize(0); + resize(v.size()); + for (size_t i = 0; i < size_; i++) + { + data_[i] = v.data_[i]; + } + return *this; + } + + void resize(const size_t new_size, const T& value = T()) + { + try_alloc(new_size); + if (new_size > size_) + { + for (size_t i = size_; i < new_size; i++) + { + new (&data_[i]) T(value); + } + } + else if (new_size < size_) + { + for (size_t i = new_size; i < size_; i++) + { + data_[i].~T(); + } + } + size_ = new_size; + } + + void clear() + { + for (size_t i = 0; i < size_; i++) + { + data_[i].~T(); + } + delete[](char*) data_; + data_ = 0; + size_ = 0; + capacity_ = 0; + } + + T* data() const + { + return data_; + } + size_t size() const + { + return size_; + } + T& operator[](size_t i) const + { + return data_[i]; + } + T* begin() const + { + return &data_[0]; + } + T* end() const + { + return &data_[size_]; + } + bool empty() const + { + return size_ == 0; + } + + void push_back(const T& t) + { + try_alloc(size_ + 1); + new (&data_[size_]) T(t); + size_++; + } + + void insert(T* pos, T* b, T* e) + { + vector* v = 0; + if (b >= begin() && b < end()) + { + //the same vector + v = new vector(*this); + b = v->begin() + (b - begin()); + e = v->begin() + (e - begin()); + } + size_t diff = pos - begin(); + try_alloc(size_ + (e - b)); + pos = begin() + diff; + memmove(pos + (e - b), pos, (end() - pos) * sizeof(T)); + size_t len = e - b; + size_ += len; + for (size_t i = 0; i < len; i++) + { + *pos = *b; + pos++; + b++; + } + delete v; + } + + T* erase(T* pos) + { + pos->~T(); + memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T)); + size_--; + return pos; + } + +protected: + T* data_; + size_t size_; + size_t capacity_; + void try_alloc(size_t new_size) + { + if (new_size * 3 / 2 > capacity_ / 2) + { + capacity_ = new_size * 2; + T* new_data = (T*)new char[capacity_ * sizeof(T)]; + memset(static_cast(new_data), 0, capacity_ * sizeof(T)); + if (data_) + { + memmove(new_data, data_, sizeof(T) * size_); + delete[](char*) data_; + } + data_ = new_data; + } + } +}; + +struct NCNN_EXPORT string : public vector +{ + string() + { + } + string(const char* str) + { + size_t len = strlen(str); + resize(len); + memcpy(data_, str, len); + } + const char* c_str() const + { + return (const char*)data_; + } + bool operator==(const string& str2) const + { + return strcmp(data_, str2.data_) == 0; + } + bool operator==(const char* str2) const + { + return strcmp(data_, str2) == 0; + } + bool operator!=(const char* str2) const + { + return strcmp(data_, str2) != 0; + } + string& 
operator+=(const string& str1)
+    {
+        insert(end(), str1.begin(), str1.end());
+        return *this;
+    }
+};
+
+inline string operator+(const string& str1, const string& str2)
+{
+    string str(str1);
+    str.insert(str.end(), str2.begin(), str2.end());
+    return str;
+}
+
+} // namespace std
+
+#endif // NCNN_SIMPLESTL_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/vulkan_header_fix.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/vulkan_header_fix.h
new file mode 100644
index 0000000..103ac3e
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/include/ncnn/vulkan_header_fix.h
@@ -0,0 +1,259 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_VULKAN_HEADER_FIX_H
+#define NCNN_VULKAN_HEADER_FIX_H
+
+#include <vulkan/vulkan.h>
+
+// This header contains new structure and function declarations to fix builds with an old vulkan sdk
+
+#if VK_HEADER_VERSION < 70
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES (VkStructureType)1000094000
+typedef enum VkSubgroupFeatureFlagBits
+{
+    VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001,
+    VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002,
+    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT = 0x00000004,
+    VK_SUBGROUP_FEATURE_BALLOT_BIT = 0x00000008,
+    VK_SUBGROUP_FEATURE_SHUFFLE_BIT = 0x00000010,
+    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT = 0x00000020,
+    VK_SUBGROUP_FEATURE_CLUSTERED_BIT = 0x00000040,
+    VK_SUBGROUP_FEATURE_QUAD_BIT = 0x00000080,
+    VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV = 0x00000100,
+    VK_SUBGROUP_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSubgroupFeatureFlagBits;
+typedef VkFlags VkSubgroupFeatureFlags;
+typedef struct VkPhysicalDeviceSubgroupProperties
+{
+    VkStructureType sType;
+    void* pNext;
+    uint32_t subgroupSize;
+    VkShaderStageFlags supportedStages;
+    VkSubgroupFeatureFlags supportedOperations;
+    VkBool32 quadOperationsInAllStages;
+} VkPhysicalDeviceSubgroupProperties;
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES (VkStructureType)1000168000
+#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT (VkStructureType)1000168001
+typedef struct VkPhysicalDeviceMaintenance3Properties
+{
+    VkStructureType sType;
+    void* pNext;
+    uint32_t maxPerSetDescriptors;
+    VkDeviceSize maxMemoryAllocationSize;
+} VkPhysicalDeviceMaintenance3Properties;
+typedef struct VkDescriptorSetLayoutSupport
+{
+    VkStructureType sType;
+    void* pNext;
+    VkBool32 supported;
+} VkDescriptorSetLayoutSupport;
+typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR;
+typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR;
+typedef void(VKAPI_PTR* PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport);
+#endif // VK_HEADER_VERSION < 70
+
+#if VK_HEADER_VERSION < 80
+#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR (VkStructureType)1000177000 +typedef struct VkPhysicalDevice8BitStorageFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeaturesKHR; +#define VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR (VkStructureType)1000109000 +#define VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR (VkStructureType)1000109001 +#define VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR (VkStructureType)1000109002 +#define VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR (VkStructureType)1000109003 +#define VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR (VkStructureType)1000109004 +#define VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR (VkStructureType)1000109005 +#define VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR (VkStructureType)1000109006 +typedef struct VkAttachmentDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription2KHR; +typedef struct VkAttachmentReference2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t attachment; + VkImageLayout layout; + VkImageAspectFlags aspectMask; +} VkAttachmentReference2KHR; +typedef struct VkSubpassDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t viewMask; + uint32_t inputAttachmentCount; + const VkAttachmentReference2KHR* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference2KHR* pColorAttachments; + const VkAttachmentReference2KHR* pResolveAttachments; + const VkAttachmentReference2KHR* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription2KHR; +typedef struct VkSubpassDependency2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; +} VkSubpassDependency2KHR; +typedef struct VkRenderPassCreateInfo2KHR +{ + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription2KHR* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription2KHR* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency2KHR* pDependencies; + uint32_t correlatedViewMaskCount; + const uint32_t* pCorrelatedViewMasks; +} VkRenderPassCreateInfo2KHR; +typedef struct VkSubpassBeginInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassContents contents; +} VkSubpassBeginInfoKHR; + +typedef struct VkSubpassEndInfoKHR +{ + VkStructureType sType; + const void* pNext; +} VkSubpassEndInfoKHR; +typedef VkResult(VKAPI_PTR* PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2KHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void(VKAPI_PTR* PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const VkSubpassBeginInfoKHR* pSubpassBeginInfo); 
+typedef void(VKAPI_PTR* PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfoKHR* pSubpassBeginInfo, const VkSubpassEndInfoKHR* pSubpassEndInfo); +typedef void(VKAPI_PTR* PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR* pSubpassEndInfo); +#endif // VK_HEADER_VERSION < 80 + +#if VK_HEADER_VERSION < 95 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR (VkStructureType)1000082000 +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; +#endif // VK_HEADER_VERSION < 95 + +#if VK_HEADER_VERSION < 97 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT +{ + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; +#endif // VK_HEADER_VERSION < 97 + +#if VK_HEADER_VERSION < 101 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV (VkStructureType)1000249000 +#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249002 +typedef enum VkComponentTypeNV +{ + VK_COMPONENT_TYPE_FLOAT16_NV = 0, + VK_COMPONENT_TYPE_FLOAT32_NV = 1, + VK_COMPONENT_TYPE_FLOAT64_NV = 2, + VK_COMPONENT_TYPE_SINT8_NV = 3, + VK_COMPONENT_TYPE_SINT16_NV = 4, + VK_COMPONENT_TYPE_SINT32_NV = 5, + VK_COMPONENT_TYPE_SINT64_NV = 6, + VK_COMPONENT_TYPE_UINT8_NV = 7, + VK_COMPONENT_TYPE_UINT16_NV = 8, + VK_COMPONENT_TYPE_UINT32_NV = 9, + VK_COMPONENT_TYPE_UINT64_NV = 10, + VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV, + VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV, + VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1), + VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkComponentTypeNV; +typedef enum VkScopeNV +{ + VK_SCOPE_DEVICE_NV = 1, + VK_SCOPE_WORKGROUP_NV = 2, + VK_SCOPE_SUBGROUP_NV = 3, + VK_SCOPE_QUEUE_FAMILY_NV = 5, + VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV, + VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV, + VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1), + VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkScopeNV; +typedef struct VkCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeNV AType; + VkComponentTypeNV BType; + VkComponentTypeNV CType; + VkComponentTypeNV DType; + VkScopeNV scope; +} VkCooperativeMatrixPropertiesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV +{ + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} VkPhysicalDeviceCooperativeMatrixPropertiesNV; +typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); +#endif // VK_HEADER_VERSION < 101 + +#if VK_HEADER_VERSION < 208 +typedef enum VkInstanceCreateFlagBits 
+{ + VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR = 0x00000001, + VK_INSTANCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkInstanceCreateFlagBits; +#endif // VK_HEADER_VERSION < 208 + +#endif // NCNN_VULKAN_HEADER_FIX_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/OGLCompilerTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/OGLCompilerTargets.cmake new file mode 100644 index 0000000..89bb144 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/OGLCompilerTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OGLCompilerTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OGLCompiler) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OGLCompiler ALIAS glslang::OGLCompiler) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/OSDependentTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/OSDependentTargets.cmake new file mode 100644 index 0000000..c345456 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/OSDependentTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OSDependentTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OSDependent) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OSDependent ALIAS glslang::OSDependent) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/SPIRVTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/SPIRVTargets.cmake new file mode 100644 index 0000000..1c614ab --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/SPIRVTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `SPIRVTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::SPIRV) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(SPIRV ALIAS glslang::SPIRV) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake new file mode 100644 index 0000000..83b16db --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-config-version.cmake @@ -0,0 +1,70 @@ +# This is a basic version file for the Config-mode of find_package(). +# It is used by write_basic_package_version_file() as input file for configure_file() +# to create a version-file which can be installed along a config.cmake file. +# +# The created file sets PACKAGE_VERSION_EXACT if the current version string and +# the requested version string are exactly the same and it sets +# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, +# but only if the requested major version is the same as the current one. +# The variable CVF_VERSION must be set before calling configure_file(). 
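+#
+# For example: this package records version 11.12.0 below, so
+# find_package(glslang 11.10) is accepted as compatible (same major version,
+# requested version <= 11.12.0), while find_package(glslang 12.0) is rejected.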
+ + +set(PACKAGE_VERSION "11.12.0") + +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + + if("11.12.0" MATCHES "^([0-9]+)\\.") + set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}") + if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0) + string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}") + endif() + else() + set(CVF_VERSION_MAJOR "11.12.0") + endif() + + if(PACKAGE_FIND_VERSION_RANGE) + # both endpoints of the range must have the expected major version + math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1") + if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT))) + set(PACKAGE_VERSION_COMPATIBLE FALSE) + elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX))) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + else() + if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + + if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() + endif() +endif() + + +# if the installed project requested no architecture check, don't perform the check +if("FALSE") + return() +endif() + +# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: +if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "4" STREQUAL "") + return() +endif() + +# check that the installed version has the same 32/64bit-ness as the one which is currently searching: +if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "4") + math(EXPR installedBits "4 * 8") + set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") + set(PACKAGE_VERSION_UNSUITABLE TRUE) +endif() diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-config.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-config.cmake new file mode 100644 index 0000000..5ebe599 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-config.cmake @@ -0,0 +1,27 @@ + +####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() ####### +####### Any changes to this file will be overwritten by the next CMake run #### +####### The input file was glslang-config.cmake.in ######## + +get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) + +macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() +endmacro() + +macro(check_required_components _NAME) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(NOT ${_NAME}_${comp}_FOUND) + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + endif() + endif() + endforeach() +endmacro() + +#################################################################################### + include("${PACKAGE_PREFIX_DIR}/lib/cmake/glslang/glslang-targets.cmake") + diff 
--git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-targets-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-targets-release.cmake new file mode 100644 index 0000000..e4de522 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-targets-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "glslang::OSDependent" for configuration "Release" +set_property(TARGET glslang::OSDependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OSDependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOSDependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OSDependent ) +list(APPEND _cmake_import_check_files_for_glslang::OSDependent "${_IMPORT_PREFIX}/lib/libOSDependent.a" ) + +# Import target "glslang::glslang" for configuration "Release" +set_property(TARGET glslang::glslang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::glslang PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libglslang.a" + ) + +list(APPEND _cmake_import_check_targets glslang::glslang ) +list(APPEND _cmake_import_check_files_for_glslang::glslang "${_IMPORT_PREFIX}/lib/libglslang.a" ) + +# Import target "glslang::MachineIndependent" for configuration "Release" +set_property(TARGET glslang::MachineIndependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::MachineIndependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::MachineIndependent ) +list(APPEND _cmake_import_check_files_for_glslang::MachineIndependent "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" ) + +# Import target "glslang::GenericCodeGen" for configuration "Release" +set_property(TARGET glslang::GenericCodeGen APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::GenericCodeGen PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" + ) + +list(APPEND _cmake_import_check_targets glslang::GenericCodeGen ) +list(APPEND _cmake_import_check_files_for_glslang::GenericCodeGen "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" ) + +# Import target "glslang::OGLCompiler" for configuration "Release" +set_property(TARGET glslang::OGLCompiler APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OGLCompiler PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OGLCompiler ) +list(APPEND _cmake_import_check_files_for_glslang::OGLCompiler "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" ) + +# Import target "glslang::SPIRV" for configuration "Release" +set_property(TARGET glslang::SPIRV APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::SPIRV PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE 
"CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libSPIRV.a" + ) + +list(APPEND _cmake_import_check_targets glslang::SPIRV ) +list(APPEND _cmake_import_check_files_for_glslang::SPIRV "${_IMPORT_PREFIX}/lib/libSPIRV.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-targets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-targets.cmake new file mode 100644 index 0000000..2173b87 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslang/glslang-targets.cmake @@ -0,0 +1,135 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS glslang::OSDependent glslang::glslang glslang::MachineIndependent glslang::GenericCodeGen glslang::OGLCompiler glslang::SPIRV) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+if(_IMPORT_PREFIX STREQUAL "/")
+  set(_IMPORT_PREFIX "")
+endif()
+
+# Create imported target glslang::OSDependent
+add_library(glslang::OSDependent STATIC IMPORTED)
+
+set_target_properties(glslang::OSDependent PROPERTIES
+  INTERFACE_LINK_LIBRARIES "-pthread"
+)
+
+# Create imported target glslang::glslang
+add_library(glslang::glslang STATIC IMPORTED)
+
+set_target_properties(glslang::glslang PROPERTIES
+  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include"
+  INTERFACE_LINK_LIBRARIES "\$<LINK_ONLY:glslang::OGLCompiler>;\$<LINK_ONLY:glslang::OSDependent>;\$<LINK_ONLY:glslang::MachineIndependent>"
+)
+
+# Create imported target glslang::MachineIndependent
+add_library(glslang::MachineIndependent STATIC IMPORTED)
+
+set_target_properties(glslang::MachineIndependent PROPERTIES
+  INTERFACE_LINK_LIBRARIES "\$<LINK_ONLY:glslang::OGLCompiler>;\$<LINK_ONLY:glslang::OSDependent>;\$<LINK_ONLY:glslang::GenericCodeGen>"
+)
+
+# Create imported target glslang::GenericCodeGen
+add_library(glslang::GenericCodeGen STATIC IMPORTED)
+
+# Create imported target glslang::OGLCompiler
+add_library(glslang::OGLCompiler STATIC IMPORTED)
+
+# Create imported target glslang::SPIRV
+add_library(glslang::SPIRV STATIC IMPORTED)
+
+set_target_properties(glslang::SPIRV PROPERTIES
+  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include"
+  INTERFACE_LINK_LIBRARIES "\$<LINK_ONLY:glslang::MachineIndependent>"
+)
+
+if(CMAKE_VERSION VERSION_LESS 2.8.12)
+  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
+endif()
+
+# Load information for each installed configuration.
+file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/glslang-targets-*.cmake")
+foreach(_cmake_config_file IN LISTS _cmake_config_files)
+  include("${_cmake_config_file}")
+endforeach()
+unset(_cmake_config_file)
+unset(_cmake_config_files)
+
+# Cleanup temporary variables.
+set(_IMPORT_PREFIX)
+
+# Loop over all imported files and verify that they actually exist
+foreach(_cmake_target IN LISTS _cmake_import_check_targets)
+  foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
+    if(NOT EXISTS "${_cmake_file}")
+      message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
+   \"${_cmake_file}\"
+but this file does not exist. Possible reasons include:
+* The file was deleted, renamed, or moved to another location.
+* An install or uninstall procedure did not complete successfully.
+* The installation package was faulty and contained
+   \"${CMAKE_CURRENT_LIST_FILE}\"
+but not all the files it references.
+")
+    endif()
+  endforeach()
+  unset(_cmake_file)
+  unset("_cmake_import_check_files_for_${_cmake_target}")
+endforeach()
+unset(_cmake_target)
+unset(_cmake_import_check_targets)
+
+# This file does not depend on other imported targets which have
+# been exported from the same project but in a separate export set.
+
+# Commands beyond this point should not need to know the version.
+set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslangTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslangTargets.cmake new file mode 100644 index 0000000..6f4e3bc --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/glslangTargets.cmake @@ -0,0 +1,15 @@ + + message(WARNING "Using `glslangTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::glslang) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + if(OFF) + add_library(glslang ALIAS glslang::glslang) + else() + add_library(glslang ALIAS glslang::glslang) + add_library(MachineIndependent ALIAS glslang::MachineIndependent) + add_library(GenericCodeGen ALIAS glslang::GenericCodeGen) + endif() + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnn-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnn-release.cmake new file mode 100644 index 0000000..37f24ba --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnn-release.cmake @@ -0,0 +1,19 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "ncnn" for configuration "Release" +set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(ncnn PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a" + ) + +list(APPEND _cmake_import_check_targets ncnn ) +list(APPEND _cmake_import_check_files_for_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnn.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnn.cmake new file mode 100644 index 0000000..f224c1b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnn.cmake @@ -0,0 +1,125 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
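+# (This export set contains only the `ncnn` target, so the check below simply
+# tests whether `ncnn` has already been imported.)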
+set(_cmake_targets_defined "")
+set(_cmake_targets_not_defined "")
+set(_cmake_expected_targets "")
+foreach(_cmake_expected_target IN ITEMS ncnn)
+  list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
+  if(TARGET "${_cmake_expected_target}")
+    list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
+  else()
+    list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
+  endif()
+endforeach()
+unset(_cmake_expected_target)
+if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
+  unset(_cmake_targets_defined)
+  unset(_cmake_targets_not_defined)
+  unset(_cmake_expected_targets)
+  unset(CMAKE_IMPORT_FILE_VERSION)
+  cmake_policy(POP)
+  return()
+endif()
+if(NOT _cmake_targets_defined STREQUAL "")
+  string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
+  string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
+  message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
+endif()
+unset(_cmake_targets_defined)
+unset(_cmake_targets_not_defined)
+unset(_cmake_expected_targets)
+
+
+# Compute the installation prefix relative to this file.
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
+if(_IMPORT_PREFIX STREQUAL "/")
+  set(_IMPORT_PREFIX "")
+endif()
+
+# Create imported target ncnn
+add_library(ncnn STATIC IMPORTED)
+
+set_target_properties(ncnn PROPERTIES
+  INTERFACE_COMPILE_OPTIONS "-fno-rtti;-fno-exceptions"
+  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn"
+  INTERFACE_LINK_LIBRARIES "-fopenmp;-static-openmp;-Wl,-wrap,__kmp_affinity_determine_capable;Threads::Threads;Vulkan::Vulkan;\$<LINK_ONLY:glslang::glslang>;\$<LINK_ONLY:glslang::SPIRV>;android;jnigraphics;log"
+  INTERFACE_POSITION_INDEPENDENT_CODE "ON"
+)
+
+if(CMAKE_VERSION VERSION_LESS 2.8.12)
+  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
+endif()
+
+# Load information for each installed configuration.
+file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/ncnn-*.cmake")
+foreach(_cmake_config_file IN LISTS _cmake_config_files)
+  include("${_cmake_config_file}")
+endforeach()
+unset(_cmake_config_file)
+unset(_cmake_config_files)
+
+# Cleanup temporary variables.
+set(_IMPORT_PREFIX)
+
+# Loop over all imported files and verify that they actually exist
+foreach(_cmake_target IN LISTS _cmake_import_check_targets)
+  foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
+    if(NOT EXISTS "${_cmake_file}")
+      message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
+   \"${_cmake_file}\"
+but this file does not exist. Possible reasons include:
+* The file was deleted, renamed, or moved to another location.
+* An install or uninstall procedure did not complete successfully.
+* The installation package was faulty and contained
+   \"${CMAKE_CURRENT_LIST_FILE}\"
+but not all the files it references.
+")
+    endif()
+  endforeach()
+  unset(_cmake_file)
+  unset("_cmake_import_check_files_for_${_cmake_target}")
+endforeach()
+unset(_cmake_target)
+unset(_cmake_import_check_targets)
+
+# Make sure the targets which have been exported in some other
+# export set exist.
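+# For this package those are glslang::glslang and glslang::SPIRV, which
+# ncnnConfig.cmake imports via find_package(glslang) before including this file.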
+unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) +foreach(_target "glslang::glslang" "glslang::SPIRV" ) + if(NOT TARGET "${_target}" ) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets "${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets} ${_target}") + endif() +endforeach() + +if(DEFINED ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + if(CMAKE_FIND_PACKAGE_NAME) + set( ${CMAKE_FIND_PACKAGE_NAME}_FOUND FALSE) + set( ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + else() + message(FATAL_ERROR "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + endif() +endif() +unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnnConfig.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnnConfig.cmake new file mode 100644 index 0000000..c1e5613 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/cmake/ncnn/ncnnConfig.cmake @@ -0,0 +1,42 @@ +set(NCNN_OPENMP ON) +set(NCNN_THREADS ON) +set(NCNN_VULKAN ON) +set(NCNN_SHARED_LIB OFF) +set(NCNN_SYSTEM_GLSLANG OFF) + +if(NCNN_OPENMP) + find_package(OpenMP) +endif() + +if(NCNN_THREADS) + set(CMAKE_THREAD_PREFER_PTHREAD TRUE) + set(THREADS_PREFER_PTHREAD_FLAG TRUE) + find_package(Threads REQUIRED) +endif() + +if(NCNN_VULKAN) + find_package(Vulkan REQUIRED) + + if(NOT NCNN_SHARED_LIB) + if(NCNN_SYSTEM_GLSLANG) + find_package(glslang QUIET) + if(NOT glslang_FOUND) + set(GLSLANG_TARGET_DIR "") + include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake) + include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake) + if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + # hlsl support can be optional + include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + endif() + include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake) + include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake) + endif() + else() + set(glslang_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/glslang") + find_package(glslang QUIET) + endif() + + endif() +endif() + +include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libGenericCodeGen.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libGenericCodeGen.a new file mode 100644 index 0000000..79782f1 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libGenericCodeGen.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libMachineIndependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libMachineIndependent.a new file mode 100644 index 0000000..adb562d Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libMachineIndependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libOGLCompiler.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libOGLCompiler.a new file mode 100644 index 0000000..6e67689 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libOGLCompiler.a differ diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libOSDependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libOSDependent.a new file mode 100644 index 0000000..f464e5c Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libOSDependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libSPIRV.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libSPIRV.a new file mode 100644 index 0000000..ddc4390 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libSPIRV.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libglslang.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libglslang.a new file mode 100644 index 0000000..bf65f66 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libglslang.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libncnn.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libncnn.a new file mode 100644 index 0000000..63b7764 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/libncnn.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/pkgconfig/ncnn.pc b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/pkgconfig/ncnn.pc new file mode 100644 index 0000000..e683e4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86/lib/pkgconfig/ncnn.pc @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. +librarydir=${prefix}/lib +includedir=${prefix}/include + +Name: ncnn +Description: high-performance neural network inference framework optimized for the mobile platform +Version: 1.0.20230223 +URL: https://github.com/Tencent/ncnn +Libs: -L"${librarydir}" -lncnn +Cflags: -I"${includedir}" + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/BaseTypes.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/BaseTypes.h new file mode 100644 index 0000000..156d98b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/BaseTypes.h @@ -0,0 +1,609 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _BASICTYPES_INCLUDED_ +#define _BASICTYPES_INCLUDED_ + +namespace glslang { + +// +// Basic type. Arrays, vectors, sampler details, etc., are orthogonal to this. +// +enum TBasicType { + EbtVoid, + EbtFloat, + EbtDouble, + EbtFloat16, + EbtInt8, + EbtUint8, + EbtInt16, + EbtUint16, + EbtInt, + EbtUint, + EbtInt64, + EbtUint64, + EbtBool, + EbtAtomicUint, + EbtSampler, + EbtStruct, + EbtBlock, + EbtAccStruct, + EbtReference, + EbtRayQuery, +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type + EbtSpirvType, +#endif + + // HLSL types that live only temporarily. + EbtString, + + EbtNumTypes +}; + +// +// Storage qualifiers. Should align with different kinds of storage or +// resource or GLSL storage qualifier. Expansion is deprecated. +// +// N.B.: You probably DON'T want to add anything here, but rather just add it +// to the built-in variables. See the comment above TBuiltInVariable. +// +// A new built-in variable will normally be an existing qualifier, like 'in', 'out', etc. +// DO NOT follow the design pattern of, say EvqInstanceId, etc. 
+//
+enum TStorageQualifier {
+    EvqTemporary,     // For temporaries (within a function), read/write
+    EvqGlobal,        // For globals read/write
+    EvqConst,         // User-defined constant values, will be semantically constant and constant folded
+    EvqVaryingIn,     // pipeline input, read only, also supercategory for all built-ins not included in this enum (see TBuiltInVariable)
+    EvqVaryingOut,    // pipeline output, read/write, also supercategory for all built-ins not included in this enum (see TBuiltInVariable)
+    EvqUniform,       // read only, shared with app
+    EvqBuffer,        // read/write, shared with app
+    EvqShared,        // compute shader's read/write 'shared' qualifier
+#ifndef GLSLANG_WEB
+    EvqSpirvStorageClass, // spirv_storage_class
+#endif
+
+    EvqPayload,
+    EvqPayloadIn,
+    EvqHitAttr,
+    EvqCallableData,
+    EvqCallableDataIn,
+
+    EvqtaskPayloadSharedEXT,
+
+    // parameters
+    EvqIn,            // also, for 'in' in the grammar before we know if it's a pipeline input or an 'in' parameter
+    EvqOut,           // also, for 'out' in the grammar before we know if it's a pipeline output or an 'out' parameter
+    EvqInOut,
+    EvqConstReadOnly, // input; also other read-only types having neither a constant value nor constant-value semantics
+
+    // built-ins read by vertex shader
+    EvqVertexId,
+    EvqInstanceId,
+
+    // built-ins written by vertex shader
+    EvqPosition,
+    EvqPointSize,
+    EvqClipVertex,
+
+    // built-ins read by fragment shader
+    EvqFace,
+    EvqFragCoord,
+    EvqPointCoord,
+
+    // built-ins written by fragment shader
+    EvqFragColor,
+    EvqFragDepth,
+    EvqFragStencil,
+
+    // end of list
+    EvqLast
+};
+
+//
+// Subcategories of the TStorageQualifier, simply to give a direct mapping
+// between built-in variable names and a numerical value (the enum).
+//
+// For backward compatibility, there is some redundancy between the
+// TStorageQualifier and these. Existing members should be maintained
+// accurately in both.
+// However, any new built-in variable (and any existing non-redundant one)
+// must follow the pattern that the specific built-in is here, and only its
+// general qualifier is in TStorageQualifier.
+//
+// Something like gl_Position, which is sometimes 'in' and sometimes 'out'
+// shows up as two different built-in variables in a single stage, but
+// only has a single enum in TBuiltInVariable, so both the
+// TStorageQualifier and the TBuiltInVariable are needed to distinguish
+// between them.
+// +enum TBuiltInVariable { + EbvNone, + EbvNumWorkGroups, + EbvWorkGroupSize, + EbvWorkGroupId, + EbvLocalInvocationId, + EbvGlobalInvocationId, + EbvLocalInvocationIndex, + EbvNumSubgroups, + EbvSubgroupID, + EbvSubGroupSize, + EbvSubGroupInvocation, + EbvSubGroupEqMask, + EbvSubGroupGeMask, + EbvSubGroupGtMask, + EbvSubGroupLeMask, + EbvSubGroupLtMask, + EbvSubgroupSize2, + EbvSubgroupInvocation2, + EbvSubgroupEqMask2, + EbvSubgroupGeMask2, + EbvSubgroupGtMask2, + EbvSubgroupLeMask2, + EbvSubgroupLtMask2, + EbvVertexId, + EbvInstanceId, + EbvVertexIndex, + EbvInstanceIndex, + EbvBaseVertex, + EbvBaseInstance, + EbvDrawId, + EbvPosition, + EbvPointSize, + EbvClipVertex, + EbvClipDistance, + EbvCullDistance, + EbvNormal, + EbvVertex, + EbvMultiTexCoord0, + EbvMultiTexCoord1, + EbvMultiTexCoord2, + EbvMultiTexCoord3, + EbvMultiTexCoord4, + EbvMultiTexCoord5, + EbvMultiTexCoord6, + EbvMultiTexCoord7, + EbvFrontColor, + EbvBackColor, + EbvFrontSecondaryColor, + EbvBackSecondaryColor, + EbvTexCoord, + EbvFogFragCoord, + EbvInvocationId, + EbvPrimitiveId, + EbvLayer, + EbvViewportIndex, + EbvPatchVertices, + EbvTessLevelOuter, + EbvTessLevelInner, + EbvBoundingBox, + EbvTessCoord, + EbvColor, + EbvSecondaryColor, + EbvFace, + EbvFragCoord, + EbvPointCoord, + EbvFragColor, + EbvFragData, + EbvFragDepth, + EbvFragStencilRef, + EbvSampleId, + EbvSamplePosition, + EbvSampleMask, + EbvHelperInvocation, + + EbvBaryCoordNoPersp, + EbvBaryCoordNoPerspCentroid, + EbvBaryCoordNoPerspSample, + EbvBaryCoordSmooth, + EbvBaryCoordSmoothCentroid, + EbvBaryCoordSmoothSample, + EbvBaryCoordPullModel, + + EbvViewIndex, + EbvDeviceIndex, + + EbvShadingRateKHR, + EbvPrimitiveShadingRateKHR, + + EbvFragSizeEXT, + EbvFragInvocationCountEXT, + + EbvSecondaryFragDataEXT, + EbvSecondaryFragColorEXT, + + EbvViewportMaskNV, + EbvSecondaryPositionNV, + EbvSecondaryViewportMaskNV, + EbvPositionPerViewNV, + EbvViewportMaskPerViewNV, + EbvFragFullyCoveredNV, + EbvFragmentSizeNV, + EbvInvocationsPerPixelNV, + // ray tracing + EbvLaunchId, + EbvLaunchSize, + EbvInstanceCustomIndex, + EbvGeometryIndex, + EbvWorldRayOrigin, + EbvWorldRayDirection, + EbvObjectRayOrigin, + EbvObjectRayDirection, + EbvRayTmin, + EbvRayTmax, + EbvCullMask, + EbvHitT, + EbvHitKind, + EbvObjectToWorld, + EbvObjectToWorld3x4, + EbvWorldToObject, + EbvWorldToObject3x4, + EbvIncomingRayFlags, + EbvCurrentRayTimeNV, + // barycentrics + EbvBaryCoordNV, + EbvBaryCoordNoPerspNV, + EbvBaryCoordEXT, + EbvBaryCoordNoPerspEXT, + // mesh shaders + EbvTaskCountNV, + EbvPrimitiveCountNV, + EbvPrimitiveIndicesNV, + EbvClipDistancePerViewNV, + EbvCullDistancePerViewNV, + EbvLayerPerViewNV, + EbvMeshViewCountNV, + EbvMeshViewIndicesNV, + //GL_EXT_mesh_shader + EbvPrimitivePointIndicesEXT, + EbvPrimitiveLineIndicesEXT, + EbvPrimitiveTriangleIndicesEXT, + EbvCullPrimitiveEXT, + + // sm builtins + EbvWarpsPerSM, + EbvSMCount, + EbvWarpID, + EbvSMID, + + // HLSL built-ins that live only temporarily, until they get remapped + // to one of the above. + EbvFragDepthGreater, + EbvFragDepthLesser, + EbvGsOutputStream, + EbvOutputPatch, + EbvInputPatch, + + // structbuffer types + EbvAppendConsume, // no need to differentiate append and consume + EbvRWStructuredBuffer, + EbvStructuredBuffer, + EbvByteAddressBuffer, + EbvRWByteAddressBuffer, + + EbvLast +}; + +// In this enum, order matters; users can assume higher precision is a bigger value +// and EpqNone is 0. 
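+// (Callers may therefore compare qualifiers numerically; e.g. `p1 > p2`
+// selects the higher precision.)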
+enum TPrecisionQualifier { + EpqNone = 0, + EpqLow, + EpqMedium, + EpqHigh +}; + +#ifdef GLSLANG_WEB +__inline const char* GetStorageQualifierString(TStorageQualifier q) { return ""; } +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) { return ""; } +#else +// These will show up in error messages +__inline const char* GetStorageQualifierString(TStorageQualifier q) +{ + switch (q) { + case EvqTemporary: return "temp"; break; + case EvqGlobal: return "global"; break; + case EvqConst: return "const"; break; + case EvqConstReadOnly: return "const (read only)"; break; +#ifndef GLSLANG_WEB + case EvqSpirvStorageClass: return "spirv_storage_class"; break; +#endif + case EvqVaryingIn: return "in"; break; + case EvqVaryingOut: return "out"; break; + case EvqUniform: return "uniform"; break; + case EvqBuffer: return "buffer"; break; + case EvqShared: return "shared"; break; + case EvqIn: return "in"; break; + case EvqOut: return "out"; break; + case EvqInOut: return "inout"; break; + case EvqVertexId: return "gl_VertexId"; break; + case EvqInstanceId: return "gl_InstanceId"; break; + case EvqPosition: return "gl_Position"; break; + case EvqPointSize: return "gl_PointSize"; break; + case EvqClipVertex: return "gl_ClipVertex"; break; + case EvqFace: return "gl_FrontFacing"; break; + case EvqFragCoord: return "gl_FragCoord"; break; + case EvqPointCoord: return "gl_PointCoord"; break; + case EvqFragColor: return "fragColor"; break; + case EvqFragDepth: return "gl_FragDepth"; break; + case EvqFragStencil: return "gl_FragStencilRefARB"; break; + case EvqPayload: return "rayPayloadNV"; break; + case EvqPayloadIn: return "rayPayloadInNV"; break; + case EvqHitAttr: return "hitAttributeNV"; break; + case EvqCallableData: return "callableDataNV"; break; + case EvqCallableDataIn: return "callableDataInNV"; break; + case EvqtaskPayloadSharedEXT: return "taskPayloadSharedEXT"; break; + default: return "unknown qualifier"; + } +} + +__inline const char* GetBuiltInVariableString(TBuiltInVariable v) +{ + switch (v) { + case EbvNone: return ""; + case EbvNumWorkGroups: return "NumWorkGroups"; + case EbvWorkGroupSize: return "WorkGroupSize"; + case EbvWorkGroupId: return "WorkGroupID"; + case EbvLocalInvocationId: return "LocalInvocationID"; + case EbvGlobalInvocationId: return "GlobalInvocationID"; + case EbvLocalInvocationIndex: return "LocalInvocationIndex"; + case EbvNumSubgroups: return "NumSubgroups"; + case EbvSubgroupID: return "SubgroupID"; + case EbvSubGroupSize: return "SubGroupSize"; + case EbvSubGroupInvocation: return "SubGroupInvocation"; + case EbvSubGroupEqMask: return "SubGroupEqMask"; + case EbvSubGroupGeMask: return "SubGroupGeMask"; + case EbvSubGroupGtMask: return "SubGroupGtMask"; + case EbvSubGroupLeMask: return "SubGroupLeMask"; + case EbvSubGroupLtMask: return "SubGroupLtMask"; + case EbvSubgroupSize2: return "SubgroupSize"; + case EbvSubgroupInvocation2: return "SubgroupInvocationID"; + case EbvSubgroupEqMask2: return "SubgroupEqMask"; + case EbvSubgroupGeMask2: return "SubgroupGeMask"; + case EbvSubgroupGtMask2: return "SubgroupGtMask"; + case EbvSubgroupLeMask2: return "SubgroupLeMask"; + case EbvSubgroupLtMask2: return "SubgroupLtMask"; + case EbvVertexId: return "VertexId"; + case EbvInstanceId: return "InstanceId"; + case EbvVertexIndex: return "VertexIndex"; + case EbvInstanceIndex: return "InstanceIndex"; + case EbvBaseVertex: return "BaseVertex"; + case EbvBaseInstance: return "BaseInstance"; + case EbvDrawId: return "DrawId"; + case EbvPosition: return 
"Position"; + case EbvPointSize: return "PointSize"; + case EbvClipVertex: return "ClipVertex"; + case EbvClipDistance: return "ClipDistance"; + case EbvCullDistance: return "CullDistance"; + case EbvNormal: return "Normal"; + case EbvVertex: return "Vertex"; + case EbvMultiTexCoord0: return "MultiTexCoord0"; + case EbvMultiTexCoord1: return "MultiTexCoord1"; + case EbvMultiTexCoord2: return "MultiTexCoord2"; + case EbvMultiTexCoord3: return "MultiTexCoord3"; + case EbvMultiTexCoord4: return "MultiTexCoord4"; + case EbvMultiTexCoord5: return "MultiTexCoord5"; + case EbvMultiTexCoord6: return "MultiTexCoord6"; + case EbvMultiTexCoord7: return "MultiTexCoord7"; + case EbvFrontColor: return "FrontColor"; + case EbvBackColor: return "BackColor"; + case EbvFrontSecondaryColor: return "FrontSecondaryColor"; + case EbvBackSecondaryColor: return "BackSecondaryColor"; + case EbvTexCoord: return "TexCoord"; + case EbvFogFragCoord: return "FogFragCoord"; + case EbvInvocationId: return "InvocationID"; + case EbvPrimitiveId: return "PrimitiveID"; + case EbvLayer: return "Layer"; + case EbvViewportIndex: return "ViewportIndex"; + case EbvPatchVertices: return "PatchVertices"; + case EbvTessLevelOuter: return "TessLevelOuter"; + case EbvTessLevelInner: return "TessLevelInner"; + case EbvBoundingBox: return "BoundingBox"; + case EbvTessCoord: return "TessCoord"; + case EbvColor: return "Color"; + case EbvSecondaryColor: return "SecondaryColor"; + case EbvFace: return "Face"; + case EbvFragCoord: return "FragCoord"; + case EbvPointCoord: return "PointCoord"; + case EbvFragColor: return "FragColor"; + case EbvFragData: return "FragData"; + case EbvFragDepth: return "FragDepth"; + case EbvFragStencilRef: return "FragStencilRef"; + case EbvSampleId: return "SampleId"; + case EbvSamplePosition: return "SamplePosition"; + case EbvSampleMask: return "SampleMaskIn"; + case EbvHelperInvocation: return "HelperInvocation"; + + case EbvBaryCoordNoPersp: return "BaryCoordNoPersp"; + case EbvBaryCoordNoPerspCentroid: return "BaryCoordNoPerspCentroid"; + case EbvBaryCoordNoPerspSample: return "BaryCoordNoPerspSample"; + case EbvBaryCoordSmooth: return "BaryCoordSmooth"; + case EbvBaryCoordSmoothCentroid: return "BaryCoordSmoothCentroid"; + case EbvBaryCoordSmoothSample: return "BaryCoordSmoothSample"; + case EbvBaryCoordPullModel: return "BaryCoordPullModel"; + + case EbvViewIndex: return "ViewIndex"; + case EbvDeviceIndex: return "DeviceIndex"; + + case EbvFragSizeEXT: return "FragSizeEXT"; + case EbvFragInvocationCountEXT: return "FragInvocationCountEXT"; + + case EbvSecondaryFragDataEXT: return "SecondaryFragDataEXT"; + case EbvSecondaryFragColorEXT: return "SecondaryFragColorEXT"; + + case EbvViewportMaskNV: return "ViewportMaskNV"; + case EbvSecondaryPositionNV: return "SecondaryPositionNV"; + case EbvSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; + case EbvPositionPerViewNV: return "PositionPerViewNV"; + case EbvViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; + case EbvFragFullyCoveredNV: return "FragFullyCoveredNV"; + case EbvFragmentSizeNV: return "FragmentSizeNV"; + case EbvInvocationsPerPixelNV: return "InvocationsPerPixelNV"; + case EbvLaunchId: return "LaunchIdNV"; + case EbvLaunchSize: return "LaunchSizeNV"; + case EbvInstanceCustomIndex: return "InstanceCustomIndexNV"; + case EbvGeometryIndex: return "GeometryIndexEXT"; + case EbvWorldRayOrigin: return "WorldRayOriginNV"; + case EbvWorldRayDirection: return "WorldRayDirectionNV"; + case EbvObjectRayOrigin: return "ObjectRayOriginNV"; + 
case EbvObjectRayDirection: return "ObjectRayDirectionNV"; + case EbvRayTmin: return "ObjectRayTminNV"; + case EbvRayTmax: return "ObjectRayTmaxNV"; + case EbvHitT: return "HitTNV"; + case EbvHitKind: return "HitKindNV"; + case EbvIncomingRayFlags: return "IncomingRayFlagsNV"; + case EbvObjectToWorld: return "ObjectToWorldNV"; + case EbvWorldToObject: return "WorldToObjectNV"; + case EbvCurrentRayTimeNV: return "CurrentRayTimeNV"; + + case EbvBaryCoordEXT: + case EbvBaryCoordNV: return "BaryCoordKHR"; + case EbvBaryCoordNoPerspEXT: + case EbvBaryCoordNoPerspNV: return "BaryCoordNoPerspKHR"; + + case EbvTaskCountNV: return "TaskCountNV"; + case EbvPrimitiveCountNV: return "PrimitiveCountNV"; + case EbvPrimitiveIndicesNV: return "PrimitiveIndicesNV"; + case EbvClipDistancePerViewNV: return "ClipDistancePerViewNV"; + case EbvCullDistancePerViewNV: return "CullDistancePerViewNV"; + case EbvLayerPerViewNV: return "LayerPerViewNV"; + case EbvMeshViewCountNV: return "MeshViewCountNV"; + case EbvMeshViewIndicesNV: return "MeshViewIndicesNV"; + // GL_EXT_mesh_shader + case EbvPrimitivePointIndicesEXT: return "PrimitivePointIndicesEXT"; + case EbvPrimitiveLineIndicesEXT: return "PrimitiveLineIndicesEXT"; + case EbvPrimitiveTriangleIndicesEXT: return "PrimitiveTriangleIndicesEXT"; + case EbvCullPrimitiveEXT: return "CullPrimitiveEXT"; + + case EbvWarpsPerSM: return "WarpsPerSMNV"; + case EbvSMCount: return "SMCountNV"; + case EbvWarpID: return "WarpIDNV"; + case EbvSMID: return "SMIDNV"; + + case EbvShadingRateKHR: return "ShadingRateKHR"; + case EbvPrimitiveShadingRateKHR: return "PrimitiveShadingRateKHR"; + + default: return "unknown built-in variable"; + } +} + +__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) +{ + switch (p) { + case EpqNone: return ""; break; + case EpqLow: return "lowp"; break; + case EpqMedium: return "mediump"; break; + case EpqHigh: return "highp"; break; + default: return "unknown precision qualifier"; + } +} +#endif + +__inline bool isTypeSignedInt(TBasicType type) +{ + switch (type) { + case EbtInt8: + case EbtInt16: + case EbtInt: + case EbtInt64: + return true; + default: + return false; + } +} + +__inline bool isTypeUnsignedInt(TBasicType type) +{ + switch (type) { + case EbtUint8: + case EbtUint16: + case EbtUint: + case EbtUint64: + return true; + default: + return false; + } +} + +__inline bool isTypeInt(TBasicType type) +{ + return isTypeSignedInt(type) || isTypeUnsignedInt(type); +} + +__inline bool isTypeFloat(TBasicType type) +{ + switch (type) { + case EbtFloat: + case EbtDouble: + case EbtFloat16: + return true; + default: + return false; + } +} + +__inline int getTypeRank(TBasicType type) +{ + int res = -1; + switch(type) { + case EbtInt8: + case EbtUint8: + res = 0; + break; + case EbtInt16: + case EbtUint16: + res = 1; + break; + case EbtInt: + case EbtUint: + res = 2; + break; + case EbtInt64: + case EbtUint64: + res = 3; + break; + default: + assert(false); + break; + } + return res; +} + +} // end namespace glslang + +#endif // _BASICTYPES_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/Common.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/Common.h new file mode 100644 index 0000000..a5b41cb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/Common.h @@ -0,0 +1,340 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. 
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _COMMON_INCLUDED_
+#define _COMMON_INCLUDED_
+
+#include <algorithm>
+#include <cassert>
+#ifdef _MSC_VER
+#include <cfloat>
+#else
+#include <cmath>
+#endif
+#include <cstdio>
+#include <cstdlib>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#if defined(__ANDROID__) || (defined(_MSC_VER) && _MSC_VER < 1700)
+#include <sstream>
+namespace std {
+template <typename T>
+std::string to_string(const T& val) {
+    std::ostringstream os;
+    os << val;
+    return os.str();
+}
+}
+#endif
+
+#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API)
+    #include <basetsd.h>
+    #ifndef snprintf
+    #define snprintf sprintf_s
+    #endif
+    #define safe_vsprintf(buf,max,format,args) vsnprintf_s((buf), (max), (max), (format), (args))
+#elif defined (solaris)
+    #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args))
+    #include <sys/int_types.h>
+    #define UINT_PTR uintptr_t
+#else
+    #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args))
+    #include <stdint.h>
+    #define UINT_PTR uintptr_t
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+    #include <stdlib.h>
+    inline long long int strtoll (const char* str, char** endptr, int base)
+    {
+        return _strtoi64(str, endptr, base);
+    }
+    inline unsigned long long int strtoull (const char* str, char** endptr, int base)
+    {
+        return _strtoui64(str, endptr, base);
+    }
+    inline long long int atoll (const char* str)
+    {
+        return strtoll(str, NULL, 10);
+    }
+#endif
+
+#if defined(_MSC_VER)
+#define strdup _strdup
+#endif
+
+/* windows only pragma */
+#ifdef _MSC_VER
+    #pragma warning(disable : 4786) // Don't warn about too long identifiers
+    #pragma warning(disable : 4514) // unused inline method
+    #pragma warning(disable : 4201) // nameless union
+#endif
+
+#include "PoolAlloc.h"
+
+//
+// Put POOL_ALLOCATOR_NEW_DELETE in base classes to make them use this scheme.
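+//
+// An illustrative sketch (not part of the original header; `TThing` is a
+// hypothetical name):
+//
+//     class TThing {
+//     public:
+//         POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+//     };
+//     TThing* t = new TThing;   // storage comes from the thread's pool allocator
+//     delete t;                 // operator delete is a no-op; the memory is
+//                               // reclaimed when the pool itself is released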
+//
+#define POOL_ALLOCATOR_NEW_DELETE(A)                                  \
+    void* operator new(size_t s) { return (A).allocate(s); }          \
+    void* operator new(size_t, void *_Where) { return (_Where); }     \
+    void operator delete(void*) { }                                   \
+    void operator delete(void *, void *) { }                          \
+    void* operator new[](size_t s) { return (A).allocate(s); }        \
+    void* operator new[](size_t, void *_Where) { return (_Where); }   \
+    void operator delete[](void*) { }                                 \
+    void operator delete[](void *, void *) { }
+
+namespace glslang {
+
+    //
+    // Pool version of string.
+    //
+    typedef pool_allocator<char> TStringAllocator;
+    typedef std::basic_string <char, std::char_traits<char>, TStringAllocator> TString;
+
+} // end namespace glslang
+
+// Repackage the std::hash for use by unordered map/set with a TString key.
+namespace std {
+
+    template<> struct hash<glslang::TString> {
+        std::size_t operator()(const glslang::TString& s) const
+        {
+            const unsigned _FNV_offset_basis = 2166136261U;
+            const unsigned _FNV_prime = 16777619U;
+            unsigned _Val = _FNV_offset_basis;
+            size_t _Count = s.size();
+            const char* _First = s.c_str();
+            for (size_t _Next = 0; _Next < _Count; ++_Next)
+            {
+                _Val ^= (unsigned)_First[_Next];
+                _Val *= _FNV_prime;
+            }
+
+            return _Val;
+        }
+    };
+}
+
+namespace glslang {
+
+inline TString* NewPoolTString(const char* s)
+{
+    void* memory = GetThreadPoolAllocator().allocate(sizeof(TString));
+    return new(memory) TString(s);
+}
+
+template<class T> inline T* NewPoolObject(T*)
+{
+    return new(GetThreadPoolAllocator().allocate(sizeof(T))) T;
+}
+
+template<class T> inline T* NewPoolObject(T, int instances)
+{
+    return new(GetThreadPoolAllocator().allocate(instances * sizeof(T))) T[instances];
+}
+
+//
+// Pool allocator versions of vectors, lists, and maps
+//
+template <class T> class TVector : public std::vector<T, pool_allocator<T> > {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    typedef typename std::vector<T, pool_allocator<T> >::size_type size_type;
+    TVector() : std::vector<T, pool_allocator<T> >() {}
+    TVector(const pool_allocator<T>& a) : std::vector<T, pool_allocator<T> >(a) {}
+    TVector(size_type i) : std::vector<T, pool_allocator<T> >(i) {}
+    TVector(size_type i, const T& val) : std::vector<T, pool_allocator<T> >(i, val) {}
+};
+
+template <class T> class TList : public std::list<T, pool_allocator<T> > {
+};
+
+template <class K, class D, class CMP = std::less<K> >
+class TMap : public std::map<K, D, CMP, pool_allocator<std::pair<K const, D> > > {
+};
+
+template <class K, class D, class HASH = std::hash<K>, class PRED = std::equal_to<K> >
+class TUnorderedMap : public std::unordered_map<K, D, HASH, PRED, pool_allocator<std::pair<K const, D> > > {
+};
+
+template <class K, class CMP = std::less<K> >
+class TSet : public std::set<K, CMP, pool_allocator<K> > {
+};
+
+//
+// Persistent string memory. Should only be used for strings that survive
+// across compiles/links.
+//
+typedef std::basic_string<char> TPersistString;
+
+//
+// templatized min and max functions.
+//
+template <class T> T Min(const T a, const T b) { return a < b ? a : b; }
+template <class T> T Max(const T a, const T b) { return a > b ? a : b; }
+
+//
+// Create a TString object from an integer.
+//
+#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API)
+inline const TString String(const int i, const int base = 10)
+{
+    char text[16];     // 32 bit ints are at most 10 digits in base 10
+    _itoa_s(i, text, sizeof(text), base);
+    return text;
+}
+#else
+inline const TString String(const int i, const int /*base*/ = 10)
+{
+    char text[16];     // 32 bit ints are at most 10 digits in base 10
+
+    // we assume base 10 for all cases
+    snprintf(text, sizeof(text), "%d", i);
+
+    return text;
+}
+#endif
+
+struct TSourceLoc {
+    void init()
+    {
+        name = nullptr; string = 0; line = 0; column = 0;
+    }
+    void init(int stringNum) { init(); string = stringNum; }
+    // Returns the name if it exists. Otherwise, returns the string number.
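+    // For example, a loc named "shader.vert" (a hypothetical file name) yields
+    // "\"shader.vert\"" with the default quoting, while an unnamed string
+    // numbered 2 yields "2".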
+    std::string getStringNameOrNum(bool quoteStringName = true) const
+    {
+        if (name != nullptr) {
+            TString qstr = quoteStringName ? ("\"" + *name + "\"") : *name;
+            std::string ret_str(qstr.c_str());
+            return ret_str;
+        }
+        return std::to_string((long long)string);
+    }
+    const char* getFilename() const
+    {
+        if (name == nullptr)
+            return nullptr;
+        return name->c_str();
+    }
+    const char* getFilenameStr() const { return name == nullptr ? "" : name->c_str(); }
+    TString* name; // descriptive name for this string, when a textual name is available, otherwise nullptr
+    int string;
+    int line;
+    int column;
+};
+
+class TPragmaTable : public TMap<TString, TString> {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+};
+
+const int MaxTokenLength = 1024;
+
+template <class T> bool IsPow2(T powerOf2)
+{
+    if (powerOf2 <= 0)
+        return false;
+
+    return (powerOf2 & (powerOf2 - 1)) == 0;
+}
+
+// Round number up to a multiple of the given powerOf2, which is not
+// an exponent but a value that must itself be a power of 2.
+template <class T> void RoundToPow2(T& number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    number = (number + powerOf2 - 1) & ~(powerOf2 - 1);
+}
+
+template <class T> bool IsMultipleOfPow2(T number, int powerOf2)
+{
+    assert(IsPow2(powerOf2));
+    return ! (number & (powerOf2 - 1));
+}
+
+// Returns log2 of an integer power of 2.
+// T should be integral.
+template <class T> int IntLog2(T n)
+{
+    assert(IsPow2(n));
+    int result = 0;
+    while ((T(1) << result) != n) {
+        result++;
+    }
+    return result;
+}
+
+inline bool IsInfinity(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_NINF:
+    case _FPCLASS_PINF:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isinf(x);
+#endif
+}
+
+inline bool IsNan(double x) {
+#ifdef _MSC_VER
+    switch (_fpclass(x)) {
+    case _FPCLASS_SNAN:
+    case _FPCLASS_QNAN:
+        return true;
+    default:
+        return false;
+    }
+#else
+    return std::isnan(x);
+#endif
+}
+
+} // end namespace glslang
+
+#endif // _COMMON_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ConstantUnion.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ConstantUnion.h
new file mode 100644
index 0000000..c4ffb85
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ConstantUnion.h
@@ -0,0 +1,974 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+// Copyright (C) 2017 ARM Limited.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _CONSTANT_UNION_INCLUDED_ +#define _CONSTANT_UNION_INCLUDED_ + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" + +namespace glslang { + +class TConstUnion { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TConstUnion() : iConst(0), type(EbtInt) { } + + void setI8Const(signed char i) + { + i8Const = i; + type = EbtInt8; + } + + void setU8Const(unsigned char u) + { + u8Const = u; + type = EbtUint8; + } + + void setI16Const(signed short i) + { + i16Const = i; + type = EbtInt16; + } + + void setU16Const(unsigned short u) + { + u16Const = u; + type = EbtUint16; + } + + void setIConst(int i) + { + iConst = i; + type = EbtInt; + } + + void setUConst(unsigned int u) + { + uConst = u; + type = EbtUint; + } + + void setI64Const(long long i64) + { + i64Const = i64; + type = EbtInt64; + } + + void setU64Const(unsigned long long u64) + { + u64Const = u64; + type = EbtUint64; + } + + void setDConst(double d) + { + dConst = d; + type = EbtDouble; + } + + void setBConst(bool b) + { + bConst = b; + type = EbtBool; + } + + void setSConst(const TString* s) + { + sConst = s; + type = EbtString; + } + + signed char getI8Const() const { return i8Const; } + unsigned char getU8Const() const { return u8Const; } + signed short getI16Const() const { return i16Const; } + unsigned short getU16Const() const { return u16Const; } + int getIConst() const { return iConst; } + unsigned int getUConst() const { return uConst; } + long long getI64Const() const { return i64Const; } + unsigned long long getU64Const() const { return u64Const; } + double getDConst() const { return dConst; } + bool getBConst() const { return bConst; } + const TString* getSConst() const { return sConst; } + + bool operator==(const signed char i) const + { + if (i == i8Const) + return true; + + return false; + } + + bool operator==(const unsigned char u) const + { + if (u == u8Const) + return true; + + return false; + } + + bool operator==(const signed short i) const + { + if (i == i16Const) + return true; + + return false; + } + + bool operator==(const unsigned short u) const + { + if (u == u16Const) + return true; + + return false; + } + + bool operator==(const int i) const + { + if (i == iConst) + return true; + + return false; + } + + bool operator==(const unsigned int u) const + { + if (u == uConst) + return true; + + return false; + } + + bool operator==(const long long i64) const + { + if (i64 == i64Const) + return true; + + return false; + } + + bool operator==(const unsigned long long u64) const + { + if (u64 == u64Const) + return true; + + return false; + } + + bool operator==(const double d) const + { + if (d == dConst) + return true; + + return false; + } + + bool operator==(const bool b) const + { + if (b == bConst) + return true; + + return false; + } + + bool operator==(const TConstUnion& constant) const + { + if (constant.type != type) + return false; + + switch (type) { + case EbtInt: + if (constant.iConst == iConst) + return true; + + break; + 
case EbtUint: + if (constant.uConst == uConst) + return true; + + break; + case EbtBool: + if (constant.bConst == bConst) + return true; + + break; + case EbtDouble: + if (constant.dConst == dConst) + return true; + + break; + +#ifndef GLSLANG_WEB + case EbtInt16: + if (constant.i16Const == i16Const) + return true; + + break; + case EbtUint16: + if (constant.u16Const == u16Const) + return true; + + break; + case EbtInt8: + if (constant.i8Const == i8Const) + return true; + + break; + case EbtUint8: + if (constant.u8Const == u8Const) + return true; + + break; + case EbtInt64: + if (constant.i64Const == i64Const) + return true; + + break; + case EbtUint64: + if (constant.u64Const == u64Const) + return true; + + break; +#endif + default: + assert(false && "Default missing"); + } + + return false; + } + + bool operator!=(const signed char i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned char u) const + { + return !operator==(u); + } + + bool operator!=(const signed short i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned short u) const + { + return !operator==(u); + } + + bool operator!=(const int i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned int u) const + { + return !operator==(u); + } + + bool operator!=(const long long i) const + { + return !operator==(i); + } + + bool operator!=(const unsigned long long u) const + { + return !operator==(u); + } + + bool operator!=(const float f) const + { + return !operator==(f); + } + + bool operator!=(const bool b) const + { + return !operator==(b); + } + + bool operator!=(const TConstUnion& constant) const + { + return !operator==(constant); + } + + bool operator>(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { + case EbtInt: + if (iConst > constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst > constant.uConst) + return true; + + return false; + case EbtDouble: + if (dConst > constant.dConst) + return true; + + return false; +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const > constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const > constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const > constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const > constant.u16Const) + return true; + + return false; + case EbtInt64: + if (i64Const > constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const > constant.u64Const) + return true; + + return false; +#endif + default: + assert(false && "Default missing"); + return false; + } + } + + bool operator<(const TConstUnion& constant) const + { + assert(type == constant.type); + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + if (i8Const < constant.i8Const) + return true; + + return false; + case EbtUint8: + if (u8Const < constant.u8Const) + return true; + + return false; + case EbtInt16: + if (i16Const < constant.i16Const) + return true; + + return false; + case EbtUint16: + if (u16Const < constant.u16Const) + return true; + return false; + case EbtInt64: + if (i64Const < constant.i64Const) + return true; + + return false; + case EbtUint64: + if (u64Const < constant.u64Const) + return true; + + return false; +#endif + case EbtDouble: + if (dConst < constant.dConst) + return true; + + return false; + case EbtInt: + if (iConst < constant.iConst) + return true; + + return false; + case EbtUint: + if (uConst < constant.uConst) + return true; + + return false; 
+        default:
+            assert(false && "Default missing");
+            return false;
+        }
+    }
+
+    TConstUnion operator+(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst + constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst + constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst + constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const + constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const + constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const + constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const + constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const + constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const + constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator-(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst - constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst - constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst - constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const - constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const - constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const - constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const - constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const - constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const - constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator*(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst * constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst * constant.uConst); break;
+        case EbtDouble: returnValue.setDConst(dConst * constant.dConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const * constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const * constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const * constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const * constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const * constant.u16Const); break;
+        case EbtUint64: returnValue.setU64Const(u64Const * constant.u64Const); break;
+#endif
+        default: assert(false && "Default missing");
+        }
+
+        return returnValue;
+    }
+
+    TConstUnion operator%(const TConstUnion& constant) const
+    {
+        TConstUnion returnValue;
+        assert(type == constant.type);
+        switch (type) {
+        case EbtInt:    returnValue.setIConst(iConst % constant.iConst); break;
+        case EbtUint:   returnValue.setUConst(uConst % constant.uConst); break;
+#ifndef GLSLANG_WEB
+        case EbtInt8:   returnValue.setI8Const(i8Const % constant.i8Const); break;
+        case EbtInt16:  returnValue.setI16Const(i16Const % constant.i16Const); break;
+        case EbtInt64:  returnValue.setI64Const(i64Const % constant.i64Const); break;
+        case EbtUint8:  returnValue.setU8Const(u8Const % constant.u8Const); break;
+        case EbtUint16: returnValue.setU16Const(u16Const %
constant.u16Const); break; + case EbtUint64: returnValue.setU64Const(u64Const % constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator>>(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const >> constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const >> constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const >> constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const >> constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const >> constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const >> constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const >> constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const >> constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const >> constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const >> constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const >> constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const >> constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst >> constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst >> constant.i8Const); break; + case EbtUint8: 
returnValue.setIConst(iConst >> constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst >> constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst >> constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst >> constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst >> constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst >> constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst >> constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst >> constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst >> constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst >> constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst >> constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst >> constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; +#ifndef GLSLANG_WEB + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const >> constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const >> constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const >> constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const >> constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const >> constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const >> constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const >> constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const >> constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const >> constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const >> constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const >> constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator<<(const TConstUnion& constant) const + { + TConstUnion returnValue; + switch (type) { +#ifndef GLSLANG_WEB + case EbtInt8: + switch (constant.type) { + case EbtInt8: returnValue.setI8Const(i8Const << constant.i8Const); break; + case EbtUint8: returnValue.setI8Const(i8Const << constant.u8Const); break; + case EbtInt16: returnValue.setI8Const(i8Const << constant.i16Const); break; + case EbtUint16: returnValue.setI8Const(i8Const << constant.u16Const); break; + case EbtInt: returnValue.setI8Const(i8Const << constant.iConst); break; + case EbtUint: returnValue.setI8Const(i8Const << constant.uConst); break; + case EbtInt64: returnValue.setI8Const(i8Const << constant.i64Const); break; + case EbtUint64: returnValue.setI8Const(i8Const << constant.u64Const); break; + default: 
assert(false && "Default missing"); + } + break; + case EbtUint8: + switch (constant.type) { + case EbtInt8: returnValue.setU8Const(u8Const << constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const << constant.u8Const); break; + case EbtInt16: returnValue.setU8Const(u8Const << constant.i16Const); break; + case EbtUint16: returnValue.setU8Const(u8Const << constant.u16Const); break; + case EbtInt: returnValue.setU8Const(u8Const << constant.iConst); break; + case EbtUint: returnValue.setU8Const(u8Const << constant.uConst); break; + case EbtInt64: returnValue.setU8Const(u8Const << constant.i64Const); break; + case EbtUint64: returnValue.setU8Const(u8Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt16: + switch (constant.type) { + case EbtInt8: returnValue.setI16Const(i16Const << constant.i8Const); break; + case EbtUint8: returnValue.setI16Const(i16Const << constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const << constant.i16Const); break; + case EbtUint16: returnValue.setI16Const(i16Const << constant.u16Const); break; + case EbtInt: returnValue.setI16Const(i16Const << constant.iConst); break; + case EbtUint: returnValue.setI16Const(i16Const << constant.uConst); break; + case EbtInt64: returnValue.setI16Const(i16Const << constant.i64Const); break; + case EbtUint64: returnValue.setI16Const(i16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint16: + switch (constant.type) { + case EbtInt8: returnValue.setU16Const(u16Const << constant.i8Const); break; + case EbtUint8: returnValue.setU16Const(u16Const << constant.u8Const); break; + case EbtInt16: returnValue.setU16Const(u16Const << constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const << constant.u16Const); break; + case EbtInt: returnValue.setU16Const(u16Const << constant.iConst); break; + case EbtUint: returnValue.setU16Const(u16Const << constant.uConst); break; + case EbtInt64: returnValue.setU16Const(u16Const << constant.i64Const); break; + case EbtUint64: returnValue.setU16Const(u16Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtInt64: + switch (constant.type) { + case EbtInt8: returnValue.setI64Const(i64Const << constant.i8Const); break; + case EbtUint8: returnValue.setI64Const(i64Const << constant.u8Const); break; + case EbtInt16: returnValue.setI64Const(i64Const << constant.i16Const); break; + case EbtUint16: returnValue.setI64Const(i64Const << constant.u16Const); break; + case EbtInt: returnValue.setI64Const(i64Const << constant.iConst); break; + case EbtUint: returnValue.setI64Const(i64Const << constant.uConst); break; + case EbtInt64: returnValue.setI64Const(i64Const << constant.i64Const); break; + case EbtUint64: returnValue.setI64Const(i64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; + case EbtUint64: + switch (constant.type) { + case EbtInt8: returnValue.setU64Const(u64Const << constant.i8Const); break; + case EbtUint8: returnValue.setU64Const(u64Const << constant.u8Const); break; + case EbtInt16: returnValue.setU64Const(u64Const << constant.i16Const); break; + case EbtUint16: returnValue.setU64Const(u64Const << constant.u16Const); break; + case EbtInt: returnValue.setU64Const(u64Const << constant.iConst); break; + case EbtUint: returnValue.setU64Const(u64Const << constant.uConst); break; + case EbtInt64: returnValue.setU64Const(u64Const << 
constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const << constant.u64Const); break; + default: assert(false && "Default missing"); + } + break; +#endif + case EbtInt: + switch (constant.type) { + case EbtInt: returnValue.setIConst(iConst << constant.iConst); break; + case EbtUint: returnValue.setIConst(iConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setIConst(iConst << constant.i8Const); break; + case EbtUint8: returnValue.setIConst(iConst << constant.u8Const); break; + case EbtInt16: returnValue.setIConst(iConst << constant.i16Const); break; + case EbtUint16: returnValue.setIConst(iConst << constant.u16Const); break; + case EbtInt64: returnValue.setIConst(iConst << constant.i64Const); break; + case EbtUint64: returnValue.setIConst(iConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + case EbtUint: + switch (constant.type) { + case EbtInt: returnValue.setUConst(uConst << constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst << constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setUConst(uConst << constant.i8Const); break; + case EbtUint8: returnValue.setUConst(uConst << constant.u8Const); break; + case EbtInt16: returnValue.setUConst(uConst << constant.i16Const); break; + case EbtUint16: returnValue.setUConst(uConst << constant.u16Const); break; + case EbtInt64: returnValue.setUConst(uConst << constant.i64Const); break; + case EbtUint64: returnValue.setUConst(uConst << constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst & constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst & constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const & constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const & constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const & constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const & constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const & constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const & constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator|(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst | constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst | constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const | constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const | constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const | constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const | constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const | constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const | constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator^(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == 
constant.type); + switch (type) { + case EbtInt: returnValue.setIConst(iConst ^ constant.iConst); break; + case EbtUint: returnValue.setUConst(uConst ^ constant.uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(i8Const ^ constant.i8Const); break; + case EbtUint8: returnValue.setU8Const(u8Const ^ constant.u8Const); break; + case EbtInt16: returnValue.setI16Const(i16Const ^ constant.i16Const); break; + case EbtUint16: returnValue.setU16Const(u16Const ^ constant.u16Const); break; + case EbtInt64: returnValue.setI64Const(i64Const ^ constant.i64Const); break; + case EbtUint64: returnValue.setU64Const(u64Const ^ constant.u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator~() const + { + TConstUnion returnValue; + switch (type) { + case EbtInt: returnValue.setIConst(~iConst); break; + case EbtUint: returnValue.setUConst(~uConst); break; +#ifndef GLSLANG_WEB + case EbtInt8: returnValue.setI8Const(~i8Const); break; + case EbtUint8: returnValue.setU8Const(~u8Const); break; + case EbtInt16: returnValue.setI16Const(~i16Const); break; + case EbtUint16: returnValue.setU16Const(~u16Const); break; + case EbtInt64: returnValue.setI64Const(~i64Const); break; + case EbtUint64: returnValue.setU64Const(~u64Const); break; +#endif + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator&&(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtBool: returnValue.setBConst(bConst && constant.bConst); break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TConstUnion operator||(const TConstUnion& constant) const + { + TConstUnion returnValue; + assert(type == constant.type); + switch (type) { + case EbtBool: returnValue.setBConst(bConst || constant.bConst); break; + default: assert(false && "Default missing"); + } + + return returnValue; + } + + TBasicType getType() const { return type; } + +private: + union { + signed char i8Const; // used for i8vec, scalar int8s + unsigned char u8Const; // used for u8vec, scalar uint8s + signed short i16Const; // used for i16vec, scalar int16s + unsigned short u16Const; // used for u16vec, scalar uint16s + int iConst; // used for ivec, scalar ints + unsigned int uConst; // used for uvec, scalar uints + long long i64Const; // used for i64vec, scalar int64s + unsigned long long u64Const; // used for u64vec, scalar uint64s + bool bConst; // used for bvec, scalar bools + double dConst; // used for vec, dvec, mat, dmat, scalar floats and doubles + const TString* sConst; // string constant + }; + + TBasicType type; +}; + +// Encapsulate having a pointer to an array of TConstUnion, +// which only needs to be allocated if its size is going to be +// bigger than 0. +// +// One convenience is being able to use [] to go inside the array, instead +// of C++ assuming it as an array of pointers to vectors. +// +// General usage is that the size is known up front, and it is +// created once with the proper size. 
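+//
+// A brief usage sketch (illustrative only, not part of the upstream header):
+//
+//     TConstUnion half;  half.setDConst(0.5);
+//     TConstUnionArray consts(4, half);    // smear: four copies of 0.5
+//     double d = consts[2].getDConst();    // operator[] reaches the elements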
+//
+class TConstUnionArray {
+public:
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TConstUnionArray() : unionArray(nullptr) { }
+    virtual ~TConstUnionArray() { }
+
+    explicit TConstUnionArray(int size)
+    {
+        if (size == 0)
+            unionArray = nullptr;
+        else
+            unionArray = new TConstUnionVector(size);
+    }
+    TConstUnionArray(const TConstUnionArray& a) = default;
+    TConstUnionArray(const TConstUnionArray& a, int start, int size)
+    {
+        unionArray = new TConstUnionVector(size);
+        for (int i = 0; i < size; ++i)
+            (*unionArray)[i] = a[start + i];
+    }
+
+    // Use this constructor for a smear operation
+    TConstUnionArray(int size, const TConstUnion& val)
+    {
+        unionArray = new TConstUnionVector(size, val);
+    }
+
+    int size() const { return unionArray ? (int)unionArray->size() : 0; }
+    TConstUnion& operator[](size_t index) { return (*unionArray)[index]; }
+    const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; }
+    bool operator==(const TConstUnionArray& rhs) const
+    {
+        // this includes the case that both are unallocated
+        if (unionArray == rhs.unionArray)
+            return true;
+
+        if (! unionArray || ! rhs.unionArray)
+            return false;
+
+        return *unionArray == *rhs.unionArray;
+    }
+    bool operator!=(const TConstUnionArray& rhs) const { return ! operator==(rhs); }
+
+    double dot(const TConstUnionArray& rhs)
+    {
+        assert(rhs.unionArray->size() == unionArray->size());
+        double sum = 0.0;
+
+        for (size_t comp = 0; comp < unionArray->size(); ++comp)
+            sum += (*this)[comp].getDConst() * rhs[comp].getDConst();
+
+        return sum;
+    }
+
+    bool empty() const { return unionArray == nullptr; }
+
+protected:
+    typedef TVector<TConstUnion> TConstUnionVector;
+    TConstUnionVector* unionArray;
+};
+
+} // end namespace glslang
+
+#endif // _CONSTANT_UNION_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/InfoSink.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/InfoSink.h
new file mode 100644
index 0000000..dceb603
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/InfoSink.h
@@ -0,0 +1,144 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef _INFOSINK_INCLUDED_
+#define _INFOSINK_INCLUDED_
+
+#include "../Include/Common.h"
+#include <cmath>
+
+namespace glslang {
+
+//
+// TPrefixType is used to centralize how info log messages start.
+// See below.
+//
+enum TPrefixType {
+    EPrefixNone,
+    EPrefixWarning,
+    EPrefixError,
+    EPrefixInternalError,
+    EPrefixUnimplemented,
+    EPrefixNote
+};
+
+enum TOutputStream {
+    ENull = 0,
+    EDebugger = 0x01,
+    EStdOut = 0x02,
+    EString = 0x04,
+};
+//
+// Encapsulate info logs for all objects that have them.
+//
+// The methods are a general set of tools for getting a variety of
+// messages and types inserted into the log.
+//
+class TInfoSinkBase {
+public:
+    TInfoSinkBase() : outputStream(4) {}
+    void erase() { sink.erase(); }
+    TInfoSinkBase& operator<<(const TPersistString& t) { append(t); return *this; }
+    TInfoSinkBase& operator<<(char c)                  { append(1, c); return *this; }
+    TInfoSinkBase& operator<<(const char* s)           { append(s); return *this; }
+    TInfoSinkBase& operator<<(int n)                   { append(String(n)); return *this; }
+    TInfoSinkBase& operator<<(unsigned int n)          { append(String(n)); return *this; }
+    TInfoSinkBase& operator<<(float n)                 { const int size = 40; char buf[size];
+                                                         snprintf(buf, size, (fabs(n) > 1e-8 && fabs(n) < 1e8) || n == 0.0f ?
"%f" : "%g", n); + append(buf); + return *this; } + TInfoSinkBase& operator+(const TPersistString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator<<(const TString& t) { append(t); return *this; } + TInfoSinkBase& operator+(const char* s) { append(s); return *this; } + const char* c_str() const { return sink.c_str(); } + void prefix(TPrefixType message) { + switch(message) { + case EPrefixNone: break; + case EPrefixWarning: append("WARNING: "); break; + case EPrefixError: append("ERROR: "); break; + case EPrefixInternalError: append("INTERNAL ERROR: "); break; + case EPrefixUnimplemented: append("UNIMPLEMENTED: "); break; + case EPrefixNote: append("NOTE: "); break; + default: append("UNKNOWN ERROR: "); break; + } + } + void location(const TSourceLoc& loc) { + const int maxSize = 24; + char locText[maxSize]; + snprintf(locText, maxSize, ":%d", loc.line); + append(loc.getStringNameOrNum(false).c_str()); + append(locText); + append(": "); + } + void message(TPrefixType message, const char* s) { + prefix(message); + append(s); + append("\n"); + } + void message(TPrefixType message, const char* s, const TSourceLoc& loc) { + prefix(message); + location(loc); + append(s); + append("\n"); + } + + void setOutputStream(int output = 4) + { + outputStream = output; + } + +protected: + void append(const char* s); + + void append(int count, char c); + void append(const TPersistString& t); + void append(const TString& t); + + void checkMem(size_t growth) { if (sink.capacity() < sink.size() + growth + 2) + sink.reserve(sink.capacity() + sink.capacity() / 2); } + void appendToStream(const char* s); + TPersistString sink; + int outputStream; +}; + +} // end namespace glslang + +class TInfoSink { +public: + glslang::TInfoSinkBase info; + glslang::TInfoSinkBase debug; +}; + +#endif // _INFOSINK_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/InitializeGlobals.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/InitializeGlobals.h new file mode 100644 index 0000000..95d0a40 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/InitializeGlobals.h @@ -0,0 +1,44 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef __INITIALIZE_GLOBALS_INCLUDED_ +#define __INITIALIZE_GLOBALS_INCLUDED_ + +namespace glslang { + +bool InitializePoolIndex(); + +} // end namespace glslang + +#endif // __INITIALIZE_GLOBALS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/PoolAlloc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/PoolAlloc.h new file mode 100644 index 0000000..1f5cac7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/PoolAlloc.h @@ -0,0 +1,318 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _POOLALLOC_INCLUDED_ +#define _POOLALLOC_INCLUDED_ + +#ifdef _DEBUG +# define GUARD_BLOCKS // define to enable guard block sanity checking +#endif + +// +// This header defines an allocator that can be used to efficiently +// allocate a large number of small requests for heap memory, with the +// intention that they are not individually deallocated, but rather +// collectively deallocated at one time. +// +// This simultaneously +// +// * Makes each individual allocation much more efficient; the +// typical allocation is trivial. +// * Completely avoids the cost of doing individual deallocation. +// * Saves the trouble of tracking down and plugging a large class of leaks. 
+//
+// Individual classes can use this allocator by supplying their own
+// new and delete methods.
+//
+// STL containers can use this allocator by using the pool_allocator
+// class as the allocator (second) template argument.
+//
+
+#include <cstddef>
+#include <string.h>
+#include <vector>
+
+namespace glslang {
+
+// If we are using guard blocks, we must track each individual
+// allocation. If we aren't using guard blocks, these
+// never get instantiated, so won't have any impact.
+//
+
+class TAllocation {
+public:
+    TAllocation(size_t size, unsigned char* mem, TAllocation* prev = 0) :
+        size(size), mem(mem), prevAlloc(prev) {
+        // Allocations are bracketed:
+        //    [allocationHeader][initialGuardBlock][userData][finalGuardBlock]
+        // This would be cleaner with if (guardBlockSize)..., but that
+        // makes the compiler print warnings about 0 length memsets,
+        // even with the if() protecting them.
+#       ifdef GUARD_BLOCKS
+        memset(preGuard(),  guardBlockBeginVal, guardBlockSize);
+        memset(data(),      userDataFill,       size);
+        memset(postGuard(), guardBlockEndVal,   guardBlockSize);
+#       endif
+    }
+
+    void check() const {
+        checkGuardBlock(preGuard(),  guardBlockBeginVal, "before");
+        checkGuardBlock(postGuard(), guardBlockEndVal,   "after");
+    }
+
+    void checkAllocList() const;
+
+    // Return total size needed to accommodate user buffer of 'size',
+    // plus our tracking data.
+    inline static size_t allocationSize(size_t size) {
+        return size + 2 * guardBlockSize + headerSize();
+    }
+
+    // Offset from surrounding buffer to get to user data buffer.
+    inline static unsigned char* offsetAllocation(unsigned char* m) {
+        return m + guardBlockSize + headerSize();
+    }
+
+private:
+    void checkGuardBlock(unsigned char* blockMem, unsigned char val, const char* locText) const;
+
+    // Find offsets to pre and post guard blocks, and user data buffer
+    unsigned char* preGuard()  const { return mem + headerSize(); }
+    unsigned char* data()      const { return preGuard() + guardBlockSize; }
+    unsigned char* postGuard() const { return data() + size; }
+
+    size_t size;                  // size of the user data area
+    unsigned char* mem;           // beginning of our allocation (pts to header)
+    TAllocation* prevAlloc;       // prior allocation in the chain
+
+    const static unsigned char guardBlockBeginVal;
+    const static unsigned char guardBlockEndVal;
+    const static unsigned char userDataFill;
+
+    const static size_t guardBlockSize;
+#   ifdef GUARD_BLOCKS
+    inline static size_t headerSize() { return sizeof(TAllocation); }
+#   else
+    inline static size_t headerSize() { return 0; }
+#   endif
+};
+
+//
+// There are several stacks. One is to track the pushing and popping
+// of the user, and not yet implemented. The others are simply
+// repositories of free pages or used pages.
+//
+// Page stacks are linked together with a simple header at the beginning
+// of each allocation obtained from the underlying OS. Multi-page allocations
+// are returned to the OS. Individual page allocations are kept for future
+// re-use.
+//
+// The "page size" used is not, nor must it match, the underlying OS
+// page size. But, having it be about that size or equal to a set of
+// pages is likely most optimal.
+//
+class TPoolAllocator {
+public:
+    TPoolAllocator(int growthIncrement = 8*1024, int allocationAlignment = 16);
+
+    //
+    // Don't call the destructor just to free up the memory, call pop()
+    //
+    ~TPoolAllocator();
+
+    //
+    // Call push() to establish a new place to pop memory to. Does not
+    // have to be called to get things started.
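+    //
+    // A typical push/pop cycle (illustrative only, not part of the
+    // upstream header):
+    //
+    //     TPoolAllocator& pool = GetThreadPoolAllocator();
+    //     pool.push();                   // mark the current state
+    //     void* p = pool.allocate(64);   // cheap bump allocation
+    //     // ... build transient objects in p ...
+    //     pool.pop();                    // frees everything since push()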
+    //
+    void push();
+
+    //
+    // Call pop() to free all memory allocated since the last call to push(),
+    // or if no last call to push, frees all memory since first allocation.
+    //
+    void pop();
+
+    //
+    // Call popAll() to free all memory allocated.
+    //
+    void popAll();
+
+    //
+    // Call allocate() to actually acquire memory. Returns 0 if no memory
+    // available, otherwise a properly aligned pointer to 'numBytes' of memory.
+    //
+    void* allocate(size_t numBytes);
+
+    //
+    // There is no deallocate. The point of this class is that
+    // deallocation can be skipped by the user of it, as the model
+    // of use is to simultaneously deallocate everything at once
+    // by calling pop(), and to not have to solve memory leak problems.
+    //
+
+protected:
+    friend struct tHeader;
+
+    struct tHeader {
+        tHeader(tHeader* nextPage, size_t pageCount) :
+#ifdef GUARD_BLOCKS
+            lastAllocation(0),
+#endif
+            nextPage(nextPage), pageCount(pageCount) { }
+
+        ~tHeader() {
+#ifdef GUARD_BLOCKS
+            if (lastAllocation)
+                lastAllocation->checkAllocList();
+#endif
+        }
+
+#ifdef GUARD_BLOCKS
+        TAllocation* lastAllocation;
+#endif
+        tHeader* nextPage;
+        size_t pageCount;
+    };
+
+    struct tAllocState {
+        size_t offset;
+        tHeader* page;
+    };
+    typedef std::vector<tAllocState> tAllocStack;
+
+    // Track allocations if and only if we're using guard blocks
+#ifndef GUARD_BLOCKS
+    void* initializeAllocation(tHeader*, unsigned char* memory, size_t) {
+#else
+    void* initializeAllocation(tHeader* block, unsigned char* memory, size_t numBytes) {
+        new(memory) TAllocation(numBytes, memory, block->lastAllocation);
+        block->lastAllocation = reinterpret_cast<TAllocation*>(memory);
+#endif
+
+        // This is optimized entirely away if GUARD_BLOCKS is not defined.
+        return TAllocation::offsetAllocation(memory);
+    }
+
+    size_t pageSize;           // granularity of allocation from the OS
+    size_t alignment;          // all returned allocations will be aligned at
+                               // this granularity, which will be a power of 2
+    size_t alignmentMask;
+    size_t headerSkip;         // amount of memory to skip to make room for the
+                               // header (basically, size of header, rounded
+                               // up to make it aligned)
+    size_t currentPageOffset;  // next offset in top of inUseList to allocate from
+    tHeader* freeList;         // list of popped memory
+    tHeader* inUseList;        // list of all memory currently being used
+    tAllocStack stack;         // stack of where to allocate from, to partition pool
+
+    int numCalls;              // just an interesting statistic
+    size_t totalBytes;         // just an interesting statistic
+private:
+    TPoolAllocator& operator=(const TPoolAllocator&);  // don't allow assignment operator
+    TPoolAllocator(const TPoolAllocator&);             // don't allow default copy constructor
+};
+
+//
+// There could potentially be many pools with pops happening at
+// different times. But a simple use is to have a global pop
+// with everyone using the same global allocator.
+//
+extern TPoolAllocator& GetThreadPoolAllocator();
+void SetThreadPoolAllocator(TPoolAllocator* poolAllocator);
+
+//
+// This STL compatible allocator is intended to be used as the allocator
+// parameter to templatized STL containers, like vector and map.
+//
+// It will use the pools for allocation, and not
+// do any deallocation, but will still do destruction.
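+//
+// For example (illustrative only, not part of the upstream header), a
+// pool-backed vector of ints:
+//
+//     std::vector<int, glslang::pool_allocator<int> > v;
+//     v.push_back(7);  // element storage comes from the thread pool and
+//                      // is only reclaimed when that pool is popped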
+//
+template<class T>
+class pool_allocator {
+public:
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    typedef T *pointer;
+    typedef const T *const_pointer;
+    typedef T& reference;
+    typedef const T& const_reference;
+    typedef T value_type;
+    template<class Other>
+    struct rebind {
+        typedef pool_allocator<Other> other;
+    };
+    pointer address(reference x) const { return &x; }
+    const_pointer address(const_reference x) const { return &x; }
+
+    pool_allocator() : allocator(GetThreadPoolAllocator()) { }
+    pool_allocator(TPoolAllocator& a) : allocator(a) { }
+    pool_allocator(const pool_allocator<T>& p) : allocator(p.allocator) { }
+
+    template<class Other>
+    pool_allocator(const pool_allocator<Other>& p) : allocator(p.getAllocator()) { }
+
+    pointer allocate(size_type n) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); }
+    pointer allocate(size_type n, const void*) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n * sizeof(T))); }
+
+    void deallocate(void*, size_type) { }
+    void deallocate(pointer, size_type) { }
+
+    pointer _Charalloc(size_t n) {
+        return reinterpret_cast<pointer>(getAllocator().allocate(n)); }
+
+    void construct(pointer p, const T& val) { new ((void *)p) T(val); }
+    void destroy(pointer p) { p->T::~T(); }
+
+    bool operator==(const pool_allocator& rhs) const { return &getAllocator() == &rhs.getAllocator(); }
+    bool operator!=(const pool_allocator& rhs) const { return &getAllocator() != &rhs.getAllocator(); }
+
+    size_type max_size() const { return static_cast<size_type>(-1) / sizeof(T); }
+    size_type max_size(int size) const { return static_cast<size_type>(-1) / size; }
+
+    TPoolAllocator& getAllocator() const { return allocator; }
+
+    pool_allocator select_on_container_copy_construction() const { return pool_allocator{}; }
+
+protected:
+    pool_allocator& operator=(const pool_allocator&) { return *this; }
+    TPoolAllocator& allocator;
+};
+
+} // end namespace glslang
+
+#endif // _POOLALLOC_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ResourceLimits.h
new file mode 100644
index 0000000..b36c8d6
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ResourceLimits.h
@@ -0,0 +1,159 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _RESOURCE_LIMITS_INCLUDED_ +#define _RESOURCE_LIMITS_INCLUDED_ + +struct TLimits { + bool nonInductiveForLoops; + bool whileLoops; + bool doWhileLoops; + bool generalUniformIndexing; + bool generalAttributeMatrixVectorIndexing; + bool generalVaryingIndexing; + bool generalSamplerIndexing; + bool generalVariableIndexing; + bool generalConstantMatrixVectorIndexing; +}; + +struct TBuiltInResource { + int maxLights; + int maxClipPlanes; + int maxTextureUnits; + int maxTextureCoords; + int maxVertexAttribs; + int maxVertexUniformComponents; + int maxVaryingFloats; + int maxVertexTextureImageUnits; + int maxCombinedTextureImageUnits; + int maxTextureImageUnits; + int maxFragmentUniformComponents; + int maxDrawBuffers; + int maxVertexUniformVectors; + int maxVaryingVectors; + int maxFragmentUniformVectors; + int maxVertexOutputVectors; + int maxFragmentInputVectors; + int minProgramTexelOffset; + int maxProgramTexelOffset; + int maxClipDistances; + int maxComputeWorkGroupCountX; + int maxComputeWorkGroupCountY; + int maxComputeWorkGroupCountZ; + int maxComputeWorkGroupSizeX; + int maxComputeWorkGroupSizeY; + int maxComputeWorkGroupSizeZ; + int maxComputeUniformComponents; + int maxComputeTextureImageUnits; + int maxComputeImageUniforms; + int maxComputeAtomicCounters; + int maxComputeAtomicCounterBuffers; + int maxVaryingComponents; + int maxVertexOutputComponents; + int maxGeometryInputComponents; + int maxGeometryOutputComponents; + int maxFragmentInputComponents; + int maxImageUnits; + int maxCombinedImageUnitsAndFragmentOutputs; + int maxCombinedShaderOutputResources; + int maxImageSamples; + int maxVertexImageUniforms; + int maxTessControlImageUniforms; + int maxTessEvaluationImageUniforms; + int maxGeometryImageUniforms; + int maxFragmentImageUniforms; + int maxCombinedImageUniforms; + int maxGeometryTextureImageUnits; + int maxGeometryOutputVertices; + int maxGeometryTotalOutputComponents; + int maxGeometryUniformComponents; + int maxGeometryVaryingComponents; + int maxTessControlInputComponents; + int maxTessControlOutputComponents; + int maxTessControlTextureImageUnits; + int maxTessControlUniformComponents; + int maxTessControlTotalOutputComponents; + int maxTessEvaluationInputComponents; + int maxTessEvaluationOutputComponents; + int maxTessEvaluationTextureImageUnits; + int maxTessEvaluationUniformComponents; + int maxTessPatchComponents; + int maxPatchVertices; + int maxTessGenLevel; + int maxViewports; + int maxVertexAtomicCounters; + int maxTessControlAtomicCounters; + int maxTessEvaluationAtomicCounters; + int maxGeometryAtomicCounters; + int maxFragmentAtomicCounters; + int maxCombinedAtomicCounters; + int maxAtomicCounterBindings; + int maxVertexAtomicCounterBuffers; + int maxTessControlAtomicCounterBuffers; + int maxTessEvaluationAtomicCounterBuffers; + int maxGeometryAtomicCounterBuffers; + int maxFragmentAtomicCounterBuffers; + int maxCombinedAtomicCounterBuffers; + int maxAtomicCounterBufferSize; + int 
maxTransformFeedbackBuffers; + int maxTransformFeedbackInterleavedComponents; + int maxCullDistances; + int maxCombinedClipAndCullDistances; + int maxSamples; + int maxMeshOutputVerticesNV; + int maxMeshOutputPrimitivesNV; + int maxMeshWorkGroupSizeX_NV; + int maxMeshWorkGroupSizeY_NV; + int maxMeshWorkGroupSizeZ_NV; + int maxTaskWorkGroupSizeX_NV; + int maxTaskWorkGroupSizeY_NV; + int maxTaskWorkGroupSizeZ_NV; + int maxMeshViewCountNV; + int maxMeshOutputVerticesEXT; + int maxMeshOutputPrimitivesEXT; + int maxMeshWorkGroupSizeX_EXT; + int maxMeshWorkGroupSizeY_EXT; + int maxMeshWorkGroupSizeZ_EXT; + int maxTaskWorkGroupSizeX_EXT; + int maxTaskWorkGroupSizeY_EXT; + int maxTaskWorkGroupSizeZ_EXT; + int maxMeshViewCountEXT; + int maxDualSourceDrawBuffersEXT; + + TLimits limits; +}; + +#endif // _RESOURCE_LIMITS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ShHandle.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ShHandle.h new file mode 100644 index 0000000..df07bd8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/ShHandle.h @@ -0,0 +1,176 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SHHANDLE_INCLUDED_ +#define _SHHANDLE_INCLUDED_ + +// +// Machine independent part of the compiler private objects +// sent as ShHandle to the driver. +// +// This should not be included by driver code. +// + +#define SH_EXPORTING +#include "../Public/ShaderLang.h" +#include "../MachineIndependent/Versions.h" +#include "InfoSink.h" + +class TCompiler; +class TLinker; +class TUniformMap; + +// +// The base class used to back handles returned to the driver. 
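Stepping back to the TBuiltInResource aggregate completed above: it is a plain struct with no behavior, so the embedding application fills in the limits for its target GPU and hands the compiler a pointer to it. A minimal sketch of that flow, not part of the diff; it assumes glslang's public TShader::parse entry point from ShaderLang.h, and every limit value below is illustrative rather than a required default:

    #include "glslang/Public/ShaderLang.h"

    static TBuiltInResource makeResources()
    {
        TBuiltInResource res = {};            // zero-initialize every limit
        res.maxDrawBuffers = 8;               // illustrative value
        res.maxComputeWorkGroupSizeX = 1024;  // illustrative value
        res.limits.whileLoops = true;         // the nested TLimits toggles
        // later, e.g.: shader.parse(&res, 110, false, EShMsgDefault);
        return res;
    }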
+// +class TShHandleBase { +public: + TShHandleBase() { pool = new glslang::TPoolAllocator; } + virtual ~TShHandleBase() { delete pool; } + virtual TCompiler* getAsCompiler() { return 0; } + virtual TLinker* getAsLinker() { return 0; } + virtual TUniformMap* getAsUniformMap() { return 0; } + virtual glslang::TPoolAllocator* getPool() const { return pool; } +private: + glslang::TPoolAllocator* pool; +}; + +// +// The base class for the machine dependent linker to derive from +// for managing where uniforms live. +// +class TUniformMap : public TShHandleBase { +public: + TUniformMap() { } + virtual ~TUniformMap() { } + virtual TUniformMap* getAsUniformMap() { return this; } + virtual int getLocation(const char* name) = 0; + virtual TInfoSink& getInfoSink() { return infoSink; } + TInfoSink infoSink; +}; + +class TIntermNode; + +// +// The base class for the machine dependent compiler to derive from +// for managing object code from the compile. +// +class TCompiler : public TShHandleBase { +public: + TCompiler(EShLanguage l, TInfoSink& sink) : infoSink(sink) , language(l), haveValidObjectCode(false) { } + virtual ~TCompiler() { } + EShLanguage getLanguage() { return language; } + virtual TInfoSink& getInfoSink() { return infoSink; } + + virtual bool compile(TIntermNode* root, int version = 0, EProfile profile = ENoProfile) = 0; + + virtual TCompiler* getAsCompiler() { return this; } + virtual bool linkable() { return haveValidObjectCode; } + + TInfoSink& infoSink; +protected: + TCompiler& operator=(TCompiler&); + + EShLanguage language; + bool haveValidObjectCode; +}; + +// +// Link operations are based on a list of compile results... +// +typedef glslang::TVector TCompilerList; +typedef glslang::TVector THandleList; + +// +// The base class for the machine dependent linker to derive from +// to manage the resulting executable. +// + +class TLinker : public TShHandleBase { +public: + TLinker(EShExecutable e, TInfoSink& iSink) : + infoSink(iSink), + executable(e), + haveReturnableObjectCode(false), + appAttributeBindings(0), + fixedAttributeBindings(0), + excludedAttributes(0), + excludedCount(0), + uniformBindings(0) { } + virtual TLinker* getAsLinker() { return this; } + virtual ~TLinker() { } + virtual bool link(TCompilerList&, TUniformMap*) = 0; + virtual bool link(THandleList&) { return false; } + virtual void setAppAttributeBindings(const ShBindingTable* t) { appAttributeBindings = t; } + virtual void setFixedAttributeBindings(const ShBindingTable* t) { fixedAttributeBindings = t; } + virtual void getAttributeBindings(ShBindingTable const **t) const = 0; + virtual void setExcludedAttributes(const int* attributes, int count) { excludedAttributes = attributes; excludedCount = count; } + virtual ShBindingTable* getUniformBindings() const { return uniformBindings; } + virtual const void* getObjectCode() const { return 0; } // a real compiler would be returning object code here + virtual TInfoSink& getInfoSink() { return infoSink; } + TInfoSink& infoSink; +protected: + TLinker& operator=(TLinker&); + EShExecutable executable; + bool haveReturnableObjectCode; // true when objectCode is acceptable to send to driver + + const ShBindingTable* appAttributeBindings; + const ShBindingTable* fixedAttributeBindings; + const int* excludedAttributes; + int excludedCount; + ShBindingTable* uniformBindings; // created by the linker +}; + +// +// This is the interface between the machine independent code +// and the machine dependent code. 
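The getAs*() virtuals defined above are a hand-rolled substitute for RTTI: each subclass overrides only its own getter to return itself, so driver code holding an opaque handle can probe what stands behind it. A short sketch of the convention, where 'handle' is a hypothetical ShHandle received from the driver side:

    TShHandleBase* base = reinterpret_cast<TShHandleBase*>(handle);
    if (TCompiler* compiler = base->getAsCompiler())
        compiler->getInfoSink();  // getAsCompiler() is non-null only for TCompiler-backed handles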
+//
+// The machine dependent code should derive from the classes
+// above. Then Construct*() and Delete*() will create and
+// destroy the machine dependent objects, which contain the
+// above machine independent information.
+//
+TCompiler* ConstructCompiler(EShLanguage, int);
+
+TShHandleBase* ConstructLinker(EShExecutable, int);
+TShHandleBase* ConstructBindings();
+void DeleteLinker(TShHandleBase*);
+void DeleteBindingList(TShHandleBase* bindingList);
+
+TUniformMap* ConstructUniformMap();
+void DeleteCompiler(TCompiler*);
+
+void DeleteUniformMap(TUniformMap*);
+
+#endif // _SHHANDLE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/SpirvIntrinsics.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/SpirvIntrinsics.h
new file mode 100644
index 0000000..3c7d72c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/SpirvIntrinsics.h
@@ -0,0 +1,128 @@
+//
+// Copyright(C) 2021 Advanced Micro Devices, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#pragma once
+
+#ifndef GLSLANG_WEB
+
+//
+// GL_EXT_spirv_intrinsics
+//
+#include "Common.h"
+
+namespace glslang {
+
+class TIntermTyped;
+class TIntermConstantUnion;
+class TType;
+
+// SPIR-V requirements
+struct TSpirvRequirement {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    // capability = [..]
+    TSet<TString> extensions;
+    // extension = [..]
+    TSet<int> capabilities;
+};
+
+// SPIR-V execution modes
+struct TSpirvExecutionMode {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    // spirv_execution_mode
+    TMap<int, TVector<const TIntermConstantUnion*>> modes;
+    // spirv_execution_mode_id
+    TMap<int, TVector<const TIntermConstantUnion*> > modeIds;
+};
+
+// SPIR-V decorations
+struct TSpirvDecorate {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    // spirv_decorate
+    TMap<int, TVector<const TIntermTyped*> > decorates;
+    // spirv_decorate_id
+    TMap<int, TVector<const TIntermConstantUnion*>> decorateIds;
+    // spirv_decorate_string
+    TMap<int, TVector<const TIntermConstantUnion*> > decorateStrings;
+};
+
+// SPIR-V instruction
+struct TSpirvInstruction {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TSpirvInstruction() { set = ""; id = -1; }
+
+    bool operator==(const TSpirvInstruction& rhs) const { return set == rhs.set && id == rhs.id; }
+    bool operator!=(const TSpirvInstruction& rhs) const { return !operator==(rhs); }
+
+    // spirv_instruction
+    TString set;
+    int id;
+};
+
+// SPIR-V type parameter
+struct TSpirvTypeParameter {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TSpirvTypeParameter(const TIntermConstantUnion* arg) { constant = arg; }
+
+    bool operator==(const TSpirvTypeParameter& rhs) const { return constant == rhs.constant; }
+    bool operator!=(const TSpirvTypeParameter& rhs) const { return !operator==(rhs); }
+
+    const TIntermConstantUnion* constant;
+};
+
+typedef TVector<TSpirvTypeParameter> TSpirvTypeParameters;
+
+// SPIR-V type
+struct TSpirvType {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    bool operator==(const TSpirvType& rhs) const
+    {
+        return spirvInst == rhs.spirvInst && typeParams == rhs.typeParams;
+    }
+    bool operator!=(const TSpirvType& rhs) const { return !operator==(rhs); }
+
+    // spirv_type
+    TSpirvInstruction spirvInst;
+    TSpirvTypeParameters typeParams;
+};
+
+} // end namespace glslang
+
+#endif // GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/Types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/Types.h
new file mode 100644
index 0000000..a6f47e8
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/Types.h
@@ -0,0 +1,2901 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2016 LunarG, Inc.
+// Copyright (C) 2015-2016 Google, Inc.
+// Copyright (C) 2017 ARM Limited.
+// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _TYPES_INCLUDED +#define _TYPES_INCLUDED + +#include "../Include/Common.h" +#include "../Include/BaseTypes.h" +#include "../Public/ShaderLang.h" +#include "arrays.h" +#include "SpirvIntrinsics.h" + +#include + +namespace glslang { + +class TIntermAggregate; + +const int GlslangMaxTypeLength = 200; // TODO: need to print block/struct one member per line, so this can stay bounded + +const char* const AnonymousPrefix = "anon@"; // for something like a block whose members can be directly accessed +inline bool IsAnonymous(const TString& name) +{ + return name.compare(0, 5, AnonymousPrefix) == 0; +} + +// +// Details within a sampler type +// +enum TSamplerDim { + EsdNone, + Esd1D, + Esd2D, + Esd3D, + EsdCube, + EsdRect, + EsdBuffer, + EsdSubpass, // goes only with non-sampled image (image is true) + EsdNumDims +}; + +struct TSampler { // misnomer now; includes images, textures without sampler, and textures with sampler + TBasicType type : 8; // type returned by sampler + TSamplerDim dim : 8; + bool arrayed : 1; + bool shadow : 1; + bool ms : 1; + bool image : 1; // image, combined should be false + bool combined : 1; // true means texture is combined with a sampler, false means texture with no sampler + bool sampler : 1; // true means a pure sampler, other fields should be clear() + +#ifdef GLSLANG_WEB + bool is1D() const { return false; } + bool isBuffer() const { return false; } + bool isRect() const { return false; } + bool isSubpass() const { return false; } + bool isCombined() const { return true; } + bool isImage() const { return false; } + bool isImageClass() const { return false; } + bool isMultiSample() const { return false; } + bool isExternal() const { return false; } + void setExternal(bool e) { } + bool isYuv() const { return false; } +#else + unsigned int vectorSize : 3; // vector return type size. + // Some languages support structures as sample results. Storing the whole structure in the + // TSampler is too large, so there is an index to a separate table. + static const unsigned structReturnIndexBits = 4; // number of index bits to use. + static const unsigned structReturnSlots = (1< TTypeList; + +typedef TVector TIdentifierList; + +// +// Following are a series of helper enums for managing layouts and qualifiers, +// used for TPublicType, TType, others. +// + +enum TLayoutPacking { + ElpNone, + ElpShared, // default, but different than saying nothing + ElpStd140, + ElpStd430, + ElpPacked, + ElpScalar, + ElpCount // If expanding, see bitfield width below +}; + +enum TLayoutMatrix { + ElmNone, + ElmRowMajor, + ElmColumnMajor, // default, but different than saying nothing + ElmCount // If expanding, see bitfield width below +}; + +// Union of geometry shader and tessellation shader geometry types. +// They don't go into TType, but rather have current state per shader or +// active parser type (TPublicType). 
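Returning to the TSampler bitfields earlier in this header: a single packed struct describes samplers, textures, and images, so a GLSL sampler2D is just one combination of bits. A sketch, assuming TSampler's clear() helper from the portion of the struct elided above:

    TSampler s;
    s.clear();          // assumed helper: resets all bitfields
    s.type = EbtFloat;  // texels are returned as float
    s.dim = Esd2D;
    s.combined = true;  // texture bound together with a sampler, i.e. GLSL sampler2D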
+enum TLayoutGeometry { + ElgNone, + ElgPoints, + ElgLines, + ElgLinesAdjacency, + ElgLineStrip, + ElgTriangles, + ElgTrianglesAdjacency, + ElgTriangleStrip, + ElgQuads, + ElgIsolines, +}; + +enum TVertexSpacing { + EvsNone, + EvsEqual, + EvsFractionalEven, + EvsFractionalOdd +}; + +enum TVertexOrder { + EvoNone, + EvoCw, + EvoCcw +}; + +// Note: order matters, as type of format is done by comparison. +enum TLayoutFormat { + ElfNone, + + // Float image + ElfRgba32f, + ElfRgba16f, + ElfR32f, + ElfRgba8, + ElfRgba8Snorm, + + ElfEsFloatGuard, // to help with comparisons + + ElfRg32f, + ElfRg16f, + ElfR11fG11fB10f, + ElfR16f, + ElfRgba16, + ElfRgb10A2, + ElfRg16, + ElfRg8, + ElfR16, + ElfR8, + ElfRgba16Snorm, + ElfRg16Snorm, + ElfRg8Snorm, + ElfR16Snorm, + ElfR8Snorm, + + ElfFloatGuard, // to help with comparisons + + // Int image + ElfRgba32i, + ElfRgba16i, + ElfRgba8i, + ElfR32i, + + ElfEsIntGuard, // to help with comparisons + + ElfRg32i, + ElfRg16i, + ElfRg8i, + ElfR16i, + ElfR8i, + ElfR64i, + + ElfIntGuard, // to help with comparisons + + // Uint image + ElfRgba32ui, + ElfRgba16ui, + ElfRgba8ui, + ElfR32ui, + + ElfEsUintGuard, // to help with comparisons + + ElfRg32ui, + ElfRg16ui, + ElfRgb10a2ui, + ElfRg8ui, + ElfR16ui, + ElfR8ui, + ElfR64ui, + + ElfCount +}; + +enum TLayoutDepth { + EldNone, + EldAny, + EldGreater, + EldLess, + EldUnchanged, + + EldCount +}; + +enum TLayoutStencil { + ElsNone, + ElsRefUnchangedFrontAMD, + ElsRefGreaterFrontAMD, + ElsRefLessFrontAMD, + ElsRefUnchangedBackAMD, + ElsRefGreaterBackAMD, + ElsRefLessBackAMD, + + ElsCount +}; + +enum TBlendEquationShift { + // No 'EBlendNone': + // These are used as bit-shift amounts. A mask of such shifts will have type 'int', + // and in that space, 0 means no bits set, or none. In this enum, 0 means (1 << 0), a bit is set. 
+ EBlendMultiply, + EBlendScreen, + EBlendOverlay, + EBlendDarken, + EBlendLighten, + EBlendColordodge, + EBlendColorburn, + EBlendHardlight, + EBlendSoftlight, + EBlendDifference, + EBlendExclusion, + EBlendHslHue, + EBlendHslSaturation, + EBlendHslColor, + EBlendHslLuminosity, + EBlendAllEquations, + + EBlendCount +}; + +enum TInterlockOrdering { + EioNone, + EioPixelInterlockOrdered, + EioPixelInterlockUnordered, + EioSampleInterlockOrdered, + EioSampleInterlockUnordered, + EioShadingRateInterlockOrdered, + EioShadingRateInterlockUnordered, + + EioCount, +}; + +enum TShaderInterface +{ + // Includes both uniform blocks and buffer blocks + EsiUniform = 0, + EsiInput, + EsiOutput, + EsiNone, + + EsiCount +}; + +class TQualifier { +public: + static const int layoutNotSet = -1; + + void clear() + { + precision = EpqNone; + invariant = false; + makeTemporary(); + declaredBuiltIn = EbvNone; +#ifndef GLSLANG_WEB + noContraction = false; + nullInit = false; + spirvByReference = false; + spirvLiteral = false; +#endif + defaultBlock = false; + } + + // drop qualifiers that don't belong in a temporary variable + void makeTemporary() + { + semanticName = nullptr; + storage = EvqTemporary; + builtIn = EbvNone; + clearInterstage(); + clearMemory(); + specConstant = false; + nonUniform = false; + nullInit = false; + defaultBlock = false; + clearLayout(); +#ifndef GLSLANG_WEB + spirvStorageClass = -1; + spirvDecorate = nullptr; + spirvByReference = false; + spirvLiteral = false; +#endif + } + + void clearInterstage() + { + clearInterpolation(); +#ifndef GLSLANG_WEB + patch = false; + sample = false; +#endif + } + + void clearInterpolation() + { + centroid = false; + smooth = false; + flat = false; +#ifndef GLSLANG_WEB + nopersp = false; + explicitInterp = false; + pervertexNV = false; + perPrimitiveNV = false; + perViewNV = false; + perTaskNV = false; +#endif + pervertexEXT = false; + } + + void clearMemory() + { +#ifndef GLSLANG_WEB + coherent = false; + devicecoherent = false; + queuefamilycoherent = false; + workgroupcoherent = false; + subgroupcoherent = false; + shadercallcoherent = false; + nonprivate = false; + volatil = false; + restrict = false; + readonly = false; + writeonly = false; +#endif + } + + const char* semanticName; + TStorageQualifier storage : 6; + TBuiltInVariable builtIn : 9; + TBuiltInVariable declaredBuiltIn : 9; + static_assert(EbvLast < 256, "need to increase size of TBuiltInVariable bitfields!"); + TPrecisionQualifier precision : 3; + bool invariant : 1; // require canonical treatment for cross-shader invariance + bool centroid : 1; + bool smooth : 1; + bool flat : 1; + // having a constant_id is not sufficient: expressions have no id, but are still specConstant + bool specConstant : 1; + bool nonUniform : 1; + bool explicitOffset : 1; + bool defaultBlock : 1; // default blocks with matching names have structures merged when linking + +#ifdef GLSLANG_WEB + bool isWriteOnly() const { return false; } + bool isReadOnly() const { return false; } + bool isRestrict() const { return false; } + bool isCoherent() const { return false; } + bool isVolatile() const { return false; } + bool isSample() const { return false; } + bool isMemory() const { return false; } + bool isMemoryQualifierImageAndSSBOOnly() const { return false; } + bool bufferReferenceNeedsVulkanMemoryModel() const { return false; } + bool isInterpolation() const { return flat || smooth; } + bool isExplicitInterpolation() const { return false; } + bool isAuxiliary() const { return centroid; } + bool isPatch() const { 
return false; } + bool isNoContraction() const { return false; } + void setNoContraction() { } + bool isPervertexNV() const { return false; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() {} + bool isNullInit() const { return false; } + void setSpirvByReference() { } + bool isSpirvByReference() { return false; } + void setSpirvLiteral() { } + bool isSpirvLiteral() { return false; } +#else + bool noContraction: 1; // prevent contraction and reassociation, e.g., for 'precise' keyword, and expressions it affects + bool nopersp : 1; + bool explicitInterp : 1; + bool pervertexNV : 1; + bool pervertexEXT : 1; + bool perPrimitiveNV : 1; + bool perViewNV : 1; + bool perTaskNV : 1; + bool patch : 1; + bool sample : 1; + bool restrict : 1; + bool readonly : 1; + bool writeonly : 1; + bool coherent : 1; + bool volatil : 1; + bool devicecoherent : 1; + bool queuefamilycoherent : 1; + bool workgroupcoherent : 1; + bool subgroupcoherent : 1; + bool shadercallcoherent : 1; + bool nonprivate : 1; + bool nullInit : 1; + bool spirvByReference : 1; + bool spirvLiteral : 1; + bool isWriteOnly() const { return writeonly; } + bool isReadOnly() const { return readonly; } + bool isRestrict() const { return restrict; } + bool isCoherent() const { return coherent; } + bool isVolatile() const { return volatil; } + bool isSample() const { return sample; } + bool isMemory() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly || nonprivate; + } + bool isMemoryQualifierImageAndSSBOOnly() const + { + return shadercallcoherent || subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || volatil || restrict || readonly || writeonly; + } + bool bufferReferenceNeedsVulkanMemoryModel() const + { + // include qualifiers that map to load/store availability/visibility/nonprivate memory access operands + return subgroupcoherent || workgroupcoherent || queuefamilycoherent || devicecoherent || coherent || nonprivate; + } + bool isInterpolation() const + { + return flat || smooth || nopersp || explicitInterp; + } + bool isExplicitInterpolation() const + { + return explicitInterp; + } + bool isAuxiliary() const + { + return centroid || patch || sample || pervertexNV || pervertexEXT; + } + bool isPatch() const { return patch; } + bool isNoContraction() const { return noContraction; } + void setNoContraction() { noContraction = true; } + bool isPervertexNV() const { return pervertexNV; } + bool isPervertexEXT() const { return pervertexEXT; } + void setNullInit() { nullInit = true; } + bool isNullInit() const { return nullInit; } + void setSpirvByReference() { spirvByReference = true; } + bool isSpirvByReference() const { return spirvByReference; } + void setSpirvLiteral() { spirvLiteral = true; } + bool isSpirvLiteral() const { return spirvLiteral; } +#endif + + bool isPipeInput() const + { + switch (storage) { + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + return true; + default: + return false; + } + } + + bool isPipeOutput() const + { + switch (storage) { + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + bool isParamInput() const + { + switch (storage) { + case EvqIn: + case EvqInOut: + case EvqConstReadOnly: + 
return true; + default: + return false; + } + } + + bool isParamOutput() const + { + switch (storage) { + case EvqOut: + case EvqInOut: + return true; + default: + return false; + } + } + + bool isUniformOrBuffer() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + return true; + default: + return false; + } + } + + bool isUniform() const + { + switch (storage) { + case EvqUniform: + return true; + default: + return false; + } + } + + bool isIo() const + { + switch (storage) { + case EvqUniform: + case EvqBuffer: + case EvqVaryingIn: + case EvqFragCoord: + case EvqPointCoord: + case EvqFace: + case EvqVertexId: + case EvqInstanceId: + case EvqPosition: + case EvqPointSize: + case EvqClipVertex: + case EvqVaryingOut: + case EvqFragColor: + case EvqFragDepth: + case EvqFragStencil: + return true; + default: + return false; + } + } + + // non-built-in symbols that might link between compilation units + bool isLinkable() const + { + switch (storage) { + case EvqGlobal: + case EvqVaryingIn: + case EvqVaryingOut: + case EvqUniform: + case EvqBuffer: + case EvqShared: + return true; + default: + return false; + } + } + + TBlockStorageClass getBlockStorage() const { + if (storage == EvqUniform && !isPushConstant()) { + return EbsUniform; + } + else if (storage == EvqUniform) { + return EbsPushConstant; + } + else if (storage == EvqBuffer) { + return EbsStorageBuffer; + } + return EbsNone; + } + + void setBlockStorage(TBlockStorageClass newBacking) { +#ifndef GLSLANG_WEB + layoutPushConstant = (newBacking == EbsPushConstant); +#endif + switch (newBacking) { + case EbsUniform : + if (layoutPacking == ElpStd430) { + // std430 would not be valid + layoutPacking = ElpStd140; + } + storage = EvqUniform; + break; + case EbsStorageBuffer : + storage = EvqBuffer; + break; +#ifndef GLSLANG_WEB + case EbsPushConstant : + storage = EvqUniform; + layoutSet = TQualifier::layoutSetEnd; + layoutBinding = TQualifier::layoutBindingEnd; + break; +#endif + default: + break; + } + } + +#ifdef GLSLANG_WEB + bool isPerView() const { return false; } + bool isTaskMemory() const { return false; } + bool isArrayedIo(EShLanguage language) const { return false; } +#else + bool isPerPrimitive() const { return perPrimitiveNV; } + bool isPerView() const { return perViewNV; } + bool isTaskMemory() const { return perTaskNV; } + bool isTaskPayload() const { return storage == EvqtaskPayloadSharedEXT; } + bool isAnyPayload() const { + return storage == EvqPayload || storage == EvqPayloadIn; + } + bool isAnyCallable() const { + return storage == EvqCallableData || storage == EvqCallableDataIn; + } + + // True if this type of IO is supposed to be arrayed with extra level for per-vertex data + bool isArrayedIo(EShLanguage language) const + { + switch (language) { + case EShLangGeometry: + return isPipeInput(); + case EShLangTessControl: + return ! patch && (isPipeInput() || isPipeOutput()); + case EShLangTessEvaluation: + return ! patch && isPipeInput(); + case EShLangFragment: + return (pervertexNV || pervertexEXT) && isPipeInput(); + case EShLangMesh: + return ! 
perTaskNV && isPipeOutput(); + + default: + return false; + } + } +#endif + + // Implementing an embedded layout-qualifier class here, since C++ can't have a real class bitfield + void clearLayout() // all layout + { + clearUniformLayout(); + +#ifndef GLSLANG_WEB + layoutPushConstant = false; + layoutBufferReference = false; + layoutPassthrough = false; + layoutViewportRelative = false; + // -2048 as the default value indicating layoutSecondaryViewportRelative is not set + layoutSecondaryViewportRelativeOffset = -2048; + layoutShaderRecord = false; + layoutBufferReferenceAlign = layoutBufferReferenceAlignEnd; + layoutFormat = ElfNone; +#endif + + clearInterstageLayout(); + + layoutSpecConstantId = layoutSpecConstantIdEnd; + } + void clearInterstageLayout() + { + layoutLocation = layoutLocationEnd; + layoutComponent = layoutComponentEnd; +#ifndef GLSLANG_WEB + layoutIndex = layoutIndexEnd; + clearStreamLayout(); + clearXfbLayout(); +#endif + } + +#ifndef GLSLANG_WEB + void clearStreamLayout() + { + layoutStream = layoutStreamEnd; + } + void clearXfbLayout() + { + layoutXfbBuffer = layoutXfbBufferEnd; + layoutXfbStride = layoutXfbStrideEnd; + layoutXfbOffset = layoutXfbOffsetEnd; + } +#endif + + bool hasNonXfbLayout() const + { + return hasUniformLayout() || + hasAnyLocation() || + hasStream() || + hasFormat() || + isShaderRecord() || + isPushConstant() || + hasBufferReference(); + } + bool hasLayout() const + { + return hasNonXfbLayout() || + hasXfb(); + } + + TLayoutMatrix layoutMatrix : 3; + TLayoutPacking layoutPacking : 4; + int layoutOffset; + int layoutAlign; + + unsigned int layoutLocation : 12; + static const unsigned int layoutLocationEnd = 0xFFF; + + unsigned int layoutComponent : 3; + static const unsigned int layoutComponentEnd = 4; + + unsigned int layoutSet : 7; + static const unsigned int layoutSetEnd = 0x3F; + + unsigned int layoutBinding : 16; + static const unsigned int layoutBindingEnd = 0xFFFF; + + unsigned int layoutIndex : 8; + static const unsigned int layoutIndexEnd = 0xFF; + + unsigned int layoutStream : 8; + static const unsigned int layoutStreamEnd = 0xFF; + + unsigned int layoutXfbBuffer : 4; + static const unsigned int layoutXfbBufferEnd = 0xF; + + unsigned int layoutXfbStride : 14; + static const unsigned int layoutXfbStrideEnd = 0x3FFF; + + unsigned int layoutXfbOffset : 13; + static const unsigned int layoutXfbOffsetEnd = 0x1FFF; + + unsigned int layoutAttachment : 8; // for input_attachment_index + static const unsigned int layoutAttachmentEnd = 0XFF; + + unsigned int layoutSpecConstantId : 11; + static const unsigned int layoutSpecConstantIdEnd = 0x7FF; + +#ifndef GLSLANG_WEB + // stored as log2 of the actual alignment value + unsigned int layoutBufferReferenceAlign : 6; + static const unsigned int layoutBufferReferenceAlignEnd = 0x3F; + + TLayoutFormat layoutFormat : 8; + + bool layoutPushConstant; + bool layoutBufferReference; + bool layoutPassthrough; + bool layoutViewportRelative; + int layoutSecondaryViewportRelativeOffset; + bool layoutShaderRecord; + + // GL_EXT_spirv_intrinsics + int spirvStorageClass; + TSpirvDecorate* spirvDecorate; +#endif + + bool hasUniformLayout() const + { + return hasMatrix() || + hasPacking() || + hasOffset() || + hasBinding() || + hasSet() || + hasAlign(); + } + void clearUniformLayout() // only uniform specific + { + layoutMatrix = ElmNone; + layoutPacking = ElpNone; + layoutOffset = layoutNotSet; + layoutAlign = layoutNotSet; + + layoutSet = layoutSetEnd; + layoutBinding = layoutBindingEnd; +#ifndef GLSLANG_WEB + 
layoutAttachment = layoutAttachmentEnd; +#endif + } + + bool hasMatrix() const + { + return layoutMatrix != ElmNone; + } + bool hasPacking() const + { + return layoutPacking != ElpNone; + } + bool hasAlign() const + { + return layoutAlign != layoutNotSet; + } + bool hasAnyLocation() const + { + return hasLocation() || + hasComponent() || + hasIndex(); + } + bool hasLocation() const + { + return layoutLocation != layoutLocationEnd; + } + bool hasSet() const + { + return layoutSet != layoutSetEnd; + } + bool hasBinding() const + { + return layoutBinding != layoutBindingEnd; + } +#ifdef GLSLANG_WEB + bool hasOffset() const { return false; } + bool isNonPerspective() const { return false; } + bool hasIndex() const { return false; } + unsigned getIndex() const { return 0; } + bool hasComponent() const { return false; } + bool hasStream() const { return false; } + bool hasFormat() const { return false; } + bool hasXfb() const { return false; } + bool hasXfbBuffer() const { return false; } + bool hasXfbStride() const { return false; } + bool hasXfbOffset() const { return false; } + bool hasAttachment() const { return false; } + TLayoutFormat getFormat() const { return ElfNone; } + bool isPushConstant() const { return false; } + bool isShaderRecord() const { return false; } + bool hasBufferReference() const { return false; } + bool hasBufferReferenceAlign() const { return false; } + bool isNonUniform() const { return false; } +#else + bool hasOffset() const + { + return layoutOffset != layoutNotSet; + } + bool isNonPerspective() const { return nopersp; } + bool hasIndex() const + { + return layoutIndex != layoutIndexEnd; + } + unsigned getIndex() const { return layoutIndex; } + bool hasComponent() const + { + return layoutComponent != layoutComponentEnd; + } + bool hasStream() const + { + return layoutStream != layoutStreamEnd; + } + bool hasFormat() const + { + return layoutFormat != ElfNone; + } + bool hasXfb() const + { + return hasXfbBuffer() || + hasXfbStride() || + hasXfbOffset(); + } + bool hasXfbBuffer() const + { + return layoutXfbBuffer != layoutXfbBufferEnd; + } + bool hasXfbStride() const + { + return layoutXfbStride != layoutXfbStrideEnd; + } + bool hasXfbOffset() const + { + return layoutXfbOffset != layoutXfbOffsetEnd; + } + bool hasAttachment() const + { + return layoutAttachment != layoutAttachmentEnd; + } + TLayoutFormat getFormat() const { return layoutFormat; } + bool isPushConstant() const { return layoutPushConstant; } + bool isShaderRecord() const { return layoutShaderRecord; } + bool hasBufferReference() const { return layoutBufferReference; } + bool hasBufferReferenceAlign() const + { + return layoutBufferReferenceAlign != layoutBufferReferenceAlignEnd; + } + bool isNonUniform() const + { + return nonUniform; + } + + // GL_EXT_spirv_intrinsics + bool hasSprivDecorate() const { return spirvDecorate != nullptr; } + void setSpirvDecorate(int decoration, const TIntermAggregate* args = nullptr); + void setSpirvDecorateId(int decoration, const TIntermAggregate* args); + void setSpirvDecorateString(int decoration, const TIntermAggregate* args); + const TSpirvDecorate& getSpirvDecorate() const { assert(spirvDecorate); return *spirvDecorate; } + TSpirvDecorate& getSpirvDecorate() { assert(spirvDecorate); return *spirvDecorate; } + TString getSpirvDecorateQualifierString() const; +#endif + bool hasSpecConstantId() const + { + // Not the same thing as being a specialization constant, this + // is just whether or not it was declared with an ID. 
+ return layoutSpecConstantId != layoutSpecConstantIdEnd; + } + bool isSpecConstant() const + { + // True if type is a specialization constant, whether or not it + // had a specialization-constant ID, and false if it is not a + // true front-end constant. + return specConstant; + } + bool isFrontEndConstant() const + { + // True if the front-end knows the final constant value. + // This allows front-end constant folding. + return storage == EvqConst && ! specConstant; + } + bool isConstant() const + { + // True if is either kind of constant; specialization or regular. + return isFrontEndConstant() || isSpecConstant(); + } + void makeSpecConstant() + { + storage = EvqConst; + specConstant = true; + } + static const char* getLayoutPackingString(TLayoutPacking packing) + { + switch (packing) { + case ElpStd140: return "std140"; +#ifndef GLSLANG_WEB + case ElpPacked: return "packed"; + case ElpShared: return "shared"; + case ElpStd430: return "std430"; + case ElpScalar: return "scalar"; +#endif + default: return "none"; + } + } + static const char* getLayoutMatrixString(TLayoutMatrix m) + { + switch (m) { + case ElmColumnMajor: return "column_major"; + case ElmRowMajor: return "row_major"; + default: return "none"; + } + } +#ifdef GLSLANG_WEB + static const char* getLayoutFormatString(TLayoutFormat f) { return "none"; } +#else + static const char* getLayoutFormatString(TLayoutFormat f) + { + switch (f) { + case ElfRgba32f: return "rgba32f"; + case ElfRgba16f: return "rgba16f"; + case ElfRg32f: return "rg32f"; + case ElfRg16f: return "rg16f"; + case ElfR11fG11fB10f: return "r11f_g11f_b10f"; + case ElfR32f: return "r32f"; + case ElfR16f: return "r16f"; + case ElfRgba16: return "rgba16"; + case ElfRgb10A2: return "rgb10_a2"; + case ElfRgba8: return "rgba8"; + case ElfRg16: return "rg16"; + case ElfRg8: return "rg8"; + case ElfR16: return "r16"; + case ElfR8: return "r8"; + case ElfRgba16Snorm: return "rgba16_snorm"; + case ElfRgba8Snorm: return "rgba8_snorm"; + case ElfRg16Snorm: return "rg16_snorm"; + case ElfRg8Snorm: return "rg8_snorm"; + case ElfR16Snorm: return "r16_snorm"; + case ElfR8Snorm: return "r8_snorm"; + + case ElfRgba32i: return "rgba32i"; + case ElfRgba16i: return "rgba16i"; + case ElfRgba8i: return "rgba8i"; + case ElfRg32i: return "rg32i"; + case ElfRg16i: return "rg16i"; + case ElfRg8i: return "rg8i"; + case ElfR32i: return "r32i"; + case ElfR16i: return "r16i"; + case ElfR8i: return "r8i"; + + case ElfRgba32ui: return "rgba32ui"; + case ElfRgba16ui: return "rgba16ui"; + case ElfRgba8ui: return "rgba8ui"; + case ElfRg32ui: return "rg32ui"; + case ElfRg16ui: return "rg16ui"; + case ElfRgb10a2ui: return "rgb10_a2ui"; + case ElfRg8ui: return "rg8ui"; + case ElfR32ui: return "r32ui"; + case ElfR16ui: return "r16ui"; + case ElfR8ui: return "r8ui"; + case ElfR64ui: return "r64ui"; + case ElfR64i: return "r64i"; + default: return "none"; + } + } + static const char* getLayoutDepthString(TLayoutDepth d) + { + switch (d) { + case EldAny: return "depth_any"; + case EldGreater: return "depth_greater"; + case EldLess: return "depth_less"; + case EldUnchanged: return "depth_unchanged"; + default: return "none"; + } + } + static const char* getLayoutStencilString(TLayoutStencil s) + { + switch (s) { + case ElsRefUnchangedFrontAMD: return "stencil_ref_unchanged_front_amd"; + case ElsRefGreaterFrontAMD: return "stencil_ref_greater_front_amd"; + case ElsRefLessFrontAMD: return "stencil_ref_less_front_amd"; + case ElsRefUnchangedBackAMD: return "stencil_ref_unchanged_back_amd"; + case 
ElsRefGreaterBackAMD: return "stencil_ref_greater_back_amd"; + case ElsRefLessBackAMD: return "stencil_ref_less_back_amd"; + default: return "none"; + } + } + static const char* getBlendEquationString(TBlendEquationShift e) + { + switch (e) { + case EBlendMultiply: return "blend_support_multiply"; + case EBlendScreen: return "blend_support_screen"; + case EBlendOverlay: return "blend_support_overlay"; + case EBlendDarken: return "blend_support_darken"; + case EBlendLighten: return "blend_support_lighten"; + case EBlendColordodge: return "blend_support_colordodge"; + case EBlendColorburn: return "blend_support_colorburn"; + case EBlendHardlight: return "blend_support_hardlight"; + case EBlendSoftlight: return "blend_support_softlight"; + case EBlendDifference: return "blend_support_difference"; + case EBlendExclusion: return "blend_support_exclusion"; + case EBlendHslHue: return "blend_support_hsl_hue"; + case EBlendHslSaturation: return "blend_support_hsl_saturation"; + case EBlendHslColor: return "blend_support_hsl_color"; + case EBlendHslLuminosity: return "blend_support_hsl_luminosity"; + case EBlendAllEquations: return "blend_support_all_equations"; + default: return "unknown"; + } + } + static const char* getGeometryString(TLayoutGeometry geometry) + { + switch (geometry) { + case ElgPoints: return "points"; + case ElgLines: return "lines"; + case ElgLinesAdjacency: return "lines_adjacency"; + case ElgLineStrip: return "line_strip"; + case ElgTriangles: return "triangles"; + case ElgTrianglesAdjacency: return "triangles_adjacency"; + case ElgTriangleStrip: return "triangle_strip"; + case ElgQuads: return "quads"; + case ElgIsolines: return "isolines"; + default: return "none"; + } + } + static const char* getVertexSpacingString(TVertexSpacing spacing) + { + switch (spacing) { + case EvsEqual: return "equal_spacing"; + case EvsFractionalEven: return "fractional_even_spacing"; + case EvsFractionalOdd: return "fractional_odd_spacing"; + default: return "none"; + } + } + static const char* getVertexOrderString(TVertexOrder order) + { + switch (order) { + case EvoCw: return "cw"; + case EvoCcw: return "ccw"; + default: return "none"; + } + } + static int mapGeometryToSize(TLayoutGeometry geometry) + { + switch (geometry) { + case ElgPoints: return 1; + case ElgLines: return 2; + case ElgLinesAdjacency: return 4; + case ElgTriangles: return 3; + case ElgTrianglesAdjacency: return 6; + default: return 0; + } + } + static const char* getInterlockOrderingString(TInterlockOrdering order) + { + switch (order) { + case EioPixelInterlockOrdered: return "pixel_interlock_ordered"; + case EioPixelInterlockUnordered: return "pixel_interlock_unordered"; + case EioSampleInterlockOrdered: return "sample_interlock_ordered"; + case EioSampleInterlockUnordered: return "sample_interlock_unordered"; + case EioShadingRateInterlockOrdered: return "shading_rate_interlock_ordered"; + case EioShadingRateInterlockUnordered: return "shading_rate_interlock_unordered"; + default: return "none"; + } + } +#endif +}; + +// Qualifiers that don't need to be keep per object. They have shader scope, not object scope. +// So, they will not be part of TType, TQualifier, etc. 
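The static name helpers above are pure lookup tables with "none"/"unknown" fallbacks and need no TQualifier instance; for example:

    const char* fmt = TQualifier::getLayoutFormatString(ElfRgba8);  // "rgba8"
    int verts = TQualifier::mapGeometryToSize(ElgTriangles);        // 3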
+struct TShaderQualifiers {
+    TLayoutGeometry geometry; // geometry/tessellation shader in/out primitives
+    bool pixelCenterInteger;  // fragment shader
+    bool originUpperLeft;     // fragment shader
+    int invocations;
+    int vertices;             // for tessellation "vertices", geometry & mesh "max_vertices"
+    TVertexSpacing spacing;
+    TVertexOrder order;
+    bool pointMode;
+    int localSize[3];         // compute shader
+    bool localSizeNotDefault[3]; // compute shader
+    int localSizeSpecId[3];   // compute shader specialization id for gl_WorkGroupSize
+#ifndef GLSLANG_WEB
+    bool earlyFragmentTests;  // fragment input
+    bool postDepthCoverage;   // fragment input
+    bool earlyAndLateFragmentTestsAMD; // fragment input
+    TLayoutDepth layoutDepth;
+    TLayoutStencil layoutStencil;
+    bool blendEquation;       // true if any blend equation was specified
+    int numViews;             // multiview extensions
+    TInterlockOrdering interlockOrdering;
+    bool layoutOverrideCoverage;        // true if layout override_coverage set
+    bool layoutDerivativeGroupQuads;    // true if layout derivative_group_quadsNV set
+    bool layoutDerivativeGroupLinear;   // true if layout derivative_group_linearNV set
+    int primitives;                     // mesh shader "max_primitives"
+    bool layoutPrimitiveCulling;        // true if layout primitive_culling set
+    TLayoutDepth getDepth() const { return layoutDepth; }
+    TLayoutStencil getStencil() const { return layoutStencil; }
+#else
+    TLayoutDepth getDepth() const { return EldNone; }
+#endif
+
+    void init()
+    {
+        geometry = ElgNone;
+        originUpperLeft = false;
+        pixelCenterInteger = false;
+        invocations = TQualifier::layoutNotSet;
+        vertices = TQualifier::layoutNotSet;
+        spacing = EvsNone;
+        order = EvoNone;
+        pointMode = false;
+        localSize[0] = 1;
+        localSize[1] = 1;
+        localSize[2] = 1;
+        localSizeNotDefault[0] = false;
+        localSizeNotDefault[1] = false;
+        localSizeNotDefault[2] = false;
+        localSizeSpecId[0] = TQualifier::layoutNotSet;
+        localSizeSpecId[1] = TQualifier::layoutNotSet;
+        localSizeSpecId[2] = TQualifier::layoutNotSet;
+#ifndef GLSLANG_WEB
+        earlyFragmentTests = false;
+        earlyAndLateFragmentTestsAMD = false;
+        postDepthCoverage = false;
+        layoutDepth = EldNone;
+        layoutStencil = ElsNone;
+        blendEquation = false;
+        numViews = TQualifier::layoutNotSet;
+        layoutOverrideCoverage = false;
+        layoutDerivativeGroupQuads = false;
+        layoutDerivativeGroupLinear = false;
+        layoutPrimitiveCulling = false;
+        primitives = TQualifier::layoutNotSet;
+        interlockOrdering = EioNone;
+#endif
+    }
+
+#ifdef GLSLANG_WEB
+    bool hasBlendEquation() const { return false; }
+#else
+    bool hasBlendEquation() const { return blendEquation; }
+#endif
+
+    // Merge in characteristics from the 'src' qualifier. They can override when
+    // set, but never erase when not set.
+ void merge(const TShaderQualifiers& src) + { + if (src.geometry != ElgNone) + geometry = src.geometry; + if (src.pixelCenterInteger) + pixelCenterInteger = src.pixelCenterInteger; + if (src.originUpperLeft) + originUpperLeft = src.originUpperLeft; + if (src.invocations != TQualifier::layoutNotSet) + invocations = src.invocations; + if (src.vertices != TQualifier::layoutNotSet) + vertices = src.vertices; + if (src.spacing != EvsNone) + spacing = src.spacing; + if (src.order != EvoNone) + order = src.order; + if (src.pointMode) + pointMode = true; + for (int i = 0; i < 3; ++i) { + if (src.localSize[i] > 1) + localSize[i] = src.localSize[i]; + } + for (int i = 0; i < 3; ++i) { + localSizeNotDefault[i] = src.localSizeNotDefault[i] || localSizeNotDefault[i]; + } + for (int i = 0; i < 3; ++i) { + if (src.localSizeSpecId[i] != TQualifier::layoutNotSet) + localSizeSpecId[i] = src.localSizeSpecId[i]; + } +#ifndef GLSLANG_WEB + if (src.earlyFragmentTests) + earlyFragmentTests = true; + if (src.earlyAndLateFragmentTestsAMD) + earlyAndLateFragmentTestsAMD = true; + if (src.postDepthCoverage) + postDepthCoverage = true; + if (src.layoutDepth) + layoutDepth = src.layoutDepth; + if (src.layoutStencil) + layoutStencil = src.layoutStencil; + if (src.blendEquation) + blendEquation = src.blendEquation; + if (src.numViews != TQualifier::layoutNotSet) + numViews = src.numViews; + if (src.layoutOverrideCoverage) + layoutOverrideCoverage = src.layoutOverrideCoverage; + if (src.layoutDerivativeGroupQuads) + layoutDerivativeGroupQuads = src.layoutDerivativeGroupQuads; + if (src.layoutDerivativeGroupLinear) + layoutDerivativeGroupLinear = src.layoutDerivativeGroupLinear; + if (src.primitives != TQualifier::layoutNotSet) + primitives = src.primitives; + if (src.interlockOrdering != EioNone) + interlockOrdering = src.interlockOrdering; + if (src.layoutPrimitiveCulling) + layoutPrimitiveCulling = src.layoutPrimitiveCulling; +#endif + } +}; + +// +// TPublicType is just temporarily used while parsing and not quite the same +// information kept per node in TType. Due to the bison stack, it can't have +// types that it thinks have non-trivial constructors. It should +// just be used while recognizing the grammar, not anything else. +// Once enough is known about the situation, the proper information +// moved into a TType, or the parse context, etc. 
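Returning to merge() above: it implements "set wins, unset never erases", copying a field only when the source holds a non-default value. A small sketch grounded in that code, with illustrative values:

    TShaderQualifiers dst, src;
    dst.init();
    src.init();
    src.invocations = 4;                         // explicitly set on the source
    dst.merge(src);                              // dst.invocations becomes 4
    src.invocations = TQualifier::layoutNotSet;  // back to "unset"
    dst.merge(src);                              // dst.invocations stays 4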
+// +class TPublicType { +public: + TBasicType basicType; + TSampler sampler; + TQualifier qualifier; + TShaderQualifiers shaderQualifiers; + int vectorSize : 4; + int matrixCols : 4; + int matrixRows : 4; + bool coopmat : 1; + TArraySizes* arraySizes; + const TType* userDef; + TSourceLoc loc; + TArraySizes* typeParameters; +#ifndef GLSLANG_WEB + // SPIR-V type defined by spirv_type directive + TSpirvType* spirvType; +#endif + +#ifdef GLSLANG_WEB + bool isCoopmat() const { return false; } +#else + bool isCoopmat() const { return coopmat; } +#endif + + void initType(const TSourceLoc& l) + { + basicType = EbtVoid; + vectorSize = 1; + matrixRows = 0; + matrixCols = 0; + arraySizes = nullptr; + userDef = nullptr; + loc = l; + typeParameters = nullptr; + coopmat = false; +#ifndef GLSLANG_WEB + spirvType = nullptr; +#endif + } + + void initQualifiers(bool global = false) + { + qualifier.clear(); + if (global) + qualifier.storage = EvqGlobal; + } + + void init(const TSourceLoc& l, bool global = false) + { + initType(l); + sampler.clear(); + initQualifiers(global); + shaderQualifiers.init(); + } + + void setVector(int s) + { + matrixRows = 0; + matrixCols = 0; + vectorSize = s; + } + + void setMatrix(int c, int r) + { + matrixRows = r; + matrixCols = c; + vectorSize = 0; + } + + bool isScalar() const + { + return matrixCols == 0 && vectorSize == 1 && arraySizes == nullptr && userDef == nullptr; + } + +#ifndef GLSLANG_WEB + // GL_EXT_spirv_intrinsics + void setSpirvType(const TSpirvInstruction& spirvInst, const TSpirvTypeParameters* typeParams = nullptr); +#endif + + // "Image" is a superset of "Subpass" + bool isImage() const { return basicType == EbtSampler && sampler.isImage(); } + bool isSubpass() const { return basicType == EbtSampler && sampler.isSubpass(); } +}; + +// +// Base class for things that have a type. 
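Tying TPublicType to the TType class that follows: the parser fills in a TPublicType while recognizing the grammar, then promotes it through TType's TPublicType constructor. A sketch, where the zero-initialized TSourceLoc is illustrative (real locations come from the scanner):

    TSourceLoc loc = {};
    TPublicType pub;
    pub.init(loc);             // starts out as a void scalar
    pub.basicType = EbtFloat;
    pub.setVector(4);          // now describes a vec4
    TType vec4Type(pub);       // promote to a real TType (shallow copy)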
+// +class TType { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + // for "empty" type (no args) or simple scalar/vector/matrix + explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for explicit precision qualifier + TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, + bool isVector = false) : + basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), coopmat(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + qualifier.storage = q; + qualifier.precision = p; + assert(p >= EpqNone && p <= EpqHigh); + assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices + } + // for turning a TPublicType into a TType, using a shallow copy + explicit TType(const TPublicType& p) : + basicType(p.basicType), + vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), coopmat(p.coopmat), + arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr), typeParameters(p.typeParameters) +#ifndef GLSLANG_WEB + , spirvType(p.spirvType) +#endif + { + if (basicType == EbtSampler) + sampler = p.sampler; + else + sampler.clear(); + qualifier = p.qualifier; + if (p.userDef) { + if (p.userDef->basicType == EbtReference) { + basicType = EbtReference; + referentType = p.userDef->referentType; + } else { + structure = p.userDef->getWritableStruct(); // public type is short-lived; there are no sharing issues + } + typeName = NewPoolTString(p.userDef->getTypeName().c_str()); + } + if (p.isCoopmat() && p.typeParameters && p.typeParameters->getNumDims() > 0) { + int numBits = p.typeParameters->getDimSize(0); + if (p.basicType == EbtFloat && numBits == 16) { + basicType = EbtFloat16; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtUint && numBits == 8) { + basicType = EbtUint8; + qualifier.precision = EpqNone; + } else if (p.basicType == EbtInt && numBits == 8) { + basicType = EbtInt8; + qualifier.precision = EpqNone; + } + } + } + // for construction of sampler types + TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : + basicType(EbtSampler), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), + sampler(sampler), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + qualifier.clear(); + qualifier.storage = q; + } + // to efficiently make a dereferenced type + // without ever duplicating the outer structure that will be thrown away + // and using only shallow copy + TType(const TType& type, int derefIndex, bool rowMajor = false) + { + if (type.isArray()) { + shallowCopy(type); + if (type.getArraySizes()->getNumDims() == 1) { + arraySizes = nullptr; + } else { + // want our own copy of the array, 
so we can edit it + arraySizes = new TArraySizes; + arraySizes->copyDereferenced(*type.arraySizes); + } + } else if (type.basicType == EbtStruct || type.basicType == EbtBlock) { + // do a structure dereference + const TTypeList& memberList = *type.getStruct(); + shallowCopy(*memberList[derefIndex].type); + return; + } else { + // do a vector/matrix dereference + shallowCopy(type); + if (matrixCols > 0) { + // dereference from matrix to vector + if (rowMajor) + vectorSize = matrixCols; + else + vectorSize = matrixRows; + matrixCols = 0; + matrixRows = 0; + if (vectorSize == 1) + vector1 = true; + } else if (isVector()) { + // dereference from vector to scalar + vectorSize = 1; + vector1 = false; + } else if (isCoopMat()) { + coopmat = false; + typeParameters = nullptr; + } + } + } + // for making structures, ... + TType(TTypeList* userDef, const TString& n) : + basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + qualifier.clear(); + typeName = NewPoolTString(n.c_str()); + } + // For interface blocks + TType(TTypeList* userDef, const TString& n, const TQualifier& q) : + basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), coopmat(false), + qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr), typeParameters(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + sampler.clear(); + typeName = NewPoolTString(n.c_str()); + } + // for block reference (first parameter must be EbtReference) + explicit TType(TBasicType t, const TType &p, const TString& n) : + basicType(t), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), + arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) +#ifndef GLSLANG_WEB + , spirvType(nullptr) +#endif + { + assert(t == EbtReference); + typeName = NewPoolTString(n.c_str()); + qualifier.clear(); + qualifier.storage = p.qualifier.storage; + referentType = p.clone(); + } + virtual ~TType() {} + + // Not for use across pool pops; it will cause multiple instances of TType to point to the same information. + // This only works if that information (like a structure's list of types) does not change and + // the instances are sharing the same pool. + void shallowCopy(const TType& copyOf) + { + basicType = copyOf.basicType; + sampler = copyOf.sampler; + qualifier = copyOf.qualifier; + vectorSize = copyOf.vectorSize; + matrixCols = copyOf.matrixCols; + matrixRows = copyOf.matrixRows; + vector1 = copyOf.vector1; + arraySizes = copyOf.arraySizes; // copying the pointer only, not the contents + fieldName = copyOf.fieldName; + typeName = copyOf.typeName; + if (isStruct()) { + structure = copyOf.structure; + } else { + referentType = copyOf.referentType; + } + typeParameters = copyOf.typeParameters; +#ifndef GLSLANG_WEB + spirvType = copyOf.spirvType; +#endif + coopmat = copyOf.isCoopMat(); + } + + // Make complete copy of the whole type graph rooted at 'copyOf'. 
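shallowCopy() above aliases the pointers (arraySizes, structure, typeName), while the deepCopy()/clone() that follow rebuild the whole graph; that is why the shallow form is cheap but bound to the lifetime of the shared pool. A sketch contrasting the two, where 'original' is a hypothetical existing TType:

    TType shallow;
    shallow.shallowCopy(original);   // shares descriptors with 'original'
    TType* deep = original.clone();  // independent copy of the whole type graph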
+ void deepCopy(const TType& copyOf) + { + TMap copied; // to enable copying a type graph as a graph, not a tree + deepCopy(copyOf, copied); + } + + // Recursively make temporary + void makeTemporary() + { + getQualifier().makeTemporary(); + + if (isStruct()) + for (unsigned int i = 0; i < structure->size(); ++i) + (*structure)[i].type->makeTemporary(); + } + + TType* clone() const + { + TType *newType = new TType(); + newType->deepCopy(*this); + + return newType; + } + + void makeVector() { vector1 = true; } + + virtual void hideMember() { basicType = EbtVoid; vectorSize = 1; } + virtual bool hiddenMember() const { return basicType == EbtVoid; } + + virtual void setFieldName(const TString& n) { fieldName = NewPoolTString(n.c_str()); } + virtual const TString& getTypeName() const + { + assert(typeName); + return *typeName; + } + + virtual const TString& getFieldName() const + { + assert(fieldName); + return *fieldName; + } + TShaderInterface getShaderInterface() const + { + if (basicType != EbtBlock) + return EsiNone; + + switch (qualifier.storage) { + default: + return EsiNone; + case EvqVaryingIn: + return EsiInput; + case EvqVaryingOut: + return EsiOutput; + case EvqUniform: + case EvqBuffer: + return EsiUniform; + } + } + + virtual TBasicType getBasicType() const { return basicType; } + virtual const TSampler& getSampler() const { return sampler; } + virtual TSampler& getSampler() { return sampler; } + + virtual TQualifier& getQualifier() { return qualifier; } + virtual const TQualifier& getQualifier() const { return qualifier; } + + virtual int getVectorSize() const { return vectorSize; } // returns 1 for either scalar or vector of size 1, valid for both + virtual int getMatrixCols() const { return matrixCols; } + virtual int getMatrixRows() const { return matrixRows; } + virtual int getOuterArraySize() const { return arraySizes->getOuterSize(); } + virtual TIntermTyped* getOuterArrayNode() const { return arraySizes->getOuterNode(); } + virtual int getCumulativeArraySize() const { return arraySizes->getCumulativeSize(); } +#ifdef GLSLANG_WEB + bool isArrayOfArrays() const { return false; } +#else + bool isArrayOfArrays() const { return arraySizes != nullptr && arraySizes->getNumDims() > 1; } +#endif + virtual int getImplicitArraySize() const { return arraySizes->getImplicitSize(); } + virtual const TArraySizes* getArraySizes() const { return arraySizes; } + virtual TArraySizes* getArraySizes() { return arraySizes; } + virtual TType* getReferentType() const { return referentType; } + virtual const TArraySizes* getTypeParameters() const { return typeParameters; } + virtual TArraySizes* getTypeParameters() { return typeParameters; } + + virtual bool isScalar() const { return ! isVector() && ! isMatrix() && ! isStruct() && ! isArray(); } + virtual bool isScalarOrVec1() const { return isScalar() || vector1; } + virtual bool isScalarOrVector() const { return !isMatrix() && !isStruct() && !isArray(); } + virtual bool isVector() const { return vectorSize > 1 || vector1; } + virtual bool isMatrix() const { return matrixCols ? 
+    virtual bool isArray() const { return arraySizes != nullptr; }
+    virtual bool isSizedArray() const { return isArray() && arraySizes->isSized(); }
+    virtual bool isUnsizedArray() const { return isArray() && !arraySizes->isSized(); }
+    virtual bool isArrayVariablyIndexed() const { assert(isArray()); return arraySizes->isVariablyIndexed(); }
+    virtual void setArrayVariablyIndexed() { assert(isArray()); arraySizes->setVariablyIndexed(); }
+    virtual void updateImplicitArraySize(int size) { assert(isArray()); arraySizes->updateImplicitSize(size); }
+    virtual bool isStruct() const { return basicType == EbtStruct || basicType == EbtBlock; }
+    virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16; }
+    virtual bool isIntegerDomain() const
+    {
+        switch (basicType) {
+        case EbtInt8:
+        case EbtUint8:
+        case EbtInt16:
+        case EbtUint16:
+        case EbtInt:
+        case EbtUint:
+        case EbtInt64:
+        case EbtUint64:
+        case EbtAtomicUint:
+            return true;
+        default:
+            break;
+        }
+        return false;
+    }
+    virtual bool isOpaque() const { return basicType == EbtSampler
+#ifndef GLSLANG_WEB
+        || basicType == EbtAtomicUint || basicType == EbtAccStruct || basicType == EbtRayQuery
+#endif
+        ; }
+    virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; }
+
+    // "Image" is a superset of "Subpass"
+    virtual bool isImage() const { return basicType == EbtSampler && getSampler().isImage(); }
+    virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); }
+    virtual bool isTexture() const { return basicType == EbtSampler && getSampler().isTexture(); }
+    // Check the block-name convention of creating a block without populating its members:
+    virtual bool isUnusableName() const { return isStruct() && structure == nullptr; }
+    virtual bool isParameterized() const { return typeParameters != nullptr; }
+#ifdef GLSLANG_WEB
+    bool isAtomic() const { return false; }
+    bool isCoopMat() const { return false; }
+    bool isReference() const { return false; }
+    bool isSpirvType() const { return false; }
+#else
+    bool isAtomic() const { return basicType == EbtAtomicUint; }
+    bool isCoopMat() const { return coopmat; }
+    bool isReference() const { return getBasicType() == EbtReference; }
+    bool isSpirvType() const { return getBasicType() == EbtSpirvType; }
+#endif
+
+    // return true if this type contains any subtype which satisfies the given predicate.
+    template <typename P>
+    bool contains(P predicate) const
+    {
+        if (predicate(this))
+            return true;
+
+        const auto hasa = [predicate](const TTypeLoc& tl) { return tl.type->contains(predicate); };
+
+        return isStruct() && std::any_of(structure->begin(), structure->end(), hasa);
+    }
+
+    // Recursively checks if the type contains the given basic type
+    virtual bool containsBasicType(TBasicType checkType) const
+    {
+        return contains([checkType](const TType* t) { return t->basicType == checkType; } );
+    }
+
+    // Recursively check the structure for any arrays, needed for some error checks
+    virtual bool containsArray() const
+    {
+        return contains([](const TType* t) { return t->isArray(); } );
+    }
+
+    // Check the structure for any structures, needed for some error checks
+    virtual bool containsStructure() const
+    {
+        return contains([this](const TType* t) { return t != this && t->isStruct(); } );
+    }
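Every containsXxx query above funnels into the single contains() walker, so new queries are one-liners over the type tree. Usage sketch, assuming a TType value ty is in hand:

// All of these recursively walk ty's struct/block members via contains():
bool hasDouble = ty.containsBasicType(EbtDouble);
bool hasArray  = ty.contains([](const TType* t) { return t->isArray(); });
bool hasOpaque = ty.contains([](const TType* t) { return t->isOpaque(); });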
+    // Recursively check the structure for any unsized arrays, needed for triggering a copyUp().
+    virtual bool containsUnsizedArray() const
+    {
+        return contains([](const TType* t) { return t->isUnsizedArray(); } );
+    }
+
+    virtual bool containsOpaque() const
+    {
+        return contains([](const TType* t) { return t->isOpaque(); } );
+    }
+
+    // Recursively checks if the type contains a built-in variable
+    virtual bool containsBuiltIn() const
+    {
+        return contains([](const TType* t) { return t->isBuiltIn(); } );
+    }
+
+    virtual bool containsNonOpaque() const
+    {
+        const auto nonOpaque = [](const TType* t) {
+            switch (t->basicType) {
+            case EbtVoid:
+            case EbtFloat:
+            case EbtDouble:
+            case EbtFloat16:
+            case EbtInt8:
+            case EbtUint8:
+            case EbtInt16:
+            case EbtUint16:
+            case EbtInt:
+            case EbtUint:
+            case EbtInt64:
+            case EbtUint64:
+            case EbtBool:
+            case EbtReference:
+                return true;
+            default:
+                return false;
+            }
+        };
+
+        return contains(nonOpaque);
+    }
+
+    virtual bool containsSpecializationSize() const
+    {
+        return contains([](const TType* t) { return t->isArray() && t->arraySizes->isOuterSpecialization(); } );
+    }
+
+#ifdef GLSLANG_WEB
+    bool containsDouble() const { return false; }
+    bool contains16BitFloat() const { return false; }
+    bool contains64BitInt() const { return false; }
+    bool contains16BitInt() const { return false; }
+    bool contains8BitInt() const { return false; }
+    bool containsCoopMat() const { return false; }
+    bool containsReference() const { return false; }
+#else
+    bool containsDouble() const
+    {
+        return containsBasicType(EbtDouble);
+    }
+    bool contains16BitFloat() const
+    {
+        return containsBasicType(EbtFloat16);
+    }
+    bool contains64BitInt() const
+    {
+        return containsBasicType(EbtInt64) || containsBasicType(EbtUint64);
+    }
+    bool contains16BitInt() const
+    {
+        return containsBasicType(EbtInt16) || containsBasicType(EbtUint16);
+    }
+    bool contains8BitInt() const
+    {
+        return containsBasicType(EbtInt8) || containsBasicType(EbtUint8);
+    }
+    bool containsCoopMat() const
+    {
+        return contains([](const TType* t) { return t->coopmat; } );
+    }
+    bool containsReference() const
+    {
+        return containsBasicType(EbtReference);
+    }
+#endif
+
+    // Array editing methods.  Array descriptors can be shared across
+    // type instances.  This allows all uses of the same array
+    // to be updated at once.  E.g., all nodes can be explicitly sized
+    // by tracking and correcting one implicit size.  Or, all nodes
+    // can get the explicit size on a redeclaration that gives size.
+    //
+    // N.B.:  Don't share with the shared symbol tables (symbols are
+    // marked as isReadOnly()).  Such symbols with arrays that will be
+    // edited need to copyUp() on first use, so that
+    //   A) the edits don't affect the shared symbol table, and
+    //   B) the edits are shared across all users.
+    void updateArraySizes(const TType& type)
+    {
+        // For when we may already be sharing existing array descriptors,
+        // keeping the pointers the same, just updating the contents.
+        assert(arraySizes != nullptr);
+        assert(type.arraySizes != nullptr);
+        *arraySizes = *type.arraySizes;
+    }
+    void copyArraySizes(const TArraySizes& s)
+    {
+        // For setting a fresh new set of array sizes, not yet worrying about sharing.
+        arraySizes = new TArraySizes;
+        *arraySizes = s;
+    }
+    void transferArraySizes(TArraySizes* s)
+    {
+        // For setting an already allocated set of sizes that this type can use
+        // (no copy made).
+        arraySizes = s;
+    }
+    void clearArraySizes()
+    {
+        arraySizes = nullptr;
+    }
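The N.B. above is the key invariant: TArraySizes descriptors are deliberately shared so that sizing one use sizes them all. A sketch of the share/unshare dance, assuming t1 and t2 are TType values already in scope (pool allocation details elided):

TArraySizes* dims = new TArraySizes;
dims->addInnerSize(4);            // one dimension: [4]
t1.transferArraySizes(dims);      // t1 adopts the descriptor (no copy made)
t2.transferArraySizes(dims);      // t2 shares the very same descriptor
t1.changeOuterArraySize(16);      // both t1 and t2 now report [16]
t2.copyArraySizes(*dims);         // t2 takes a private copy; edits now diverge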
+    // Add inner array sizes, to any existing sizes, via copy; the
+    // sizes passed in can still be reused for other purposes.
+    void copyArrayInnerSizes(const TArraySizes* s)
+    {
+        if (s != nullptr) {
+            if (arraySizes == nullptr)
+                copyArraySizes(*s);
+            else
+                arraySizes->addInnerSizes(*s);
+        }
+    }
+    void changeOuterArraySize(int s) { arraySizes->changeOuterSize(s); }
+
+    // Recursively make the implicit array size the explicit array size.
+    // Explicit arrays are compile-time or link-time sized, never run-time sized.
+    // Sometimes, policy calls for an array to be run-time sized even if it was
+    // never variably indexed: Don't turn a 'skipNonvariablyIndexed' array into
+    // an explicit array.
+    void adoptImplicitArraySizes(bool skipNonvariablyIndexed)
+    {
+        if (isUnsizedArray() && !(skipNonvariablyIndexed || isArrayVariablyIndexed()))
+            changeOuterArraySize(getImplicitArraySize());
+        // For multi-dim per-view arrays, set unsized inner dimension size to 1
+        if (qualifier.isPerView() && arraySizes && arraySizes->isInnerUnsized())
+            arraySizes->clearInnerUnsized();
+        if (isStruct() && structure->size() > 0) {
+            int lastMember = (int)structure->size() - 1;
+            for (int i = 0; i < lastMember; ++i)
+                (*structure)[i].type->adoptImplicitArraySizes(false);
+            // implement the "last member of an SSBO" policy
+            (*structure)[lastMember].type->adoptImplicitArraySizes(getQualifier().storage == EvqBuffer);
+        }
+    }
+
+
+    void updateTypeParameters(const TType& type)
+    {
+        // For when we may already be sharing existing array descriptors,
+        // keeping the pointers the same, just updating the contents.
+        assert(typeParameters != nullptr);
+        assert(type.typeParameters != nullptr);
+        *typeParameters = *type.typeParameters;
+    }
+    void copyTypeParameters(const TArraySizes& s)
+    {
+        // For setting a fresh new set of type parameters, not yet worrying about sharing.
+        typeParameters = new TArraySizes;
+        *typeParameters = s;
+    }
+    void transferTypeParameters(TArraySizes* s)
+    {
+        // For setting an already allocated set of sizes that this type can use
+        // (no copy made).
+        typeParameters = s;
+    }
+    void clearTypeParameters()
+    {
+        typeParameters = nullptr;
+    }
+
+    // Add inner array sizes, to any existing sizes, via copy; the
+    // sizes passed in can still be reused for other purposes.
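adoptImplicitArraySizes above is where a GLSL declaration like 'float w[]; w[6] = 1.0;' finally becomes an explicit w[7]. A sketch of the bookkeeping that feeds it (the GLSL line and the variable names are illustrative):

// GLSL:  float w[];  w[6] = 1.0;   // largest constant index seen is 6
TArraySizes* dims = new TArraySizes;
dims->addInnerSize();              // outer dimension starts unsized
dims->updateImplicitSize(6 + 1);   // parser records max index + 1
// Later, on the owning TType 'ty' (unless policy keeps it run-time sized):
// ty.adoptImplicitArraySizes(false);   // [] becomes [7] via changeOuterArraySize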
+ void copyTypeParametersInnerSizes(const TArraySizes* s) + { + if (s != nullptr) { + if (typeParameters == nullptr) + copyTypeParameters(*s); + else + typeParameters->addInnerSizes(*s); + } + } + + const char* getBasicString() const + { + return TType::getBasicString(basicType); + } + + static const char* getBasicString(TBasicType t) + { + switch (t) { + case EbtFloat: return "float"; + case EbtInt: return "int"; + case EbtUint: return "uint"; + case EbtSampler: return "sampler/image"; +#ifndef GLSLANG_WEB + case EbtVoid: return "void"; + case EbtDouble: return "double"; + case EbtFloat16: return "float16_t"; + case EbtInt8: return "int8_t"; + case EbtUint8: return "uint8_t"; + case EbtInt16: return "int16_t"; + case EbtUint16: return "uint16_t"; + case EbtInt64: return "int64_t"; + case EbtUint64: return "uint64_t"; + case EbtBool: return "bool"; + case EbtAtomicUint: return "atomic_uint"; + case EbtStruct: return "structure"; + case EbtBlock: return "block"; + case EbtAccStruct: return "accelerationStructureNV"; + case EbtRayQuery: return "rayQueryEXT"; + case EbtReference: return "reference"; + case EbtString: return "string"; + case EbtSpirvType: return "spirv_type"; +#endif + default: return "unknown type"; + } + } + +#ifdef GLSLANG_WEB + TString getCompleteString() const { return ""; } + const char* getStorageQualifierString() const { return ""; } + const char* getBuiltInVariableString() const { return ""; } + const char* getPrecisionQualifierString() const { return ""; } + TString getBasicTypeString() const { return ""; } +#else + TString getCompleteString(bool syntactic = false, bool getQualifiers = true, bool getPrecision = true, + bool getType = true, TString name = "", TString structName = "") const + { + TString typeString; + + const auto appendStr = [&](const char* s) { typeString.append(s); }; + const auto appendUint = [&](unsigned int u) { typeString.append(std::to_string(u).c_str()); }; + const auto appendInt = [&](int i) { typeString.append(std::to_string(i).c_str()); }; + + if (getQualifiers) { + if (qualifier.hasSprivDecorate()) + appendStr(qualifier.getSpirvDecorateQualifierString().c_str()); + + if (qualifier.hasLayout()) { + // To reduce noise, skip this if the only layout is an xfb_buffer + // with no triggering xfb_offset. 
+ TQualifier noXfbBuffer = qualifier; + noXfbBuffer.layoutXfbBuffer = TQualifier::layoutXfbBufferEnd; + if (noXfbBuffer.hasLayout()) { + appendStr("layout("); + if (qualifier.hasAnyLocation()) { + appendStr(" location="); + appendUint(qualifier.layoutLocation); + if (qualifier.hasComponent()) { + appendStr(" component="); + appendUint(qualifier.layoutComponent); + } + if (qualifier.hasIndex()) { + appendStr(" index="); + appendUint(qualifier.layoutIndex); + } + } + if (qualifier.hasSet()) { + appendStr(" set="); + appendUint(qualifier.layoutSet); + } + if (qualifier.hasBinding()) { + appendStr(" binding="); + appendUint(qualifier.layoutBinding); + } + if (qualifier.hasStream()) { + appendStr(" stream="); + appendUint(qualifier.layoutStream); + } + if (qualifier.hasMatrix()) { + appendStr(" "); + appendStr(TQualifier::getLayoutMatrixString(qualifier.layoutMatrix)); + } + if (qualifier.hasPacking()) { + appendStr(" "); + appendStr(TQualifier::getLayoutPackingString(qualifier.layoutPacking)); + } + if (qualifier.hasOffset()) { + appendStr(" offset="); + appendInt(qualifier.layoutOffset); + } + if (qualifier.hasAlign()) { + appendStr(" align="); + appendInt(qualifier.layoutAlign); + } + if (qualifier.hasFormat()) { + appendStr(" "); + appendStr(TQualifier::getLayoutFormatString(qualifier.layoutFormat)); + } + if (qualifier.hasXfbBuffer() && qualifier.hasXfbOffset()) { + appendStr(" xfb_buffer="); + appendUint(qualifier.layoutXfbBuffer); + } + if (qualifier.hasXfbOffset()) { + appendStr(" xfb_offset="); + appendUint(qualifier.layoutXfbOffset); + } + if (qualifier.hasXfbStride()) { + appendStr(" xfb_stride="); + appendUint(qualifier.layoutXfbStride); + } + if (qualifier.hasAttachment()) { + appendStr(" input_attachment_index="); + appendUint(qualifier.layoutAttachment); + } + if (qualifier.hasSpecConstantId()) { + appendStr(" constant_id="); + appendUint(qualifier.layoutSpecConstantId); + } + if (qualifier.layoutPushConstant) + appendStr(" push_constant"); + if (qualifier.layoutBufferReference) + appendStr(" buffer_reference"); + if (qualifier.hasBufferReferenceAlign()) { + appendStr(" buffer_reference_align="); + appendUint(1u << qualifier.layoutBufferReferenceAlign); + } + + if (qualifier.layoutPassthrough) + appendStr(" passthrough"); + if (qualifier.layoutViewportRelative) + appendStr(" layoutViewportRelative"); + if (qualifier.layoutSecondaryViewportRelativeOffset != -2048) { + appendStr(" layoutSecondaryViewportRelativeOffset="); + appendInt(qualifier.layoutSecondaryViewportRelativeOffset); + } + if (qualifier.layoutShaderRecord) + appendStr(" shaderRecordNV"); + + appendStr(")"); + } + } + + if (qualifier.invariant) + appendStr(" invariant"); + if (qualifier.noContraction) + appendStr(" noContraction"); + if (qualifier.centroid) + appendStr(" centroid"); + if (qualifier.smooth) + appendStr(" smooth"); + if (qualifier.flat) + appendStr(" flat"); + if (qualifier.nopersp) + appendStr(" noperspective"); + if (qualifier.explicitInterp) + appendStr(" __explicitInterpAMD"); + if (qualifier.pervertexNV) + appendStr(" pervertexNV"); + if (qualifier.pervertexEXT) + appendStr(" pervertexEXT"); + if (qualifier.perPrimitiveNV) + appendStr(" perprimitiveNV"); + if (qualifier.perViewNV) + appendStr(" perviewNV"); + if (qualifier.perTaskNV) + appendStr(" taskNV"); + if (qualifier.patch) + appendStr(" patch"); + if (qualifier.sample) + appendStr(" sample"); + if (qualifier.coherent) + appendStr(" coherent"); + if (qualifier.devicecoherent) + appendStr(" devicecoherent"); + if 
(qualifier.queuefamilycoherent) + appendStr(" queuefamilycoherent"); + if (qualifier.workgroupcoherent) + appendStr(" workgroupcoherent"); + if (qualifier.subgroupcoherent) + appendStr(" subgroupcoherent"); + if (qualifier.shadercallcoherent) + appendStr(" shadercallcoherent"); + if (qualifier.nonprivate) + appendStr(" nonprivate"); + if (qualifier.volatil) + appendStr(" volatile"); + if (qualifier.restrict) + appendStr(" restrict"); + if (qualifier.readonly) + appendStr(" readonly"); + if (qualifier.writeonly) + appendStr(" writeonly"); + if (qualifier.specConstant) + appendStr(" specialization-constant"); + if (qualifier.nonUniform) + appendStr(" nonuniform"); + if (qualifier.isNullInit()) + appendStr(" null-init"); + if (qualifier.isSpirvByReference()) + appendStr(" spirv_by_reference"); + if (qualifier.isSpirvLiteral()) + appendStr(" spirv_literal"); + appendStr(" "); + appendStr(getStorageQualifierString()); + } + if (getType) { + if (syntactic) { + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isVector() || isMatrix()) { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("d"); + break; + case EbtInt: + appendStr("i"); + break; + case EbtUint: + appendStr("u"); + break; + case EbtBool: + appendStr("b"); + break; + case EbtFloat: + default: + break; + } + if (isVector()) { + appendStr("vec"); + appendInt(vectorSize); + } else { + appendStr("mat"); + appendInt(matrixCols); + appendStr("x"); + appendInt(matrixRows); + } + } else if (isStruct() && structure) { + appendStr(" "); + appendStr(structName.c_str()); + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString(syntactic, getQualifiers, getPrecision, getType, (*structure)[i].type->getFieldName())); + hasHiddenMember = false; + } + } + appendStr("}"); + } else { + appendStr(" "); + switch (basicType) { + case EbtDouble: + appendStr("double"); + break; + case EbtInt: + appendStr("int"); + break; + case EbtUint: + appendStr("uint"); + break; + case EbtBool: + appendStr("bool"); + break; + case EbtFloat: + appendStr("float"); + break; + default: + appendStr("unexpected"); + break; + } + } + if (name.length() > 0) { + appendStr(" "); + appendStr(name.c_str()); + } + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr("[]"); + else { + if (size == UnsizedArraySize) { + appendStr("["); + if (i == 0) + appendInt(arraySizes->getImplicitSize()); + appendStr("]"); + } + else { + appendStr("["); + appendInt(arraySizes->getDimSize(i)); + appendStr("]"); + } + } + } + } + } + else { + if (isArray()) { + for (int i = 0; i < (int)arraySizes->getNumDims(); ++i) { + int size = arraySizes->getDimSize(i); + if (size == UnsizedArraySize && i == 0 && arraySizes->isVariablyIndexed()) + appendStr(" runtime-sized array of"); + else { + if (size == UnsizedArraySize) { + appendStr(" unsized"); + if (i == 0) { + appendStr(" "); + appendInt(arraySizes->getImplicitSize()); + } + } + else { + appendStr(" "); + appendInt(arraySizes->getDimSize(i)); + } + appendStr("-element array of"); + } + } + } + if (isParameterized()) { + appendStr("<"); + for (int i = 0; i < (int)typeParameters->getNumDims(); ++i) { + 
appendInt(typeParameters->getDimSize(i)); + if (i != (int)typeParameters->getNumDims() - 1) + appendStr(", "); + } + appendStr(">"); + } + if (getPrecision && qualifier.precision != EpqNone) { + appendStr(" "); + appendStr(getPrecisionQualifierString()); + } + if (isMatrix()) { + appendStr(" "); + appendInt(matrixCols); + appendStr("X"); + appendInt(matrixRows); + appendStr(" matrix of"); + } + else if (isVector()) { + appendStr(" "); + appendInt(vectorSize); + appendStr("-component vector of"); + } + + appendStr(" "); + typeString.append(getBasicTypeString()); + + if (qualifier.builtIn != EbvNone) { + appendStr(" "); + appendStr(getBuiltInVariableString()); + } + + // Add struct/block members + if (isStruct() && structure) { + appendStr("{"); + bool hasHiddenMember = true; + for (size_t i = 0; i < structure->size(); ++i) { + if (!(*structure)[i].type->hiddenMember()) { + if (!hasHiddenMember) + appendStr(", "); + typeString.append((*structure)[i].type->getCompleteString()); + typeString.append(" "); + typeString.append((*structure)[i].type->getFieldName()); + hasHiddenMember = false; + } + } + appendStr("}"); + } + } + } + + return typeString; + } + + TString getBasicTypeString() const + { + if (basicType == EbtSampler) + return sampler.getString(); + else + return getBasicString(); + } + + const char* getStorageQualifierString() const { return GetStorageQualifierString(qualifier.storage); } + const char* getBuiltInVariableString() const { return GetBuiltInVariableString(qualifier.builtIn); } + const char* getPrecisionQualifierString() const { return GetPrecisionQualifierString(qualifier.precision); } +#endif + + const TTypeList* getStruct() const { assert(isStruct()); return structure; } + void setStruct(TTypeList* s) { assert(isStruct()); structure = s; } + TTypeList* getWritableStruct() const { assert(isStruct()); return structure; } // This should only be used when known to not be sharing with other threads + void setBasicType(const TBasicType& t) { basicType = t; } + + int computeNumComponents() const + { + int components = 0; + + if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { + for (TTypeList::const_iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) + components += ((*tl).type)->computeNumComponents(); + } else if (matrixCols) + components = matrixCols * matrixRows; + else + components = vectorSize; + + if (arraySizes != nullptr) { + components *= arraySizes->getCumulativeSize(); + } + + return components; + } + + // append this type's mangled name to the passed in 'name' + void appendMangledName(TString& name) const + { + buildMangledName(name); + name += ';' ; + } + + // These variables are inconsistently declared inside and outside of gl_PerVertex in glslang right now. + // They are declared inside of 'in gl_PerVertex', but sitting as standalone when they are 'out'puts. + bool isInconsistentGLPerVertexMember(const TString& name) const + { + if (name == "gl_SecondaryPositionNV" || + name == "gl_PositionPerViewNV") + return true; + return false; + } + + + // Do two structure types match? They could be declared independently, + // in different places, but still might satisfy the definition of matching. + // From the spec: + // + // "Structures must have the same name, sequence of type names, and + // type definitions, and member names to be considered the same type. + // This rule applies recursively for nested or embedded types." + // + // If type mismatch in structure, return member indices through lpidx and rpidx. 
+    // If matching members for either block are exhausted, return -1 for exhausted
+    // block and the index of the unmatched member. Otherwise return {-1,-1}.
+    //
+    bool sameStructType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const
+    {
+        // Initialize error to general type mismatch.
+        if (lpidx != nullptr) {
+            *lpidx = -1;
+            *rpidx = -1;
+        }
+
+        // Most commonly, they are both nullptr, or the same pointer to the same actual structure
+        // TODO: Why return true when neither type is a structure?
+        if ((!isStruct() && !right.isStruct()) ||
+            (isStruct() && right.isStruct() && structure == right.structure))
+            return true;
+
+        if (!isStruct() || !right.isStruct())
+            return false;
+
+        // Structure names have to match
+        if (*typeName != *right.typeName)
+            return false;
+
+        // There are inconsistencies with how gl_PerVertex is set up. For now ignore those as errors if they
+        // are known inconsistencies.
+        bool isGLPerVertex = *typeName == "gl_PerVertex";
+
+        // Both being nullptr was caught above, now they both have to be structures of the same number of elements
+        if (lpidx == nullptr &&
+            (structure->size() != right.structure->size() && !isGLPerVertex)) {
+            return false;
+        }
+
+        // Compare the names and types of all the members, which have to match
+        for (size_t li = 0, ri = 0; li < structure->size() || ri < right.structure->size(); ++li, ++ri) {
+            if (lpidx != nullptr) {
+                *lpidx = static_cast<int>(li);
+                *rpidx = static_cast<int>(ri);
+            }
+            if (li < structure->size() && ri < right.structure->size()) {
+                if ((*structure)[li].type->getFieldName() == (*right.structure)[ri].type->getFieldName()) {
+                    if (*(*structure)[li].type != *(*right.structure)[ri].type)
+                        return false;
+                } else {
+                    // Skip hidden members
+                    if ((*structure)[li].type->hiddenMember()) {
+                        ri--;
+                        continue;
+                    } else if ((*right.structure)[ri].type->hiddenMember()) {
+                        li--;
+                        continue;
+                    }
+                    // If one of the members is something that's inconsistently declared, skip over it
+                    // for now.
+                    if (isGLPerVertex) {
+                        if (isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) {
+                            ri--;
+                            continue;
+                        } else if (isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) {
+                            li--;
+                            continue;
+                        }
+                    } else {
+                        return false;
+                    }
+                }
+                // If we get here, then there should only be inconsistently declared members left
+            } else if (li < structure->size()) {
+                if (!(*structure)[li].type->hiddenMember() && !isInconsistentGLPerVertexMember((*structure)[li].type->getFieldName())) {
+                    if (lpidx != nullptr) {
+                        *rpidx = -1;
+                    }
+                    return false;
+                }
+            } else {
+                if (!(*right.structure)[ri].type->hiddenMember() && !isInconsistentGLPerVertexMember((*right.structure)[ri].type->getFieldName())) {
+                    if (lpidx != nullptr) {
+                        *lpidx = -1;
+                    }
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    bool sameReferenceType(const TType& right) const
+    {
+        if (isReference() != right.isReference())
+            return false;
+
+        if (!isReference() && !right.isReference())
+            return true;
+
+        assert(referentType != nullptr);
+        assert(right.referentType != nullptr);
+
+        if (referentType == right.referentType)
+            return true;
+
+        return *referentType == *right.referentType;
+    }
+
+    // See if two types match, in all aspects except arrayness
+    // If mismatch in structure members, return member indices in lpidx and rpidx.
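The spec rule quoted above makes struct identity nominal and recursive: two independently declared 'struct Light { vec3 pos; float power; };' definitions match, while renaming a member (pos to position) breaks the match. The index out-parameters report where matching failed; a usage sketch, assuming typeA and typeB are TType values:

int li = -1, ri = -1;
if (!typeA.sameStructType(typeB, &li, &ri)) {
    // li/ri index the first mismatching members;
    // -1 on one side means that block ran out of members first.
}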
+ bool sameElementType(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + return basicType == right.basicType && sameElementShape(right, lpidx, rpidx); + } + + // See if two type's arrayness match + bool sameArrayness(const TType& right) const + { + return ((arraySizes == nullptr && right.arraySizes == nullptr) || + (arraySizes != nullptr && right.arraySizes != nullptr && *arraySizes == *right.arraySizes)); + } + + // See if two type's arrayness match in everything except their outer dimension + bool sameInnerArrayness(const TType& right) const + { + assert(arraySizes != nullptr && right.arraySizes != nullptr); + return arraySizes->sameInnerArrayness(*right.arraySizes); + } + + // See if two type's parameters match + bool sameTypeParameters(const TType& right) const + { + return ((typeParameters == nullptr && right.typeParameters == nullptr) || + (typeParameters != nullptr && right.typeParameters != nullptr && *typeParameters == *right.typeParameters)); + } + +#ifndef GLSLANG_WEB + // See if two type's SPIR-V type contents match + bool sameSpirvType(const TType& right) const + { + return ((spirvType == nullptr && right.spirvType == nullptr) || + (spirvType != nullptr && right.spirvType != nullptr && *spirvType == *right.spirvType)); + } +#endif + + // See if two type's elements match in all ways except basic type + // If mismatch in structure members, return member indices in lpidx and rpidx. + bool sameElementShape(const TType& right, int* lpidx = nullptr, int* rpidx = nullptr) const + { + if (lpidx != nullptr) { + *lpidx = -1; + *rpidx = -1; + } + return ((basicType != EbtSampler && right.basicType != EbtSampler) || sampler == right.sampler) && + vectorSize == right.vectorSize && + matrixCols == right.matrixCols && + matrixRows == right.matrixRows && + vector1 == right.vector1 && + isCoopMat() == right.isCoopMat() && + sameStructType(right, lpidx, rpidx) && + sameReferenceType(right); + } + + // See if a cooperative matrix type parameter with unspecified parameters is + // an OK function parameter + bool coopMatParameterOK(const TType& right) const + { + return isCoopMat() && right.isCoopMat() && (getBasicType() == right.getBasicType()) && + typeParameters == nullptr && right.typeParameters != nullptr; + } + + bool sameCoopMatBaseType(const TType &right) const { + bool rv = coopmat && right.coopmat; + if (getBasicType() == EbtFloat || getBasicType() == EbtFloat16) + rv = right.getBasicType() == EbtFloat || right.getBasicType() == EbtFloat16; + else if (getBasicType() == EbtUint || getBasicType() == EbtUint8) + rv = right.getBasicType() == EbtUint || right.getBasicType() == EbtUint8; + else if (getBasicType() == EbtInt || getBasicType() == EbtInt8) + rv = right.getBasicType() == EbtInt || right.getBasicType() == EbtInt8; + else + rv = false; + return rv; + } + + + // See if two types match in all ways (just the actual type, not qualification) + bool operator==(const TType& right) const + { +#ifndef GLSLANG_WEB + return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right) && sameSpirvType(right); +#else + return sameElementType(right) && sameArrayness(right) && sameTypeParameters(right); +#endif + } + + bool operator!=(const TType& right) const + { + return ! operator==(right); + } + + unsigned int getBufferReferenceAlignment() const + { +#ifndef GLSLANG_WEB + if (getBasicType() == glslang::EbtReference) { + return getReferentType()->getQualifier().hasBufferReferenceAlign() ? 
+            (1u << getReferentType()->getQualifier().layoutBufferReferenceAlign) : 16u;
+        }
+#endif
+        return 0;
+    }
+
+#ifndef GLSLANG_WEB
+    const TSpirvType& getSpirvType() const { assert(spirvType); return *spirvType; }
+#endif
+
+protected:
+    // Require consumer to pick between deep copy and shallow copy.
+    TType(const TType& type);
+    TType& operator=(const TType& type);
+
+    // Recursively copy a type graph, while preserving the graph-like
+    // quality. That is, don't make more than one copy of a structure that
+    // gets reused multiple times in the type graph.
+    void deepCopy(const TType& copyOf, TMap<TTypeList*,TTypeList*>& copiedMap)
+    {
+        shallowCopy(copyOf);
+
+#ifndef GLSLANG_WEB
+        // GL_EXT_spirv_intrinsics
+        if (copyOf.qualifier.spirvDecorate) {
+            qualifier.spirvDecorate = new TSpirvDecorate;
+            *qualifier.spirvDecorate = *copyOf.qualifier.spirvDecorate;
+        }
+
+        if (copyOf.spirvType) {
+            spirvType = new TSpirvType;
+            *spirvType = *copyOf.spirvType;
+        }
+#endif
+
+        if (copyOf.arraySizes) {
+            arraySizes = new TArraySizes;
+            *arraySizes = *copyOf.arraySizes;
+        }
+
+        if (copyOf.typeParameters) {
+            typeParameters = new TArraySizes;
+            *typeParameters = *copyOf.typeParameters;
+        }
+
+        if (copyOf.isStruct() && copyOf.structure) {
+            auto prevCopy = copiedMap.find(copyOf.structure);
+            if (prevCopy != copiedMap.end())
+                structure = prevCopy->second;
+            else {
+                structure = new TTypeList;
+                copiedMap[copyOf.structure] = structure;
+                for (unsigned int i = 0; i < copyOf.structure->size(); ++i) {
+                    TTypeLoc typeLoc;
+                    typeLoc.loc = (*copyOf.structure)[i].loc;
+                    typeLoc.type = new TType();
+                    typeLoc.type->deepCopy(*(*copyOf.structure)[i].type, copiedMap);
+                    structure->push_back(typeLoc);
+                }
+            }
+        }
+
+        if (copyOf.fieldName)
+            fieldName = NewPoolTString(copyOf.fieldName->c_str());
+        if (copyOf.typeName)
+            typeName = NewPoolTString(copyOf.typeName->c_str());
+    }
+
+
+    void buildMangledName(TString&) const;
+
+    TBasicType basicType : 8;
+    int vectorSize       : 4; // 1 means either scalar or 1-component vector; see vector1 to disambiguate.
+    int matrixCols       : 4;
+    int matrixRows       : 4;
+    bool vector1         : 1; // Backward-compatible tracking of a 1-component vector distinguished from a scalar.
+                              // GLSL 4.5 never has a 1-component vector; so this will always be false until such
+                              // functionality is added.
+                              // HLSL does have 1-component vectors, so this will be true to disambiguate
+                              // from a scalar.
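The vector1 flag exists because HLSL distinguishes 'float1' (a 1-component vector) from 'float' (a scalar), while GLSL has no such type. Both answer getVectorSize() == 1; only vector1 tells them apart, via isVector() and isScalar(). Illustration in HLSL terms:

// HLSL:  float  s;   // scalar           -> vectorSize 1, vector1 false
// HLSL:  float1 v;   // 1-component vec  -> vectorSize 1, vector1 true
// So: s.isScalar() == true,  s.isVector() == false
//     v.isScalar() == false, v.isVector() == true   (vectorSize > 1 || vector1)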
+    bool coopmat         : 1;
+    TQualifier qualifier;
+
+    TArraySizes* arraySizes;    // nullptr unless an array; can be shared across types
+    // A type can't be both a structure (EbtStruct/EbtBlock) and a reference (EbtReference), so
+    // conserve space by making these a union
+    union {
+        TTypeList* structure;   // invalid unless this is a struct; can be shared across types
+        TType *referentType;    // invalid unless this is an EbtReference
+    };
+    TString *fieldName;         // for structure field names
+    TString *typeName;          // for structure type name
+    TSampler sampler;
+    TArraySizes* typeParameters;// nullptr unless a parameterized type; can be shared across types
+#ifndef GLSLANG_WEB
+    TSpirvType* spirvType;      // SPIR-V type defined by spirv_type directive
+#endif
+};
+
+} // end namespace glslang
+
+#endif // _TYPES_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/arrays.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/arrays.h
new file mode 100644
index 0000000..7f047d9
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/arrays.h
@@ -0,0 +1,341 @@
+//
+// Copyright (C) 2002-2005  3Dlabs Inc. Ltd.
+// Copyright (C) 2012-2013 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+//
+// Implement types for tracking GLSL arrays, arrays of arrays, etc.
+//
+
+#ifndef _ARRAYS_INCLUDED
+#define _ARRAYS_INCLUDED
+
+#include <algorithm>
+
+namespace glslang {
+
+// This is used to mean there is no size yet (unsized), it is waiting to get a size from somewhere else.
+const int UnsizedArraySize = 0;
+
+class TIntermTyped;
+extern bool SameSpecializationConstants(TIntermTyped*, TIntermTyped*);
+
+// Specialization constants need both a nominal size and a node that defines
+// the specialization constant being used.  Array types are the same when their
+// size and specialization constant nodes are the same.
+struct TArraySize { + unsigned int size; + TIntermTyped* node; // nullptr means no specialization constant node + bool operator==(const TArraySize& rhs) const + { + if (size != rhs.size) + return false; + if (node == nullptr || rhs.node == nullptr) + return node == rhs.node; + + return SameSpecializationConstants(node, rhs.node); + } +}; + +// +// TSmallArrayVector is used as the container for the set of sizes in TArraySizes. +// It has generic-container semantics, while TArraySizes has array-of-array semantics. +// That is, TSmallArrayVector should be more focused on mechanism and TArraySizes on policy. +// +struct TSmallArrayVector { + // + // TODO: memory: TSmallArrayVector is intended to be smaller. + // Almost all arrays could be handled by two sizes each fitting + // in 16 bits, needing a real vector only in the cases where there + // are more than 3 sizes or a size needing more than 16 bits. + // + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + + TSmallArrayVector() : sizes(nullptr) { } + virtual ~TSmallArrayVector() { dealloc(); } + + // For breaking into two non-shared copies, independently modifiable. + TSmallArrayVector& operator=(const TSmallArrayVector& from) + { + if (from.sizes == nullptr) + sizes = nullptr; + else { + alloc(); + *sizes = *from.sizes; + } + + return *this; + } + + int size() const + { + if (sizes == nullptr) + return 0; + return (int)sizes->size(); + } + + unsigned int frontSize() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().size; + } + + TIntermTyped* frontNode() const + { + assert(sizes != nullptr && sizes->size() > 0); + return sizes->front().node; + } + + void changeFront(unsigned int s) + { + assert(sizes != nullptr); + // this should only happen for implicitly sized arrays, not specialization constants + assert(sizes->front().node == nullptr); + sizes->front().size = s; + } + + void push_back(unsigned int e, TIntermTyped* n) + { + alloc(); + TArraySize pair = { e, n }; + sizes->push_back(pair); + } + + void push_back(const TSmallArrayVector& newDims) + { + alloc(); + sizes->insert(sizes->end(), newDims.sizes->begin(), newDims.sizes->end()); + } + + void pop_front() + { + assert(sizes != nullptr && sizes->size() > 0); + if (sizes->size() == 1) + dealloc(); + else + sizes->erase(sizes->begin()); + } + + // 'this' should currently not be holding anything, and copyNonFront + // will make it hold a copy of all but the first element of rhs. + // (This would be useful for making a type that is dereferenced by + // one dimension.) + void copyNonFront(const TSmallArrayVector& rhs) + { + assert(sizes == nullptr); + if (rhs.size() > 1) { + alloc(); + sizes->insert(sizes->begin(), rhs.sizes->begin() + 1, rhs.sizes->end()); + } + } + + unsigned int getDimSize(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].size; + } + + void setDimSize(int i, unsigned int size) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + assert((*sizes)[i].node == nullptr); + (*sizes)[i].size = size; + } + + TIntermTyped* getDimNode(int i) const + { + assert(sizes != nullptr && (int)sizes->size() > i); + return (*sizes)[i].node; + } + + bool operator==(const TSmallArrayVector& rhs) const + { + if (sizes == nullptr && rhs.sizes == nullptr) + return true; + if (sizes == nullptr || rhs.sizes == nullptr) + return false; + return *sizes == *rhs.sizes; + } + bool operator!=(const TSmallArrayVector& rhs) const { return ! 
operator==(rhs); }
+
+protected:
+    TSmallArrayVector(const TSmallArrayVector&);
+
+    void alloc()
+    {
+        if (sizes == nullptr)
+            sizes = new TVector<TArraySize>;
+    }
+    void dealloc()
+    {
+        delete sizes;
+        sizes = nullptr;
+    }
+
+    TVector<TArraySize>* sizes; // will either hold such a pointer, or in the future, hold the two array sizes
+};
+
+//
+// Represent an array, or array of arrays, to arbitrary depth.  This is not
+// done through a hierarchy of types in a type tree, rather all contiguous arrayness
+// in the type hierarchy is localized into this single cumulative object.
+//
+// The arrayness in TType is a pointer, so that it can be non-allocated and zero
+// for the vast majority of types that are non-array types.
+//
+// Order Policy: these are all identical:
+//  - left to right order within a contiguous set of ...[..][..][..]... in the source language
+//  - index order 0, 1, 2, ... within the 'sizes' member below
+//  - outer-most to inner-most
+//
+struct TArraySizes {
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
+
+    TArraySizes() : implicitArraySize(1), variablyIndexed(false) { }
+
+    // For breaking into two non-shared copies, independently modifiable.
+    TArraySizes& operator=(const TArraySizes& from)
+    {
+        implicitArraySize = from.implicitArraySize;
+        variablyIndexed = from.variablyIndexed;
+        sizes = from.sizes;
+
+        return *this;
+    }
+
+    // translate from array-of-array semantics to container semantics
+    int getNumDims() const { return sizes.size(); }
+    int getDimSize(int dim) const { return sizes.getDimSize(dim); }
+    TIntermTyped* getDimNode(int dim) const { return sizes.getDimNode(dim); }
+    void setDimSize(int dim, int size) { sizes.setDimSize(dim, size); }
+    int getOuterSize() const { return sizes.frontSize(); }
+    TIntermTyped* getOuterNode() const { return sizes.frontNode(); }
+    int getCumulativeSize() const
+    {
+        int size = 1;
+        for (int d = 0; d < sizes.size(); ++d) {
+            // this only makes sense in paths that have a known array size
+            assert(sizes.getDimSize(d) != UnsizedArraySize);
+            size *= sizes.getDimSize(d);
+        }
+        return size;
+    }
+    void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); }
+    void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); }
+    void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); }
+    void addInnerSize(TArraySize pair) {
+        sizes.push_back(pair.size, pair.node);
+    }
+    void addInnerSizes(const TArraySizes& s) { sizes.push_back(s.sizes); }
+    void changeOuterSize(int s) { sizes.changeFront((unsigned)s); }
+    int getImplicitSize() const { return implicitArraySize; }
+    void updateImplicitSize(int s) { implicitArraySize = std::max(implicitArraySize, s); }
+    bool isInnerUnsized() const
+    {
+        for (int d = 1; d < sizes.size(); ++d) {
+            if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize)
+                return true;
+        }
+
+        return false;
+    }
+    bool clearInnerUnsized()
+    {
+        for (int d = 1; d < sizes.size(); ++d) {
+            if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize)
+                setDimSize(d, 1);
+        }
+
+        return false;
+    }
+    bool isInnerSpecialization() const
+    {
+        for (int d = 1; d < sizes.size(); ++d) {
+            if (sizes.getDimNode(d) != nullptr)
+                return true;
+        }
+
+        return false;
+    }
+    bool isOuterSpecialization()
+    {
+        return sizes.getDimNode(0) != nullptr;
+    }
+
+    bool hasUnsized() const { return getOuterSize() == UnsizedArraySize || isInnerUnsized(); }
+    bool isSized() const { return getOuterSize() != UnsizedArraySize; }
+    void dereference() { sizes.pop_front(); }
+    void copyDereferenced(const TArraySizes& rhs)
+    {
+        assert(sizes.size() == 0);
+        if
(rhs.sizes.size() > 1) + sizes.copyNonFront(rhs.sizes); + } + + bool sameInnerArrayness(const TArraySizes& rhs) const + { + if (sizes.size() != rhs.sizes.size()) + return false; + + for (int d = 1; d < sizes.size(); ++d) { + if (sizes.getDimSize(d) != rhs.sizes.getDimSize(d) || + sizes.getDimNode(d) != rhs.sizes.getDimNode(d)) + return false; + } + + return true; + } + + void setVariablyIndexed() { variablyIndexed = true; } + bool isVariablyIndexed() const { return variablyIndexed; } + + bool operator==(const TArraySizes& rhs) const { return sizes == rhs.sizes; } + bool operator!=(const TArraySizes& rhs) const { return sizes != rhs.sizes; } + +protected: + TSmallArrayVector sizes; + + TArraySizes(const TArraySizes&); + + // For tracking maximum referenced compile-time constant index. + // Applies only to the outer-most dimension. Potentially becomes + // the implicit size of the array, if not variably indexed and + // otherwise legal. + int implicitArraySize; + bool variablyIndexed; // true if array is indexed with a non compile-time constant +}; + +} // end namespace glslang + +#endif // _ARRAYS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/glslang_c_interface.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/glslang_c_interface.h new file mode 100644 index 0000000..f540f26 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/glslang_c_interface.h @@ -0,0 +1,279 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**/
+
+#ifndef GLSLANG_C_IFACE_H_INCLUDED
+#define GLSLANG_C_IFACE_H_INCLUDED
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "glslang_c_shader_types.h"
+
+typedef struct glslang_shader_s glslang_shader_t;
+typedef struct glslang_program_s glslang_program_t;
+
+/* TLimits counterpart */
+typedef struct glslang_limits_s {
+    bool non_inductive_for_loops;
+    bool while_loops;
+    bool do_while_loops;
+    bool general_uniform_indexing;
+    bool general_attribute_matrix_vector_indexing;
+    bool general_varying_indexing;
+    bool general_sampler_indexing;
+    bool general_variable_indexing;
+    bool general_constant_matrix_vector_indexing;
+} glslang_limits_t;
+
+/* TBuiltInResource counterpart */
+typedef struct glslang_resource_s {
+    int max_lights;
+    int max_clip_planes;
+    int max_texture_units;
+    int max_texture_coords;
+    int max_vertex_attribs;
+    int max_vertex_uniform_components;
+    int max_varying_floats;
+    int max_vertex_texture_image_units;
+    int max_combined_texture_image_units;
+    int max_texture_image_units;
+    int max_fragment_uniform_components;
+    int max_draw_buffers;
+    int max_vertex_uniform_vectors;
+    int max_varying_vectors;
+    int max_fragment_uniform_vectors;
+    int max_vertex_output_vectors;
+    int max_fragment_input_vectors;
+    int min_program_texel_offset;
+    int max_program_texel_offset;
+    int max_clip_distances;
+    int max_compute_work_group_count_x;
+    int max_compute_work_group_count_y;
+    int max_compute_work_group_count_z;
+    int max_compute_work_group_size_x;
+    int max_compute_work_group_size_y;
+    int max_compute_work_group_size_z;
+    int max_compute_uniform_components;
+    int max_compute_texture_image_units;
+    int max_compute_image_uniforms;
+    int max_compute_atomic_counters;
+    int max_compute_atomic_counter_buffers;
+    int max_varying_components;
+    int max_vertex_output_components;
+    int max_geometry_input_components;
+    int max_geometry_output_components;
+    int max_fragment_input_components;
+    int max_image_units;
+    int max_combined_image_units_and_fragment_outputs;
+    int max_combined_shader_output_resources;
+    int max_image_samples;
+    int max_vertex_image_uniforms;
+    int max_tess_control_image_uniforms;
+    int max_tess_evaluation_image_uniforms;
+    int max_geometry_image_uniforms;
+    int max_fragment_image_uniforms;
+    int max_combined_image_uniforms;
+    int max_geometry_texture_image_units;
+    int max_geometry_output_vertices;
+    int max_geometry_total_output_components;
+    int max_geometry_uniform_components;
+    int max_geometry_varying_components;
+    int max_tess_control_input_components;
+    int max_tess_control_output_components;
+    int max_tess_control_texture_image_units;
+    int max_tess_control_uniform_components;
+    int max_tess_control_total_output_components;
+    int max_tess_evaluation_input_components;
+    int max_tess_evaluation_output_components;
+    int max_tess_evaluation_texture_image_units;
+    int max_tess_evaluation_uniform_components;
+    int max_tess_patch_components;
+    int max_patch_vertices;
+    int max_tess_gen_level;
+    int max_viewports;
+    int max_vertex_atomic_counters;
+    int max_tess_control_atomic_counters;
+    int max_tess_evaluation_atomic_counters;
+    int max_geometry_atomic_counters;
+    int max_fragment_atomic_counters;
+    int max_combined_atomic_counters;
+    int max_atomic_counter_bindings;
+    int max_vertex_atomic_counter_buffers;
+    int max_tess_control_atomic_counter_buffers;
+    int max_tess_evaluation_atomic_counter_buffers;
+    int max_geometry_atomic_counter_buffers;
+    int max_fragment_atomic_counter_buffers;
+    int max_combined_atomic_counter_buffers;
+    int max_atomic_counter_buffer_size;
+    int
max_transform_feedback_buffers; + int max_transform_feedback_interleaved_components; + int max_cull_distances; + int max_combined_clip_and_cull_distances; + int max_samples; + int max_mesh_output_vertices_nv; + int max_mesh_output_primitives_nv; + int max_mesh_work_group_size_x_nv; + int max_mesh_work_group_size_y_nv; + int max_mesh_work_group_size_z_nv; + int max_task_work_group_size_x_nv; + int max_task_work_group_size_y_nv; + int max_task_work_group_size_z_nv; + int max_mesh_view_count_nv; + int max_mesh_output_vertices_ext; + int max_mesh_output_primitives_ext; + int max_mesh_work_group_size_x_ext; + int max_mesh_work_group_size_y_ext; + int max_mesh_work_group_size_z_ext; + int max_task_work_group_size_x_ext; + int max_task_work_group_size_y_ext; + int max_task_work_group_size_z_ext; + int max_mesh_view_count_ext; + int maxDualSourceDrawBuffersEXT; + + glslang_limits_t limits; +} glslang_resource_t; + +typedef struct glslang_input_s { + glslang_source_t language; + glslang_stage_t stage; + glslang_client_t client; + glslang_target_client_version_t client_version; + glslang_target_language_t target_language; + glslang_target_language_version_t target_language_version; + /** Shader source code */ + const char* code; + int default_version; + glslang_profile_t default_profile; + int force_default_version_and_profile; + int forward_compatible; + glslang_messages_t messages; + const glslang_resource_t* resource; +} glslang_input_t; + +/* Inclusion result structure allocated by C include_local/include_system callbacks */ +typedef struct glsl_include_result_s { + /* Header file name or NULL if inclusion failed */ + const char* header_name; + + /* Header contents or NULL */ + const char* header_data; + size_t header_length; + +} glsl_include_result_t; + +/* Callback for local file inclusion */ +typedef glsl_include_result_t* (*glsl_include_local_func)(void* ctx, const char* header_name, const char* includer_name, + size_t include_depth); + +/* Callback for system file inclusion */ +typedef glsl_include_result_t* (*glsl_include_system_func)(void* ctx, const char* header_name, + const char* includer_name, size_t include_depth); + +/* Callback for include result destruction */ +typedef int (*glsl_free_include_result_func)(void* ctx, glsl_include_result_t* result); + +/* Collection of callbacks for GLSL preprocessor */ +typedef struct glsl_include_callbacks_s { + glsl_include_system_func include_system; + glsl_include_local_func include_local; + glsl_free_include_result_func free_include_result; +} glsl_include_callbacks_t; + +/* SpvOptions counterpart */ +typedef struct glslang_spv_options_s { + bool generate_debug_info; + bool strip_debug_info; + bool disable_optimizer; + bool optimize_size; + bool disassemble; + bool validate; + bool emit_nonsemantic_shader_debug_info; + bool emit_nonsemantic_shader_debug_source; +} glslang_spv_options_t; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef GLSLANG_IS_SHARED_LIBRARY + #ifdef _WIN32 + #ifdef GLSLANG_EXPORTING + #define GLSLANG_EXPORT __declspec(dllexport) + #else + #define GLSLANG_EXPORT __declspec(dllimport) + #endif + #elif __GNUC__ >= 4 + #define GLSLANG_EXPORT __attribute__((visibility("default"))) + #endif +#endif // GLSLANG_IS_SHARED_LIBRARY + +#ifndef GLSLANG_EXPORT +#define GLSLANG_EXPORT +#endif + +GLSLANG_EXPORT int glslang_initialize_process(); +GLSLANG_EXPORT void glslang_finalize_process(); + +GLSLANG_EXPORT glslang_shader_t* glslang_shader_create(const glslang_input_t* input); +GLSLANG_EXPORT void 
glslang_shader_delete(glslang_shader_t* shader); +GLSLANG_EXPORT void glslang_shader_set_preamble(glslang_shader_t* shader, const char* s); +GLSLANG_EXPORT void glslang_shader_shift_binding(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base); +GLSLANG_EXPORT void glslang_shader_shift_binding_for_set(glslang_shader_t* shader, glslang_resource_type_t res, unsigned int base, unsigned int set); +GLSLANG_EXPORT void glslang_shader_set_options(glslang_shader_t* shader, int options); // glslang_shader_options_t +GLSLANG_EXPORT void glslang_shader_set_glsl_version(glslang_shader_t* shader, int version); +GLSLANG_EXPORT int glslang_shader_preprocess(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT int glslang_shader_parse(glslang_shader_t* shader, const glslang_input_t* input); +GLSLANG_EXPORT const char* glslang_shader_get_preprocessed_code(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_log(glslang_shader_t* shader); +GLSLANG_EXPORT const char* glslang_shader_get_info_debug_log(glslang_shader_t* shader); + +GLSLANG_EXPORT glslang_program_t* glslang_program_create(); +GLSLANG_EXPORT void glslang_program_delete(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_add_shader(glslang_program_t* program, glslang_shader_t* shader); +GLSLANG_EXPORT int glslang_program_link(glslang_program_t* program, int messages); // glslang_messages_t +GLSLANG_EXPORT void glslang_program_add_source_text(glslang_program_t* program, glslang_stage_t stage, const char* text, size_t len); +GLSLANG_EXPORT void glslang_program_set_source_file(glslang_program_t* program, glslang_stage_t stage, const char* file); +GLSLANG_EXPORT int glslang_program_map_io(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_generate(glslang_program_t* program, glslang_stage_t stage); +GLSLANG_EXPORT void glslang_program_SPIRV_generate_with_options(glslang_program_t* program, glslang_stage_t stage, glslang_spv_options_t* spv_options); +GLSLANG_EXPORT size_t glslang_program_SPIRV_get_size(glslang_program_t* program); +GLSLANG_EXPORT void glslang_program_SPIRV_get(glslang_program_t* program, unsigned int*); +GLSLANG_EXPORT unsigned int* glslang_program_SPIRV_get_ptr(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_SPIRV_get_messages(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_log(glslang_program_t* program); +GLSLANG_EXPORT const char* glslang_program_get_info_debug_log(glslang_program_t* program); + +#ifdef __cplusplus +} +#endif + +#endif /* #ifdef GLSLANG_C_IFACE_INCLUDED */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/glslang_c_shader_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/glslang_c_shader_types.h new file mode 100644 index 0000000..9bc2114 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/glslang_c_shader_types.h @@ -0,0 +1,227 @@ +/** + This code is based on the glslang_c_interface implementation by Viktor Latypov +**/ + +/** +BSD 2-Clause License + +Copyright (c) 2019, Viktor Latypov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
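The declarations above compose into the usual compile pipeline: initialize the process once, preprocess and parse a shader, link it into a program, then pull out SPIR-V words. A minimal sketch, with error handling and info-log printing elided; shader_source and default_resource are assumed to exist, and note that the Vulkan/SPIR-V version enums declared later in this header set are bit-packed as (major << 22 | minor << 12) and (major << 16 | minor << 8) respectively:

glslang_input_t input = {};
input.language = GLSLANG_SOURCE_GLSL;
input.stage = GLSLANG_STAGE_COMPUTE;
input.client = GLSLANG_CLIENT_VULKAN;
input.client_version = GLSLANG_TARGET_VULKAN_1_1;
input.target_language = GLSLANG_TARGET_SPV;
input.target_language_version = GLSLANG_TARGET_SPV_1_3;
input.code = shader_source;                 // assumed: NUL-terminated GLSL text
input.default_version = 450;
input.default_profile = GLSLANG_NO_PROFILE;
input.messages = GLSLANG_MSG_DEFAULT_BIT;
input.resource = default_resource;          // assumed: populated glslang_resource_t*

glslang_initialize_process();
glslang_shader_t* shader = glslang_shader_create(&input);
if (glslang_shader_preprocess(shader, &input) && glslang_shader_parse(shader, &input)) {
    glslang_program_t* program = glslang_program_create();
    glslang_program_add_shader(program, shader);
    if (glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT)) {
        glslang_program_SPIRV_generate(program, input.stage);
        size_t words = glslang_program_SPIRV_get_size(program);    // size in 32-bit words
        unsigned int* spirv = glslang_program_SPIRV_get_ptr(program);
        // ... hand (spirv, words) to the Vulkan driver ...
    }
    glslang_program_delete(program);
}
glslang_shader_delete(shader);
glslang_finalize_process();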
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**/ + +#ifndef C_SHADER_TYPES_H_INCLUDED +#define C_SHADER_TYPES_H_INCLUDED + +#define LAST_ELEMENT_MARKER(x) x + +/* EShLanguage counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX, + GLSLANG_STAGE_TESSCONTROL, + GLSLANG_STAGE_TESSEVALUATION, + GLSLANG_STAGE_GEOMETRY, + GLSLANG_STAGE_FRAGMENT, + GLSLANG_STAGE_COMPUTE, + GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_RAYGEN_NV = GLSLANG_STAGE_RAYGEN, + GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_INTERSECT_NV = GLSLANG_STAGE_INTERSECT, + GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_ANYHIT_NV = GLSLANG_STAGE_ANYHIT, + GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_CLOSESTHIT_NV = GLSLANG_STAGE_CLOSESTHIT, + GLSLANG_STAGE_MISS, + GLSLANG_STAGE_MISS_NV = GLSLANG_STAGE_MISS, + GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_CALLABLE_NV = GLSLANG_STAGE_CALLABLE, + GLSLANG_STAGE_TASK, + GLSLANG_STAGE_TASK_NV = GLSLANG_STAGE_TASK, + GLSLANG_STAGE_MESH, + GLSLANG_STAGE_MESH_NV = GLSLANG_STAGE_MESH, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_COUNT), +} glslang_stage_t; // would be better as stage, but this is ancient now + +/* EShLanguageMask counterpart */ +typedef enum { + GLSLANG_STAGE_VERTEX_MASK = (1 << GLSLANG_STAGE_VERTEX), + GLSLANG_STAGE_TESSCONTROL_MASK = (1 << GLSLANG_STAGE_TESSCONTROL), + GLSLANG_STAGE_TESSEVALUATION_MASK = (1 << GLSLANG_STAGE_TESSEVALUATION), + GLSLANG_STAGE_GEOMETRY_MASK = (1 << GLSLANG_STAGE_GEOMETRY), + GLSLANG_STAGE_FRAGMENT_MASK = (1 << GLSLANG_STAGE_FRAGMENT), + GLSLANG_STAGE_COMPUTE_MASK = (1 << GLSLANG_STAGE_COMPUTE), + GLSLANG_STAGE_RAYGEN_MASK = (1 << GLSLANG_STAGE_RAYGEN), + GLSLANG_STAGE_RAYGEN_NV_MASK = GLSLANG_STAGE_RAYGEN_MASK, + GLSLANG_STAGE_INTERSECT_MASK = (1 << GLSLANG_STAGE_INTERSECT), + GLSLANG_STAGE_INTERSECT_NV_MASK = GLSLANG_STAGE_INTERSECT_MASK, + GLSLANG_STAGE_ANYHIT_MASK = (1 << GLSLANG_STAGE_ANYHIT), + GLSLANG_STAGE_ANYHIT_NV_MASK = GLSLANG_STAGE_ANYHIT_MASK, + GLSLANG_STAGE_CLOSESTHIT_MASK = (1 << GLSLANG_STAGE_CLOSESTHIT), + GLSLANG_STAGE_CLOSESTHIT_NV_MASK = GLSLANG_STAGE_CLOSESTHIT_MASK, + GLSLANG_STAGE_MISS_MASK = (1 << GLSLANG_STAGE_MISS), + GLSLANG_STAGE_MISS_NV_MASK = GLSLANG_STAGE_MISS_MASK, + GLSLANG_STAGE_CALLABLE_MASK = (1 << GLSLANG_STAGE_CALLABLE), + GLSLANG_STAGE_CALLABLE_NV_MASK = GLSLANG_STAGE_CALLABLE_MASK, + GLSLANG_STAGE_TASK_MASK = (1 << GLSLANG_STAGE_TASK), + GLSLANG_STAGE_TASK_NV_MASK = GLSLANG_STAGE_TASK_MASK, + GLSLANG_STAGE_MESH_MASK = (1 << GLSLANG_STAGE_MESH), + GLSLANG_STAGE_MESH_NV_MASK = GLSLANG_STAGE_MESH_MASK, + LAST_ELEMENT_MARKER(GLSLANG_STAGE_MASK_COUNT), +} glslang_stage_mask_t; + +/* EShSource counterpart */ +typedef enum { + 
GLSLANG_SOURCE_NONE, + GLSLANG_SOURCE_GLSL, + GLSLANG_SOURCE_HLSL, + LAST_ELEMENT_MARKER(GLSLANG_SOURCE_COUNT), +} glslang_source_t; + +/* EShClient counterpart */ +typedef enum { + GLSLANG_CLIENT_NONE, + GLSLANG_CLIENT_VULKAN, + GLSLANG_CLIENT_OPENGL, + LAST_ELEMENT_MARKER(GLSLANG_CLIENT_COUNT), +} glslang_client_t; + +/* EShTargetLanguage counterpart */ +typedef enum { + GLSLANG_TARGET_NONE, + GLSLANG_TARGET_SPV, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_COUNT), +} glslang_target_language_t; + +/* SH_TARGET_ClientVersion counterpart */ +typedef enum { + GLSLANG_TARGET_VULKAN_1_0 = (1 << 22), + GLSLANG_TARGET_VULKAN_1_1 = (1 << 22) | (1 << 12), + GLSLANG_TARGET_VULKAN_1_2 = (1 << 22) | (2 << 12), + GLSLANG_TARGET_VULKAN_1_3 = (1 << 22) | (3 << 12), + GLSLANG_TARGET_OPENGL_450 = 450, + LAST_ELEMENT_MARKER(GLSLANG_TARGET_CLIENT_VERSION_COUNT = 5), +} glslang_target_client_version_t; + +/* SH_TARGET_LanguageVersion counterpart */ +typedef enum { + GLSLANG_TARGET_SPV_1_0 = (1 << 16), + GLSLANG_TARGET_SPV_1_1 = (1 << 16) | (1 << 8), + GLSLANG_TARGET_SPV_1_2 = (1 << 16) | (2 << 8), + GLSLANG_TARGET_SPV_1_3 = (1 << 16) | (3 << 8), + GLSLANG_TARGET_SPV_1_4 = (1 << 16) | (4 << 8), + GLSLANG_TARGET_SPV_1_5 = (1 << 16) | (5 << 8), + GLSLANG_TARGET_SPV_1_6 = (1 << 16) | (6 << 8), + LAST_ELEMENT_MARKER(GLSLANG_TARGET_LANGUAGE_VERSION_COUNT = 7), +} glslang_target_language_version_t; + +/* EShExecutable counterpart */ +typedef enum { GLSLANG_EX_VERTEX_FRAGMENT, GLSLANG_EX_FRAGMENT } glslang_executable_t; + +// EShOptimizationLevel counterpart +// This enum is not used in the current C interface, but could be added at a later date. +// GLSLANG_OPT_NONE is the current default. +typedef enum { + GLSLANG_OPT_NO_GENERATION, + GLSLANG_OPT_NONE, + GLSLANG_OPT_SIMPLE, + GLSLANG_OPT_FULL, + LAST_ELEMENT_MARKER(GLSLANG_OPT_LEVEL_COUNT), +} glslang_optimization_level_t; + +/* EShTextureSamplerTransformMode counterpart */ +typedef enum { + GLSLANG_TEX_SAMP_TRANS_KEEP, + GLSLANG_TEX_SAMP_TRANS_UPGRADE_TEXTURE_REMOVE_SAMPLER, + LAST_ELEMENT_MARKER(GLSLANG_TEX_SAMP_TRANS_COUNT), +} glslang_texture_sampler_transform_mode_t; + +/* EShMessages counterpart */ +typedef enum { + GLSLANG_MSG_DEFAULT_BIT = 0, + GLSLANG_MSG_RELAXED_ERRORS_BIT = (1 << 0), + GLSLANG_MSG_SUPPRESS_WARNINGS_BIT = (1 << 1), + GLSLANG_MSG_AST_BIT = (1 << 2), + GLSLANG_MSG_SPV_RULES_BIT = (1 << 3), + GLSLANG_MSG_VULKAN_RULES_BIT = (1 << 4), + GLSLANG_MSG_ONLY_PREPROCESSOR_BIT = (1 << 5), + GLSLANG_MSG_READ_HLSL_BIT = (1 << 6), + GLSLANG_MSG_CASCADING_ERRORS_BIT = (1 << 7), + GLSLANG_MSG_KEEP_UNCALLED_BIT = (1 << 8), + GLSLANG_MSG_HLSL_OFFSETS_BIT = (1 << 9), + GLSLANG_MSG_DEBUG_INFO_BIT = (1 << 10), + GLSLANG_MSG_HLSL_ENABLE_16BIT_TYPES_BIT = (1 << 11), + GLSLANG_MSG_HLSL_LEGALIZATION_BIT = (1 << 12), + GLSLANG_MSG_HLSL_DX9_COMPATIBLE_BIT = (1 << 13), + GLSLANG_MSG_BUILTIN_SYMBOL_TABLE_BIT = (1 << 14), + GLSLANG_MSG_ENHANCED = (1 << 15), + LAST_ELEMENT_MARKER(GLSLANG_MSG_COUNT), +} glslang_messages_t; + +/* EShReflectionOptions counterpart */ +typedef enum { + GLSLANG_REFLECTION_DEFAULT_BIT = 0, + GLSLANG_REFLECTION_STRICT_ARRAY_SUFFIX_BIT = (1 << 0), + GLSLANG_REFLECTION_BASIC_ARRAY_SUFFIX_BIT = (1 << 1), + GLSLANG_REFLECTION_INTERMEDIATE_IOO_BIT = (1 << 2), + GLSLANG_REFLECTION_SEPARATE_BUFFERS_BIT = (1 << 3), + GLSLANG_REFLECTION_ALL_BLOCK_VARIABLES_BIT = (1 << 4), + GLSLANG_REFLECTION_UNWRAP_IO_BLOCKS_BIT = (1 << 5), + GLSLANG_REFLECTION_ALL_IO_VARIABLES_BIT = (1 << 6), + GLSLANG_REFLECTION_SHARED_STD140_SSBO_BIT = (1 << 7), + 
GLSLANG_REFLECTION_SHARED_STD140_UBO_BIT = (1 << 8), + LAST_ELEMENT_MARKER(GLSLANG_REFLECTION_COUNT), +} glslang_reflection_options_t; + +/* EProfile counterpart (from Versions.h) */ +typedef enum { + GLSLANG_BAD_PROFILE = 0, + GLSLANG_NO_PROFILE = (1 << 0), + GLSLANG_CORE_PROFILE = (1 << 1), + GLSLANG_COMPATIBILITY_PROFILE = (1 << 2), + GLSLANG_ES_PROFILE = (1 << 3), + LAST_ELEMENT_MARKER(GLSLANG_PROFILE_COUNT), +} glslang_profile_t; + +/* Shader options */ +typedef enum { + GLSLANG_SHADER_DEFAULT_BIT = 0, + GLSLANG_SHADER_AUTO_MAP_BINDINGS = (1 << 0), + GLSLANG_SHADER_AUTO_MAP_LOCATIONS = (1 << 1), + GLSLANG_SHADER_VULKAN_RULES_RELAXED = (1 << 2), + LAST_ELEMENT_MARKER(GLSLANG_SHADER_COUNT), +} glslang_shader_options_t; + +/* TResourceType counterpart */ +typedef enum { + GLSLANG_RESOURCE_TYPE_SAMPLER, + GLSLANG_RESOURCE_TYPE_TEXTURE, + GLSLANG_RESOURCE_TYPE_IMAGE, + GLSLANG_RESOURCE_TYPE_UBO, + GLSLANG_RESOURCE_TYPE_SSBO, + GLSLANG_RESOURCE_TYPE_UAV, + LAST_ELEMENT_MARKER(GLSLANG_RESOURCE_TYPE_COUNT), +} glslang_resource_type_t; + +#undef LAST_ELEMENT_MARKER + +#endif
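Taken together, the functions exported from glslang_c_interface.h above and the enums in this header form the complete C-interface compile pipeline: preprocess, parse, link, then SPIR-V generation and retrieval. Note how the packed version enums work: GLSLANG_TARGET_VULKAN_1_1 = (1 << 22) | (1 << 12) follows Vulkan's own major<<22 | minor<<12 version packing, and GLSLANG_TARGET_SPV_1_3 = (1 << 16) | (3 << 8) does the same for SPIR-V. Below is a minimal sketch of driving this API; glslang_initialize_process(), glslang_shader_create(), and the glslang_input_t layout are assumed from the portions of glslang_c_interface.h not reproduced in this hunk, and error handling is deliberately thin.

    // Sketch only: compile one GLSL compute shader to SPIR-V words.
    #include <glslang/Include/glslang_c_interface.h> // assumed install path
    #include <cstdio>
    #include <vector>

    std::vector<unsigned int> compile_compute(const char* glsl) {
        glslang_initialize_process(); // assumed; pair with glslang_finalize_process()

        glslang_input_t input = {};
        input.language = GLSLANG_SOURCE_GLSL;
        input.stage = GLSLANG_STAGE_COMPUTE;
        input.client = GLSLANG_CLIENT_VULKAN;
        input.client_version = GLSLANG_TARGET_VULKAN_1_1;
        input.target_language = GLSLANG_TARGET_SPV;
        input.target_language_version = GLSLANG_TARGET_SPV_1_3;
        input.code = glsl;
        input.default_version = 450;
        input.default_profile = GLSLANG_NO_PROFILE;
        input.messages = (glslang_messages_t)(GLSLANG_MSG_SPV_RULES_BIT |
                                              GLSLANG_MSG_VULKAN_RULES_BIT);
        // input.resource (built-in resource limits) is assumed to be set,
        // e.g. via glslang_default_resource() from resource_limits_c.h.

        glslang_shader_t* shader = glslang_shader_create(&input); // assumed ctor
        std::vector<unsigned int> spirv;
        if (glslang_shader_preprocess(shader, &input) &&
            glslang_shader_parse(shader, &input)) {
            glslang_program_t* program = glslang_program_create();
            glslang_program_add_shader(program, shader);
            if (glslang_program_link(program, input.messages)) {
                glslang_program_SPIRV_generate(program, GLSLANG_STAGE_COMPUTE);
                spirv.resize(glslang_program_SPIRV_get_size(program));
                glslang_program_SPIRV_get(program, spirv.data());
            } else {
                std::fprintf(stderr, "%s\n", glslang_program_get_info_log(program));
            }
            glslang_program_delete(program);
        } else {
            std::fprintf(stderr, "%s\n", glslang_shader_get_info_log(shader));
        }
        glslang_shader_delete(shader);
        return spirv;
    }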
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/intermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/intermediate.h new file mode 100644 index 0000000..a024002 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Include/intermediate.h @@ -0,0 +1,1850 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2016 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// Definition of the in-memory high-level intermediate representation +// of shaders. This is a tree that the parser creates. +// +// Nodes in the tree are defined as a hierarchy of classes derived from +// TIntermNode. Each is a node in a tree. There is no preset branching factor; +// each node can have its own type of list of children. +// + +#ifndef __INTERMEDIATE_H +#define __INTERMEDIATE_H + +#if defined(_MSC_VER) && _MSC_VER >= 1900 + #pragma warning(disable : 4464) // relative include path contains '..' + #pragma warning(disable : 5026) // 'glslang::TIntermUnary': move constructor was implicitly defined as deleted +#endif + +#include "../Include/Common.h" +#include "../Include/Types.h" +#include "../Include/ConstantUnion.h" + +namespace glslang { + +class TIntermediate; + +// +// Operators used by the high-level (parse tree) representation. +// +enum TOperator { + EOpNull, // if in a node, should only mean a node is still being built + EOpSequence, // denotes a list of statements, or parameters, etc. + EOpScope, // Used by debugging to denote a scoped list of statements + EOpLinkerObjects, // for aggregate node of objects the linker may need, if not referenced by the rest of the AST + EOpFunctionCall, + EOpFunction, // For function definition + EOpParameters, // an aggregate listing the parameters to a function +#ifndef GLSLANG_WEB + EOpSpirvInst, +#endif + + // + // Unary operators + // + + EOpNegative, + EOpLogicalNot, + EOpVectorLogicalNot, + EOpBitwiseNot, + + EOpPostIncrement, + EOpPostDecrement, + EOpPreIncrement, + EOpPreDecrement, + + EOpCopyObject, + + EOpDeclare, // Used by debugging to force declaration of variable in correct scope + + // (u)int* -> bool + EOpConvInt8ToBool, + EOpConvUint8ToBool, + EOpConvInt16ToBool, + EOpConvUint16ToBool, + EOpConvIntToBool, + EOpConvUintToBool, + EOpConvInt64ToBool, + EOpConvUint64ToBool, + + // float* -> bool + EOpConvFloat16ToBool, + EOpConvFloatToBool, + EOpConvDoubleToBool, + + // bool -> (u)int* + EOpConvBoolToInt8, + EOpConvBoolToUint8, + EOpConvBoolToInt16, + EOpConvBoolToUint16, + EOpConvBoolToInt, + EOpConvBoolToUint, + EOpConvBoolToInt64, + EOpConvBoolToUint64, + + // bool -> float* + EOpConvBoolToFloat16, + EOpConvBoolToFloat, + EOpConvBoolToDouble, + + // int8_t -> (u)int* + EOpConvInt8ToInt16, + EOpConvInt8ToInt, + EOpConvInt8ToInt64, + EOpConvInt8ToUint8, + EOpConvInt8ToUint16, + EOpConvInt8ToUint, + EOpConvInt8ToUint64, + + // uint8_t -> (u)int* + EOpConvUint8ToInt8, + EOpConvUint8ToInt16, + EOpConvUint8ToInt, + EOpConvUint8ToInt64, + EOpConvUint8ToUint16, + EOpConvUint8ToUint, + EOpConvUint8ToUint64, + + // int8_t -> float* + EOpConvInt8ToFloat16, + EOpConvInt8ToFloat, + EOpConvInt8ToDouble, + + // uint8_t -> float* + EOpConvUint8ToFloat16, + EOpConvUint8ToFloat, + EOpConvUint8ToDouble, + + // int16_t -> (u)int* + EOpConvInt16ToInt8, + EOpConvInt16ToInt, + EOpConvInt16ToInt64, + EOpConvInt16ToUint8, + EOpConvInt16ToUint16, + EOpConvInt16ToUint, + EOpConvInt16ToUint64, + + // uint16_t -> (u)int* + EOpConvUint16ToInt8, + EOpConvUint16ToInt16, + EOpConvUint16ToInt, + EOpConvUint16ToInt64, + EOpConvUint16ToUint8, + EOpConvUint16ToUint, + EOpConvUint16ToUint64, + + // int16_t -> float* + EOpConvInt16ToFloat16, + EOpConvInt16ToFloat, + EOpConvInt16ToDouble, + + // uint16_t -> float* + EOpConvUint16ToFloat16, + EOpConvUint16ToFloat, + EOpConvUint16ToDouble, + + // int32_t -> (u)int* + EOpConvIntToInt8, + EOpConvIntToInt16, + EOpConvIntToInt64, + EOpConvIntToUint8, + EOpConvIntToUint16, + EOpConvIntToUint, + EOpConvIntToUint64, + + // uint32_t -> (u)int* + EOpConvUintToInt8, + EOpConvUintToInt16, + EOpConvUintToInt, + EOpConvUintToInt64, + EOpConvUintToUint8, + EOpConvUintToUint16, + EOpConvUintToUint64, + + // int32_t -> float* + EOpConvIntToFloat16, + 
EOpConvIntToFloat, + EOpConvIntToDouble, + + // uint32_t -> float* + EOpConvUintToFloat16, + EOpConvUintToFloat, + EOpConvUintToDouble, + + // int64_t -> (u)int* + EOpConvInt64ToInt8, + EOpConvInt64ToInt16, + EOpConvInt64ToInt, + EOpConvInt64ToUint8, + EOpConvInt64ToUint16, + EOpConvInt64ToUint, + EOpConvInt64ToUint64, + + // uint64_t -> (u)int* + EOpConvUint64ToInt8, + EOpConvUint64ToInt16, + EOpConvUint64ToInt, + EOpConvUint64ToInt64, + EOpConvUint64ToUint8, + EOpConvUint64ToUint16, + EOpConvUint64ToUint, + + // int64_t -> float* + EOpConvInt64ToFloat16, + EOpConvInt64ToFloat, + EOpConvInt64ToDouble, + + // uint64_t -> float* + EOpConvUint64ToFloat16, + EOpConvUint64ToFloat, + EOpConvUint64ToDouble, + + // float16_t -> (u)int* + EOpConvFloat16ToInt8, + EOpConvFloat16ToInt16, + EOpConvFloat16ToInt, + EOpConvFloat16ToInt64, + EOpConvFloat16ToUint8, + EOpConvFloat16ToUint16, + EOpConvFloat16ToUint, + EOpConvFloat16ToUint64, + + // float16_t -> float* + EOpConvFloat16ToFloat, + EOpConvFloat16ToDouble, + + // float -> (u)int* + EOpConvFloatToInt8, + EOpConvFloatToInt16, + EOpConvFloatToInt, + EOpConvFloatToInt64, + EOpConvFloatToUint8, + EOpConvFloatToUint16, + EOpConvFloatToUint, + EOpConvFloatToUint64, + + // float -> float* + EOpConvFloatToFloat16, + EOpConvFloatToDouble, + + // float64_t -> (u)int* + EOpConvDoubleToInt8, + EOpConvDoubleToInt16, + EOpConvDoubleToInt, + EOpConvDoubleToInt64, + EOpConvDoubleToUint8, + EOpConvDoubleToUint16, + EOpConvDoubleToUint, + EOpConvDoubleToUint64, + + // float64_t -> float* + EOpConvDoubleToFloat16, + EOpConvDoubleToFloat, + + // uint64_t <-> pointer + EOpConvUint64ToPtr, + EOpConvPtrToUint64, + + // uvec2 <-> pointer + EOpConvUvec2ToPtr, + EOpConvPtrToUvec2, + + // uint64_t -> accelerationStructureEXT + EOpConvUint64ToAccStruct, + + // uvec2 -> accelerationStructureEXT + EOpConvUvec2ToAccStruct, + + // + // binary operations + // + + EOpAdd, + EOpSub, + EOpMul, + EOpDiv, + EOpMod, + EOpRightShift, + EOpLeftShift, + EOpAnd, + EOpInclusiveOr, + EOpExclusiveOr, + EOpEqual, + EOpNotEqual, + EOpVectorEqual, + EOpVectorNotEqual, + EOpLessThan, + EOpGreaterThan, + EOpLessThanEqual, + EOpGreaterThanEqual, + EOpComma, + + EOpVectorTimesScalar, + EOpVectorTimesMatrix, + EOpMatrixTimesVector, + EOpMatrixTimesScalar, + + EOpLogicalOr, + EOpLogicalXor, + EOpLogicalAnd, + + EOpIndexDirect, + EOpIndexIndirect, + EOpIndexDirectStruct, + + EOpVectorSwizzle, + + EOpMethod, + EOpScoping, + + // + // Built-in functions mapped to operators + // + + EOpRadians, + EOpDegrees, + EOpSin, + EOpCos, + EOpTan, + EOpAsin, + EOpAcos, + EOpAtan, + EOpSinh, + EOpCosh, + EOpTanh, + EOpAsinh, + EOpAcosh, + EOpAtanh, + + EOpPow, + EOpExp, + EOpLog, + EOpExp2, + EOpLog2, + EOpSqrt, + EOpInverseSqrt, + + EOpAbs, + EOpSign, + EOpFloor, + EOpTrunc, + EOpRound, + EOpRoundEven, + EOpCeil, + EOpFract, + EOpModf, + EOpMin, + EOpMax, + EOpClamp, + EOpMix, + EOpStep, + EOpSmoothStep, + + EOpIsNan, + EOpIsInf, + + EOpFma, + + EOpFrexp, + EOpLdexp, + + EOpFloatBitsToInt, + EOpFloatBitsToUint, + EOpIntBitsToFloat, + EOpUintBitsToFloat, + EOpDoubleBitsToInt64, + EOpDoubleBitsToUint64, + EOpInt64BitsToDouble, + EOpUint64BitsToDouble, + EOpFloat16BitsToInt16, + EOpFloat16BitsToUint16, + EOpInt16BitsToFloat16, + EOpUint16BitsToFloat16, + EOpPackSnorm2x16, + EOpUnpackSnorm2x16, + EOpPackUnorm2x16, + EOpUnpackUnorm2x16, + EOpPackSnorm4x8, + EOpUnpackSnorm4x8, + EOpPackUnorm4x8, + EOpUnpackUnorm4x8, + EOpPackHalf2x16, + EOpUnpackHalf2x16, + EOpPackDouble2x32, + EOpUnpackDouble2x32, + EOpPackInt2x32, + 
EOpUnpackInt2x32, + EOpPackUint2x32, + EOpUnpackUint2x32, + EOpPackFloat2x16, + EOpUnpackFloat2x16, + EOpPackInt2x16, + EOpUnpackInt2x16, + EOpPackUint2x16, + EOpUnpackUint2x16, + EOpPackInt4x16, + EOpUnpackInt4x16, + EOpPackUint4x16, + EOpUnpackUint4x16, + EOpPack16, + EOpPack32, + EOpPack64, + EOpUnpack32, + EOpUnpack16, + EOpUnpack8, + + EOpLength, + EOpDistance, + EOpDot, + EOpCross, + EOpNormalize, + EOpFaceForward, + EOpReflect, + EOpRefract, + + EOpMin3, + EOpMax3, + EOpMid3, + + EOpDPdx, // Fragment only + EOpDPdy, // Fragment only + EOpFwidth, // Fragment only + EOpDPdxFine, // Fragment only + EOpDPdyFine, // Fragment only + EOpFwidthFine, // Fragment only + EOpDPdxCoarse, // Fragment only + EOpDPdyCoarse, // Fragment only + EOpFwidthCoarse, // Fragment only + + EOpInterpolateAtCentroid, // Fragment only + EOpInterpolateAtSample, // Fragment only + EOpInterpolateAtOffset, // Fragment only + EOpInterpolateAtVertex, + + EOpMatrixTimesMatrix, + EOpOuterProduct, + EOpDeterminant, + EOpMatrixInverse, + EOpTranspose, + + EOpFtransform, + + EOpNoise, + + EOpEmitVertex, // geometry only + EOpEndPrimitive, // geometry only + EOpEmitStreamVertex, // geometry only + EOpEndStreamPrimitive, // geometry only + + EOpBarrier, + EOpMemoryBarrier, + EOpMemoryBarrierAtomicCounter, + EOpMemoryBarrierBuffer, + EOpMemoryBarrierImage, + EOpMemoryBarrierShared, // compute only + EOpGroupMemoryBarrier, // compute only + + EOpBallot, + EOpReadInvocation, + EOpReadFirstInvocation, + + EOpAnyInvocation, + EOpAllInvocations, + EOpAllInvocationsEqual, + + EOpSubgroupGuardStart, + EOpSubgroupBarrier, + EOpSubgroupMemoryBarrier, + EOpSubgroupMemoryBarrierBuffer, + EOpSubgroupMemoryBarrierImage, + EOpSubgroupMemoryBarrierShared, // compute only + EOpSubgroupElect, + EOpSubgroupAll, + EOpSubgroupAny, + EOpSubgroupAllEqual, + EOpSubgroupBroadcast, + EOpSubgroupBroadcastFirst, + EOpSubgroupBallot, + EOpSubgroupInverseBallot, + EOpSubgroupBallotBitExtract, + EOpSubgroupBallotBitCount, + EOpSubgroupBallotInclusiveBitCount, + EOpSubgroupBallotExclusiveBitCount, + EOpSubgroupBallotFindLSB, + EOpSubgroupBallotFindMSB, + EOpSubgroupShuffle, + EOpSubgroupShuffleXor, + EOpSubgroupShuffleUp, + EOpSubgroupShuffleDown, + EOpSubgroupAdd, + EOpSubgroupMul, + EOpSubgroupMin, + EOpSubgroupMax, + EOpSubgroupAnd, + EOpSubgroupOr, + EOpSubgroupXor, + EOpSubgroupInclusiveAdd, + EOpSubgroupInclusiveMul, + EOpSubgroupInclusiveMin, + EOpSubgroupInclusiveMax, + EOpSubgroupInclusiveAnd, + EOpSubgroupInclusiveOr, + EOpSubgroupInclusiveXor, + EOpSubgroupExclusiveAdd, + EOpSubgroupExclusiveMul, + EOpSubgroupExclusiveMin, + EOpSubgroupExclusiveMax, + EOpSubgroupExclusiveAnd, + EOpSubgroupExclusiveOr, + EOpSubgroupExclusiveXor, + EOpSubgroupClusteredAdd, + EOpSubgroupClusteredMul, + EOpSubgroupClusteredMin, + EOpSubgroupClusteredMax, + EOpSubgroupClusteredAnd, + EOpSubgroupClusteredOr, + EOpSubgroupClusteredXor, + EOpSubgroupQuadBroadcast, + EOpSubgroupQuadSwapHorizontal, + EOpSubgroupQuadSwapVertical, + EOpSubgroupQuadSwapDiagonal, + + EOpSubgroupPartition, + EOpSubgroupPartitionedAdd, + EOpSubgroupPartitionedMul, + EOpSubgroupPartitionedMin, + EOpSubgroupPartitionedMax, + EOpSubgroupPartitionedAnd, + EOpSubgroupPartitionedOr, + EOpSubgroupPartitionedXor, + EOpSubgroupPartitionedInclusiveAdd, + EOpSubgroupPartitionedInclusiveMul, + EOpSubgroupPartitionedInclusiveMin, + EOpSubgroupPartitionedInclusiveMax, + EOpSubgroupPartitionedInclusiveAnd, + EOpSubgroupPartitionedInclusiveOr, + EOpSubgroupPartitionedInclusiveXor, + 
EOpSubgroupPartitionedExclusiveAdd, + EOpSubgroupPartitionedExclusiveMul, + EOpSubgroupPartitionedExclusiveMin, + EOpSubgroupPartitionedExclusiveMax, + EOpSubgroupPartitionedExclusiveAnd, + EOpSubgroupPartitionedExclusiveOr, + EOpSubgroupPartitionedExclusiveXor, + + EOpSubgroupGuardStop, + + EOpMinInvocations, + EOpMaxInvocations, + EOpAddInvocations, + EOpMinInvocationsNonUniform, + EOpMaxInvocationsNonUniform, + EOpAddInvocationsNonUniform, + EOpMinInvocationsInclusiveScan, + EOpMaxInvocationsInclusiveScan, + EOpAddInvocationsInclusiveScan, + EOpMinInvocationsInclusiveScanNonUniform, + EOpMaxInvocationsInclusiveScanNonUniform, + EOpAddInvocationsInclusiveScanNonUniform, + EOpMinInvocationsExclusiveScan, + EOpMaxInvocationsExclusiveScan, + EOpAddInvocationsExclusiveScan, + EOpMinInvocationsExclusiveScanNonUniform, + EOpMaxInvocationsExclusiveScanNonUniform, + EOpAddInvocationsExclusiveScanNonUniform, + EOpSwizzleInvocations, + EOpSwizzleInvocationsMasked, + EOpWriteInvocation, + EOpMbcnt, + + EOpCubeFaceIndex, + EOpCubeFaceCoord, + EOpTime, + + EOpAtomicAdd, + EOpAtomicSubtract, + EOpAtomicMin, + EOpAtomicMax, + EOpAtomicAnd, + EOpAtomicOr, + EOpAtomicXor, + EOpAtomicExchange, + EOpAtomicCompSwap, + EOpAtomicLoad, + EOpAtomicStore, + + EOpAtomicCounterIncrement, // results in pre-increment value + EOpAtomicCounterDecrement, // results in post-decrement value + EOpAtomicCounter, + EOpAtomicCounterAdd, + EOpAtomicCounterSubtract, + EOpAtomicCounterMin, + EOpAtomicCounterMax, + EOpAtomicCounterAnd, + EOpAtomicCounterOr, + EOpAtomicCounterXor, + EOpAtomicCounterExchange, + EOpAtomicCounterCompSwap, + + EOpAny, + EOpAll, + + EOpCooperativeMatrixLoad, + EOpCooperativeMatrixStore, + EOpCooperativeMatrixMulAdd, + + EOpBeginInvocationInterlock, // Fragment only + EOpEndInvocationInterlock, // Fragment only + + EOpIsHelperInvocation, + + EOpDebugPrintf, + + // + // Branch + // + + EOpKill, // Fragment only + EOpTerminateInvocation, // Fragment only + EOpDemote, // Fragment only + EOpTerminateRayKHR, // Any-hit only + EOpIgnoreIntersectionKHR, // Any-hit only + EOpReturn, + EOpBreak, + EOpContinue, + EOpCase, + EOpDefault, + + // + // Constructors + // + + EOpConstructGuardStart, + EOpConstructInt, // these first scalar forms also identify what implicit conversion is needed + EOpConstructUint, + EOpConstructInt8, + EOpConstructUint8, + EOpConstructInt16, + EOpConstructUint16, + EOpConstructInt64, + EOpConstructUint64, + EOpConstructBool, + EOpConstructFloat, + EOpConstructDouble, + // Keep vector and matrix constructors in a consistent relative order for + // TParseContext::constructBuiltIn, which converts between 8/16/32 bit + // vector constructors + EOpConstructVec2, + EOpConstructVec3, + EOpConstructVec4, + EOpConstructMat2x2, + EOpConstructMat2x3, + EOpConstructMat2x4, + EOpConstructMat3x2, + EOpConstructMat3x3, + EOpConstructMat3x4, + EOpConstructMat4x2, + EOpConstructMat4x3, + EOpConstructMat4x4, + EOpConstructDVec2, + EOpConstructDVec3, + EOpConstructDVec4, + EOpConstructBVec2, + EOpConstructBVec3, + EOpConstructBVec4, + EOpConstructI8Vec2, + EOpConstructI8Vec3, + EOpConstructI8Vec4, + EOpConstructU8Vec2, + EOpConstructU8Vec3, + EOpConstructU8Vec4, + EOpConstructI16Vec2, + EOpConstructI16Vec3, + EOpConstructI16Vec4, + EOpConstructU16Vec2, + EOpConstructU16Vec3, + EOpConstructU16Vec4, + EOpConstructIVec2, + EOpConstructIVec3, + EOpConstructIVec4, + EOpConstructUVec2, + EOpConstructUVec3, + EOpConstructUVec4, + EOpConstructI64Vec2, + EOpConstructI64Vec3, + EOpConstructI64Vec4, + 
EOpConstructU64Vec2, + EOpConstructU64Vec3, + EOpConstructU64Vec4, + EOpConstructDMat2x2, + EOpConstructDMat2x3, + EOpConstructDMat2x4, + EOpConstructDMat3x2, + EOpConstructDMat3x3, + EOpConstructDMat3x4, + EOpConstructDMat4x2, + EOpConstructDMat4x3, + EOpConstructDMat4x4, + EOpConstructIMat2x2, + EOpConstructIMat2x3, + EOpConstructIMat2x4, + EOpConstructIMat3x2, + EOpConstructIMat3x3, + EOpConstructIMat3x4, + EOpConstructIMat4x2, + EOpConstructIMat4x3, + EOpConstructIMat4x4, + EOpConstructUMat2x2, + EOpConstructUMat2x3, + EOpConstructUMat2x4, + EOpConstructUMat3x2, + EOpConstructUMat3x3, + EOpConstructUMat3x4, + EOpConstructUMat4x2, + EOpConstructUMat4x3, + EOpConstructUMat4x4, + EOpConstructBMat2x2, + EOpConstructBMat2x3, + EOpConstructBMat2x4, + EOpConstructBMat3x2, + EOpConstructBMat3x3, + EOpConstructBMat3x4, + EOpConstructBMat4x2, + EOpConstructBMat4x3, + EOpConstructBMat4x4, + EOpConstructFloat16, + EOpConstructF16Vec2, + EOpConstructF16Vec3, + EOpConstructF16Vec4, + EOpConstructF16Mat2x2, + EOpConstructF16Mat2x3, + EOpConstructF16Mat2x4, + EOpConstructF16Mat3x2, + EOpConstructF16Mat3x3, + EOpConstructF16Mat3x4, + EOpConstructF16Mat4x2, + EOpConstructF16Mat4x3, + EOpConstructF16Mat4x4, + EOpConstructStruct, + EOpConstructTextureSampler, + EOpConstructNonuniform, // expected to be transformed away, not present in final AST + EOpConstructReference, + EOpConstructCooperativeMatrix, + EOpConstructAccStruct, + EOpConstructGuardEnd, + + // + // moves + // + + EOpAssign, + EOpAddAssign, + EOpSubAssign, + EOpMulAssign, + EOpVectorTimesMatrixAssign, + EOpVectorTimesScalarAssign, + EOpMatrixTimesScalarAssign, + EOpMatrixTimesMatrixAssign, + EOpDivAssign, + EOpModAssign, + EOpAndAssign, + EOpInclusiveOrAssign, + EOpExclusiveOrAssign, + EOpLeftShiftAssign, + EOpRightShiftAssign, + + // + // Array operators + // + + // Can apply to arrays, vectors, or matrices. + // Can be decomposed to a constant at compile time, but this does not always happen, + // due to link-time effects. So, consumer can expect either a link-time sized or + // run-time sized array. 
+ EOpArrayLength, + + // + // Image operations + // + + EOpImageGuardBegin, + + EOpImageQuerySize, + EOpImageQuerySamples, + EOpImageLoad, + EOpImageStore, + EOpImageLoadLod, + EOpImageStoreLod, + EOpImageAtomicAdd, + EOpImageAtomicMin, + EOpImageAtomicMax, + EOpImageAtomicAnd, + EOpImageAtomicOr, + EOpImageAtomicXor, + EOpImageAtomicExchange, + EOpImageAtomicCompSwap, + EOpImageAtomicLoad, + EOpImageAtomicStore, + + EOpSubpassLoad, + EOpSubpassLoadMS, + EOpSparseImageLoad, + EOpSparseImageLoadLod, + + EOpImageGuardEnd, + + // + // Texture operations + // + + EOpTextureGuardBegin, + + EOpTextureQuerySize, + EOpTextureQueryLod, + EOpTextureQueryLevels, + EOpTextureQuerySamples, + + EOpSamplingGuardBegin, + + EOpTexture, + EOpTextureProj, + EOpTextureLod, + EOpTextureOffset, + EOpTextureFetch, + EOpTextureFetchOffset, + EOpTextureProjOffset, + EOpTextureLodOffset, + EOpTextureProjLod, + EOpTextureProjLodOffset, + EOpTextureGrad, + EOpTextureGradOffset, + EOpTextureProjGrad, + EOpTextureProjGradOffset, + EOpTextureGather, + EOpTextureGatherOffset, + EOpTextureGatherOffsets, + EOpTextureClamp, + EOpTextureOffsetClamp, + EOpTextureGradClamp, + EOpTextureGradOffsetClamp, + EOpTextureGatherLod, + EOpTextureGatherLodOffset, + EOpTextureGatherLodOffsets, + EOpFragmentMaskFetch, + EOpFragmentFetch, + + EOpSparseTextureGuardBegin, + + EOpSparseTexture, + EOpSparseTextureLod, + EOpSparseTextureOffset, + EOpSparseTextureFetch, + EOpSparseTextureFetchOffset, + EOpSparseTextureLodOffset, + EOpSparseTextureGrad, + EOpSparseTextureGradOffset, + EOpSparseTextureGather, + EOpSparseTextureGatherOffset, + EOpSparseTextureGatherOffsets, + EOpSparseTexelsResident, + EOpSparseTextureClamp, + EOpSparseTextureOffsetClamp, + EOpSparseTextureGradClamp, + EOpSparseTextureGradOffsetClamp, + EOpSparseTextureGatherLod, + EOpSparseTextureGatherLodOffset, + EOpSparseTextureGatherLodOffsets, + + EOpSparseTextureGuardEnd, + + EOpImageFootprintGuardBegin, + EOpImageSampleFootprintNV, + EOpImageSampleFootprintClampNV, + EOpImageSampleFootprintLodNV, + EOpImageSampleFootprintGradNV, + EOpImageSampleFootprintGradClampNV, + EOpImageFootprintGuardEnd, + EOpSamplingGuardEnd, + EOpTextureGuardEnd, + + // + // Integer operations + // + + EOpAddCarry, + EOpSubBorrow, + EOpUMulExtended, + EOpIMulExtended, + EOpBitfieldExtract, + EOpBitfieldInsert, + EOpBitFieldReverse, + EOpBitCount, + EOpFindLSB, + EOpFindMSB, + + EOpCountLeadingZeros, + EOpCountTrailingZeros, + EOpAbsDifference, + EOpAddSaturate, + EOpSubSaturate, + EOpAverage, + EOpAverageRounded, + EOpMul32x16, + + EOpTraceNV, + EOpTraceRayMotionNV, + EOpTraceKHR, + EOpReportIntersection, + EOpIgnoreIntersectionNV, + EOpTerminateRayNV, + EOpExecuteCallableNV, + EOpExecuteCallableKHR, + EOpWritePackedPrimitiveIndices4x8NV, + EOpEmitMeshTasksEXT, + EOpSetMeshOutputsEXT, + + // + // GL_EXT_ray_query operations + // + + EOpRayQueryInitialize, + EOpRayQueryTerminate, + EOpRayQueryGenerateIntersection, + EOpRayQueryConfirmIntersection, + EOpRayQueryProceed, + EOpRayQueryGetIntersectionType, + EOpRayQueryGetRayTMin, + EOpRayQueryGetRayFlags, + EOpRayQueryGetIntersectionT, + EOpRayQueryGetIntersectionInstanceCustomIndex, + EOpRayQueryGetIntersectionInstanceId, + EOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffset, + EOpRayQueryGetIntersectionGeometryIndex, + EOpRayQueryGetIntersectionPrimitiveIndex, + EOpRayQueryGetIntersectionBarycentrics, + EOpRayQueryGetIntersectionFrontFace, + EOpRayQueryGetIntersectionCandidateAABBOpaque, + EOpRayQueryGetIntersectionObjectRayDirection, + 
EOpRayQueryGetIntersectionObjectRayOrigin, + EOpRayQueryGetWorldRayDirection, + EOpRayQueryGetWorldRayOrigin, + EOpRayQueryGetIntersectionObjectToWorld, + EOpRayQueryGetIntersectionWorldToObject, + + // + // HLSL operations + // + + EOpClip, // discard if input value < 0 + EOpIsFinite, + EOpLog10, // base 10 log + EOpRcp, // 1/x + EOpSaturate, // clamp from 0 to 1 + EOpSinCos, // sin and cos in out parameters + EOpGenMul, // mul(x,y) on any of mat/vec/scalars + EOpDst, // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w + EOpInterlockedAdd, // atomic ops, but uses [optional] out arg instead of return + EOpInterlockedAnd, // ... + EOpInterlockedCompareExchange, // ... + EOpInterlockedCompareStore, // ... + EOpInterlockedExchange, // ... + EOpInterlockedMax, // ... + EOpInterlockedMin, // ... + EOpInterlockedOr, // ... + EOpInterlockedXor, // ... + EOpAllMemoryBarrierWithGroupSync, // memory barriers without non-hlsl AST equivalents + EOpDeviceMemoryBarrier, // ... + EOpDeviceMemoryBarrierWithGroupSync, // ... + EOpWorkgroupMemoryBarrier, // ... + EOpWorkgroupMemoryBarrierWithGroupSync, // ... + EOpEvaluateAttributeSnapped, // InterpolateAtOffset with int position on 16x16 grid + EOpF32tof16, // HLSL conversion: half of a PackHalf2x16 + EOpF16tof32, // HLSL conversion: half of an UnpackHalf2x16 + EOpLit, // HLSL lighting coefficient vector + EOpTextureBias, // HLSL texture bias: will be lowered to EOpTexture + EOpAsDouble, // slightly different from EOpUint64BitsToDouble + EOpD3DCOLORtoUBYTE4, // convert and swizzle 4-component color to UBYTE4 range + + EOpMethodSample, // Texture object methods. These are translated to existing + EOpMethodSampleBias, // AST methods, and exist to represent HLSL semantics until that + EOpMethodSampleCmp, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodSampleCmpLevelZero, // ... + EOpMethodSampleGrad, // ... + EOpMethodSampleLevel, // ... + EOpMethodLoad, // ... + EOpMethodGetDimensions, // ... + EOpMethodGetSamplePosition, // ... + EOpMethodGather, // ... + EOpMethodCalculateLevelOfDetail, // ... + EOpMethodCalculateLevelOfDetailUnclamped, // ... + + // Load already defined above for textures + EOpMethodLoad2, // Structure buffer object methods. These are translated to existing + EOpMethodLoad3, // AST methods, and exist to represent HLSL semantics until that + EOpMethodLoad4, // translation is performed. See HlslParseContext::decomposeSampleMethods(). + EOpMethodStore, // ... + EOpMethodStore2, // ... + EOpMethodStore3, // ... + EOpMethodStore4, // ... + EOpMethodIncrementCounter, // ... + EOpMethodDecrementCounter, // ... + // EOpMethodAppend is defined for geo shaders below + EOpMethodConsume, + + // SM5 texture methods + EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about + EOpMethodGatherGreen, // translation to existing AST opcodes. They exist temporarily + EOpMethodGatherBlue, // because HLSL arguments are slightly different. + EOpMethodGatherAlpha, // ... + EOpMethodGatherCmp, // ... + EOpMethodGatherCmpRed, // ... + EOpMethodGatherCmpGreen, // ... + EOpMethodGatherCmpBlue, // ... + EOpMethodGatherCmpAlpha, // ... + + // geometry methods + EOpMethodAppend, // Geometry shader methods + EOpMethodRestartStrip, // ... + + // matrix + EOpMatrixSwizzle, // select multiple matrix components (non-column) + + // SM6 wave ops + EOpWaveGetLaneCount, // Will decompose to gl_SubgroupSize. + EOpWaveGetLaneIndex, // Will decompose to gl_SubgroupInvocationID. 
+ EOpWaveActiveCountBits, // Will decompose to subgroupBallotBitCount(subgroupBallot()). + EOpWavePrefixCountBits, // Will decompose to subgroupBallotInclusiveBitCount(subgroupBallot()). + + // Shader Clock Ops + EOpReadClockSubgroupKHR, + EOpReadClockDeviceKHR, +}; + +class TIntermTraverser; +class TIntermOperator; +class TIntermAggregate; +class TIntermUnary; +class TIntermBinary; +class TIntermConstantUnion; +class TIntermSelection; +class TIntermSwitch; +class TIntermBranch; +class TIntermTyped; +class TIntermMethod; +class TIntermSymbol; +class TIntermLoop; + +} // end namespace glslang + +// +// Base class for the tree nodes +// +// (Put outside the glslang namespace, as it's used as part of the external interface.) +// +class TIntermNode { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + + TIntermNode() { loc.init(); } + virtual const glslang::TSourceLoc& getLoc() const { return loc; } + virtual void setLoc(const glslang::TSourceLoc& l) { loc = l; } + virtual void traverse(glslang::TIntermTraverser*) = 0; + virtual glslang::TIntermTyped* getAsTyped() { return 0; } + virtual glslang::TIntermOperator* getAsOperator() { return 0; } + virtual glslang::TIntermConstantUnion* getAsConstantUnion() { return 0; } + virtual glslang::TIntermAggregate* getAsAggregate() { return 0; } + virtual glslang::TIntermUnary* getAsUnaryNode() { return 0; } + virtual glslang::TIntermBinary* getAsBinaryNode() { return 0; } + virtual glslang::TIntermSelection* getAsSelectionNode() { return 0; } + virtual glslang::TIntermSwitch* getAsSwitchNode() { return 0; } + virtual glslang::TIntermMethod* getAsMethodNode() { return 0; } + virtual glslang::TIntermSymbol* getAsSymbolNode() { return 0; } + virtual glslang::TIntermBranch* getAsBranchNode() { return 0; } + virtual glslang::TIntermLoop* getAsLoopNode() { return 0; } + + virtual const glslang::TIntermTyped* getAsTyped() const { return 0; } + virtual const glslang::TIntermOperator* getAsOperator() const { return 0; } + virtual const glslang::TIntermConstantUnion* getAsConstantUnion() const { return 0; } + virtual const glslang::TIntermAggregate* getAsAggregate() const { return 0; } + virtual const glslang::TIntermUnary* getAsUnaryNode() const { return 0; } + virtual const glslang::TIntermBinary* getAsBinaryNode() const { return 0; } + virtual const glslang::TIntermSelection* getAsSelectionNode() const { return 0; } + virtual const glslang::TIntermSwitch* getAsSwitchNode() const { return 0; } + virtual const glslang::TIntermMethod* getAsMethodNode() const { return 0; } + virtual const glslang::TIntermSymbol* getAsSymbolNode() const { return 0; } + virtual const glslang::TIntermBranch* getAsBranchNode() const { return 0; } + virtual const glslang::TIntermLoop* getAsLoopNode() const { return 0; } + virtual ~TIntermNode() { } + +protected: + TIntermNode(const TIntermNode&); + TIntermNode& operator=(const TIntermNode&); + glslang::TSourceLoc loc; +}; + +namespace glslang { + +// +// This is just to help yacc. +// +struct TIntermNodePair { + TIntermNode* node1; + TIntermNode* node2; +}; + +// +// Intermediate class for nodes that have a type. 
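The getAs*() family on TIntermNode above is glslang's hand-rolled run-time type information: every accessor returns nullptr except in the one subclass that overrides it, so node kinds can be tested and downcast without C++ RTTI. A small sketch of the idiom (the helper function itself is hypothetical, not part of the header):

    // Classify a node by probing the getAs*() accessors in turn.
    #include "glslang/Include/intermediate.h"

    const char* nodeKind(TIntermNode* node) {
        if (node->getAsSymbolNode())    return "symbol";
        if (node->getAsConstantUnion()) return "constant";
        if (node->getAsUnaryNode())     return "unary";
        if (node->getAsBinaryNode())    return "binary";
        if (node->getAsSelectionNode()) return "selection";
        if (node->getAsLoopNode())      return "loop";
        if (node->getAsBranchNode())    return "branch";
        if (node->getAsAggregate())     return "aggregate";
        return "other";
    }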
+// +class TIntermTyped : public TIntermNode { +public: + TIntermTyped(const TType& t) { type.shallowCopy(t); } + TIntermTyped(TBasicType basicType) { TType bt(basicType); type.shallowCopy(bt); } + virtual TIntermTyped* getAsTyped() { return this; } + virtual const TIntermTyped* getAsTyped() const { return this; } + virtual void setType(const TType& t) { type.shallowCopy(t); } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { return type; } + + virtual TBasicType getBasicType() const { return type.getBasicType(); } + virtual TQualifier& getQualifier() { return type.getQualifier(); } + virtual const TQualifier& getQualifier() const { return type.getQualifier(); } + virtual TArraySizes* getArraySizes() { return type.getArraySizes(); } + virtual const TArraySizes* getArraySizes() const { return type.getArraySizes(); } + virtual void propagatePrecision(TPrecisionQualifier); + virtual int getVectorSize() const { return type.getVectorSize(); } + virtual int getMatrixCols() const { return type.getMatrixCols(); } + virtual int getMatrixRows() const { return type.getMatrixRows(); } + virtual bool isMatrix() const { return type.isMatrix(); } + virtual bool isArray() const { return type.isArray(); } + virtual bool isVector() const { return type.isVector(); } + virtual bool isScalar() const { return type.isScalar(); } + virtual bool isStruct() const { return type.isStruct(); } + virtual bool isFloatingDomain() const { return type.isFloatingDomain(); } + virtual bool isIntegerDomain() const { return type.isIntegerDomain(); } + bool isAtomic() const { return type.isAtomic(); } + bool isReference() const { return type.isReference(); } + TString getCompleteString(bool enhanced = false) const { return type.getCompleteString(enhanced); } + +protected: + TIntermTyped& operator=(const TIntermTyped&); + TType type; +}; + +// +// Handle for, do-while, and while loops. 
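The loop node that follows carries the SPIR-V loop-control hints, and its setters keep them mutually consistent: setDontUnroll() zeroes any peel/partial counts, and setting either count clears dontUnroll again. A hypothetical usage sketch of that interaction:

    // Hypothetical use of the TIntermLoop unroll hints defined below.
    #include "glslang/Include/intermediate.h"
    using namespace glslang;

    void hintPartialUnroll(TIntermLoop& loop) {
        loop.setDontUnroll();      // sets dontUnroll, zeroes peel/partial counts
        loop.setPartialCount(4);   // re-enables unrolling: clears dontUnroll
        // here loop.getDontUnroll() == false and loop.getPartialCount() == 4
    }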
+// +class TIntermLoop : public TIntermNode { +public: + TIntermLoop(TIntermNode* aBody, TIntermTyped* aTest, TIntermTyped* aTerminal, bool testFirst) : + body(aBody), + test(aTest), + terminal(aTerminal), + first(testFirst), + unroll(false), + dontUnroll(false), + dependency(0), + minIterations(0), + maxIterations(iterationsInfinite), + iterationMultiple(1), + peelCount(0), + partialCount(0) + { } + + virtual TIntermLoop* getAsLoopNode() { return this; } + virtual const TIntermLoop* getAsLoopNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TIntermNode* getBody() const { return body; } + TIntermTyped* getTest() const { return test; } + TIntermTyped* getTerminal() const { return terminal; } + bool testFirst() const { return first; } + + void setUnroll() { unroll = true; } + void setDontUnroll() { + dontUnroll = true; + peelCount = 0; + partialCount = 0; + } + bool getUnroll() const { return unroll; } + bool getDontUnroll() const { return dontUnroll; } + + static const unsigned int dependencyInfinite = 0xFFFFFFFF; + static const unsigned int iterationsInfinite = 0xFFFFFFFF; + void setLoopDependency(int d) { dependency = d; } + int getLoopDependency() const { return dependency; } + + void setMinIterations(unsigned int v) { minIterations = v; } + unsigned int getMinIterations() const { return minIterations; } + void setMaxIterations(unsigned int v) { maxIterations = v; } + unsigned int getMaxIterations() const { return maxIterations; } + void setIterationMultiple(unsigned int v) { iterationMultiple = v; } + unsigned int getIterationMultiple() const { return iterationMultiple; } + void setPeelCount(unsigned int v) { + peelCount = v; + dontUnroll = false; + } + unsigned int getPeelCount() const { return peelCount; } + void setPartialCount(unsigned int v) { + partialCount = v; + dontUnroll = false; + } + unsigned int getPartialCount() const { return partialCount; } + +protected: + TIntermNode* body; // code to loop over + TIntermTyped* test; // exit condition associated with loop, could be 0 for 'for' loops + TIntermTyped* terminal; // exists for for-loops + bool first; // true for while and for, not for do-while + bool unroll; // true if unroll requested + bool dontUnroll; // true if request to not unroll + unsigned int dependency; // loop dependency hint; 0 means not set or unknown + unsigned int minIterations; // as per the SPIR-V specification + unsigned int maxIterations; // as per the SPIR-V specification + unsigned int iterationMultiple; // as per the SPIR-V specification + unsigned int peelCount; // as per the SPIR-V specification + unsigned int partialCount; // as per the SPIR-V specification +}; + +// +// Handle case, break, continue, return, and kill. +// +class TIntermBranch : public TIntermNode { +public: + TIntermBranch(TOperator op, TIntermTyped* e) : + flowOp(op), + expression(e) { } + virtual TIntermBranch* getAsBranchNode() { return this; } + virtual const TIntermBranch* getAsBranchNode() const { return this; } + virtual void traverse(TIntermTraverser*); + TOperator getFlowOp() const { return flowOp; } + TIntermTyped* getExpression() const { return expression; } + void setExpression(TIntermTyped* pExpression) { expression = pExpression; } + void updatePrecision(TPrecisionQualifier parentPrecision); +protected: + TOperator flowOp; + TIntermTyped* expression; +}; + +// +// Represent method names before seeing their calling signature +// or resolving them to operations. Just an expression as the base object +// and a textual name. 
+// +class TIntermMethod : public TIntermTyped { +public: + TIntermMethod(TIntermTyped* o, const TType& t, const TString& m) : TIntermTyped(t), object(o), method(m) { } + virtual TIntermMethod* getAsMethodNode() { return this; } + virtual const TIntermMethod* getAsMethodNode() const { return this; } + virtual const TString& getMethodName() const { return method; } + virtual TIntermTyped* getObject() const { return object; } + virtual void traverse(TIntermTraverser*); +protected: + TIntermTyped* object; + TString method; +}; + +// +// Nodes that correspond to symbols or constants in the source code. +// +class TIntermSymbol : public TIntermTyped { +public: + // if symbol is initialized as symbol(sym), the memory comes from the pool allocator of sym. If sym comes from + // per process threadPoolAllocator, then it causes increased memory usage per compile + // it is essential to use "symbol = sym" to assign to symbol + TIntermSymbol(long long i, const TString& n, const TType& t) + : TIntermTyped(t), id(i), +#ifndef GLSLANG_WEB + flattenSubset(-1), +#endif + constSubtree(nullptr) + { name = n; } + virtual long long getId() const { return id; } + virtual void changeId(long long i) { id = i; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual TIntermSymbol* getAsSymbolNode() { return this; } + virtual const TIntermSymbol* getAsSymbolNode() const { return this; } + void setConstArray(const TConstUnionArray& c) { constArray = c; } + const TConstUnionArray& getConstArray() const { return constArray; } + void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + TIntermTyped* getConstSubtree() const { return constSubtree; } +#ifndef GLSLANG_WEB + void setFlattenSubset(int subset) { flattenSubset = subset; } + virtual const TString& getAccessName() const; + + int getFlattenSubset() const { return flattenSubset; } // -1 means full object +#endif + + // This is meant for cases where a node has already been constructed, and + // later on, it becomes necessary to switch to a different symbol. 
+ virtual void switchId(long long newId) { id = newId; } + +protected: + long long id; // the unique id of the symbol this node represents +#ifndef GLSLANG_WEB + int flattenSubset; // how deeply the flattened object rooted at id has been dereferenced +#endif + TString name; // the name of the symbol this node represents + TConstUnionArray constArray; // if the symbol is a front-end compile-time constant, this is its value + TIntermTyped* constSubtree; +}; + +class TIntermConstantUnion : public TIntermTyped { +public: + TIntermConstantUnion(const TConstUnionArray& ua, const TType& t) : TIntermTyped(t), constArray(ua), literal(false) { } + const TConstUnionArray& getConstArray() const { return constArray; } + virtual TIntermConstantUnion* getAsConstantUnion() { return this; } + virtual const TIntermConstantUnion* getAsConstantUnion() const { return this; } + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* fold(TOperator, const TIntermTyped*) const; + virtual TIntermTyped* fold(TOperator, const TType&) const; + void setLiteral() { literal = true; } + void setExpression() { literal = false; } + bool isLiteral() const { return literal; } + +protected: + TIntermConstantUnion& operator=(const TIntermConstantUnion&); + + const TConstUnionArray constArray; + bool literal; // true if node represents a literal in the source code +}; + +// Represent the independent aspects of a texturing TOperator +struct TCrackedTextureOp { + bool query; + bool proj; + bool lod; + bool fetch; + bool offset; + bool offsets; + bool gather; + bool grad; + bool subpass; + bool lodClamp; + bool fragMask; +}; + +// +// Intermediate class for node types that hold operators. +// +class TIntermOperator : public TIntermTyped { +public: + virtual TIntermOperator* getAsOperator() { return this; } + virtual const TIntermOperator* getAsOperator() const { return this; } + TOperator getOp() const { return op; } + void setOp(TOperator newOp) { op = newOp; } + bool modifiesState() const; + bool isConstructor() const; + bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; } + bool isSampling() const { return op > EOpSamplingGuardBegin && op < EOpSamplingGuardEnd; } +#ifdef GLSLANG_WEB + bool isImage() const { return false; } + bool isSparseTexture() const { return false; } + bool isImageFootprint() const { return false; } + bool isSparseImage() const { return false; } + bool isSubgroup() const { return false; } +#else + bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; } + bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; } + bool isImageFootprint() const { return op > EOpImageFootprintGuardBegin && op < EOpImageFootprintGuardEnd; } + bool isSparseImage() const { return op == EOpSparseImageLoad; } + bool isSubgroup() const { return op > EOpSubgroupGuardStart && op < EOpSubgroupGuardStop; } +#endif + + void setOperationPrecision(TPrecisionQualifier p) { operationPrecision = p; } + TPrecisionQualifier getOperationPrecision() const { return operationPrecision != EpqNone ? + operationPrecision : + type.getQualifier().precision; } + TString getCompleteString() const + { + TString cs = type.getCompleteString(); + if (getOperationPrecision() != type.getQualifier().precision) { + cs += ", operation at "; + cs += GetPrecisionQualifierString(getOperationPrecision()); + } + + return cs; + } + + // Crack the op into the individual dimensions of texturing operation. 
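crackTexture(), defined immediately below, decomposes a texturing TOperator into orthogonal flags so callers can test properties (lod, gather, proj, ...) instead of switching on every opcode. A hedged sketch of calling it; the helper is hypothetical, and in real code the TSampler would come from the node's operand type:

    // Hypothetical: does this texturing op carry an explicit LOD argument?
    #include "glslang/Include/intermediate.h"
    using namespace glslang;

    bool usesExplicitLod(const TIntermOperator& texOp, TSampler sampler) {
        TCrackedTextureOp cracked;
        texOp.crackTexture(sampler, cracked); // resets all flags, then sets
                                              // the ones the opcode implies
        return cracked.lod && !cracked.query;
    }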
+ void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const + { + cracked.query = false; + cracked.proj = false; + cracked.lod = false; + cracked.fetch = false; + cracked.offset = false; + cracked.offsets = false; + cracked.gather = false; + cracked.grad = false; + cracked.subpass = false; + cracked.lodClamp = false; + cracked.fragMask = false; + + switch (op) { + case EOpImageQuerySize: + case EOpImageQuerySamples: + case EOpTextureQuerySize: + case EOpTextureQueryLod: + case EOpTextureQueryLevels: + case EOpTextureQuerySamples: + case EOpSparseTexelsResident: + cracked.query = true; + break; + case EOpTexture: + case EOpSparseTexture: + break; + case EOpTextureProj: + cracked.proj = true; + break; + case EOpTextureLod: + case EOpSparseTextureLod: + cracked.lod = true; + break; + case EOpTextureOffset: + case EOpSparseTextureOffset: + cracked.offset = true; + break; + case EOpTextureFetch: + case EOpSparseTextureFetch: + cracked.fetch = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureFetchOffset: + case EOpSparseTextureFetchOffset: + cracked.fetch = true; + cracked.offset = true; + if (sampler.is1D() || (sampler.dim == Esd2D && ! sampler.isMultiSample()) || sampler.dim == Esd3D) + cracked.lod = true; + break; + case EOpTextureProjOffset: + cracked.offset = true; + cracked.proj = true; + break; + case EOpTextureLodOffset: + case EOpSparseTextureLodOffset: + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureProjLod: + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureProjLodOffset: + cracked.offset = true; + cracked.lod = true; + cracked.proj = true; + break; + case EOpTextureGrad: + case EOpSparseTextureGrad: + cracked.grad = true; + break; + case EOpTextureGradOffset: + case EOpSparseTextureGradOffset: + cracked.grad = true; + cracked.offset = true; + break; + case EOpTextureProjGrad: + cracked.grad = true; + cracked.proj = true; + break; + case EOpTextureProjGradOffset: + cracked.grad = true; + cracked.offset = true; + cracked.proj = true; + break; +#ifndef GLSLANG_WEB + case EOpTextureClamp: + case EOpSparseTextureClamp: + cracked.lodClamp = true; + break; + case EOpTextureOffsetClamp: + case EOpSparseTextureOffsetClamp: + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGradClamp: + case EOpSparseTextureGradClamp: + cracked.grad = true; + cracked.lodClamp = true; + break; + case EOpTextureGradOffsetClamp: + case EOpSparseTextureGradOffsetClamp: + cracked.grad = true; + cracked.offset = true; + cracked.lodClamp = true; + break; + case EOpTextureGather: + case EOpSparseTextureGather: + cracked.gather = true; + break; + case EOpTextureGatherOffset: + case EOpSparseTextureGatherOffset: + cracked.gather = true; + cracked.offset = true; + break; + case EOpTextureGatherOffsets: + case EOpSparseTextureGatherOffsets: + cracked.gather = true; + cracked.offsets = true; + break; + case EOpTextureGatherLod: + case EOpSparseTextureGatherLod: + cracked.gather = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffset: + case EOpSparseTextureGatherLodOffset: + cracked.gather = true; + cracked.offset = true; + cracked.lod = true; + break; + case EOpTextureGatherLodOffsets: + case EOpSparseTextureGatherLodOffsets: + cracked.gather = true; + cracked.offsets = true; + cracked.lod = true; + break; + case EOpImageLoadLod: + case EOpImageStoreLod: + case EOpSparseImageLoadLod: + cracked.lod = true; + break; + 
case EOpFragmentMaskFetch: + cracked.subpass = sampler.dim == EsdSubpass; + cracked.fragMask = true; + break; + case EOpFragmentFetch: + cracked.subpass = sampler.dim == EsdSubpass; + cracked.fragMask = true; + break; + case EOpImageSampleFootprintNV: + break; + case EOpImageSampleFootprintClampNV: + cracked.lodClamp = true; + break; + case EOpImageSampleFootprintLodNV: + cracked.lod = true; + break; + case EOpImageSampleFootprintGradNV: + cracked.grad = true; + break; + case EOpImageSampleFootprintGradClampNV: + cracked.lodClamp = true; + cracked.grad = true; + break; + case EOpSubpassLoad: + case EOpSubpassLoadMS: + cracked.subpass = true; + break; +#endif + default: + break; + } + } + +protected: + TIntermOperator(TOperator o) : TIntermTyped(EbtFloat), op(o), operationPrecision(EpqNone) {} + TIntermOperator(TOperator o, TType& t) : TIntermTyped(t), op(o), operationPrecision(EpqNone) {} + TOperator op; + // The result precision is in the inherited TType, and is usually meant to be both + // the operation precision and the result precision. However, some more complex things, + // like built-in function calls, distinguish between the two, in which case non-EqpNone + // 'operationPrecision' overrides the result precision as far as operation precision + // is concerned. + TPrecisionQualifier operationPrecision; +}; + +// +// Nodes for all the basic binary math operators. +// +class TIntermBinary : public TIntermOperator { +public: + TIntermBinary(TOperator o) : TIntermOperator(o) {} + virtual void traverse(TIntermTraverser*); + virtual void setLeft(TIntermTyped* n) { left = n; } + virtual void setRight(TIntermTyped* n) { right = n; } + virtual TIntermTyped* getLeft() const { return left; } + virtual TIntermTyped* getRight() const { return right; } + virtual TIntermBinary* getAsBinaryNode() { return this; } + virtual const TIntermBinary* getAsBinaryNode() const { return this; } + virtual void updatePrecision(); +protected: + TIntermTyped* left; + TIntermTyped* right; +}; + +// +// Nodes for unary math operators. +// +class TIntermUnary : public TIntermOperator { +public: + TIntermUnary(TOperator o, TType& t) : TIntermOperator(o, t), operand(0) {} + TIntermUnary(TOperator o) : TIntermOperator(o), operand(0) {} + virtual void traverse(TIntermTraverser*); + virtual void setOperand(TIntermTyped* o) { operand = o; } + virtual TIntermTyped* getOperand() { return operand; } + virtual const TIntermTyped* getOperand() const { return operand; } + virtual TIntermUnary* getAsUnaryNode() { return this; } + virtual const TIntermUnary* getAsUnaryNode() const { return this; } + virtual void updatePrecision(); +#ifndef GLSLANG_WEB + void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; } + const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif +protected: + TIntermTyped* operand; +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; +#endif +}; + +typedef TVector<TIntermNode*> TIntermSequence; +typedef TVector<TStorageQualifier> TQualifierList; +// +// Nodes that operate on an arbitrary sized set of children. 
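TIntermSequence, the pool-allocated vector of child pointers just typedef'd, is what the TIntermAggregate class below exposes through getSequence(). A hedged sketch of walking one level of an aggregate (the helper is hypothetical; at the AST root, function definitions appear as EOpFunction aggregates):

    // Count EOpFunction children directly under an aggregate node.
    #include "glslang/Include/intermediate.h"
    using namespace glslang;

    int countFunctionDefinitions(TIntermAggregate* root) {
        int n = 0;
        for (TIntermNode* child : root->getSequence()) {
            TIntermAggregate* agg = child->getAsAggregate();
            if (agg && agg->getOp() == EOpFunction)
                ++n;
        }
        return n;
    }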
+// +class TIntermAggregate : public TIntermOperator { +public: + TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(nullptr) { } + TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(nullptr) { } + ~TIntermAggregate() { delete pragmaTable; } + virtual TIntermAggregate* getAsAggregate() { return this; } + virtual const TIntermAggregate* getAsAggregate() const { return this; } + virtual void updatePrecision(); + virtual void setOperator(TOperator o) { op = o; } + virtual TIntermSequence& getSequence() { return sequence; } + virtual const TIntermSequence& getSequence() const { return sequence; } + virtual void setName(const TString& n) { name = n; } + virtual const TString& getName() const { return name; } + virtual void traverse(TIntermTraverser*); + virtual void setUserDefined() { userDefined = true; } + virtual bool isUserDefined() { return userDefined; } + virtual TQualifierList& getQualifierList() { return qualifier; } + virtual const TQualifierList& getQualifierList() const { return qualifier; } + void setOptimize(bool o) { optimize = o; } + void setDebug(bool d) { debug = d; } + bool getOptimize() const { return optimize; } + bool getDebug() const { return debug; } + void setPragmaTable(const TPragmaTable& pTable); + const TPragmaTable& getPragmaTable() const { return *pragmaTable; } +#ifndef GLSLANG_WEB + void setSpirvInstruction(const TSpirvInstruction& inst) { spirvInst = inst; } + const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif +protected: + TIntermAggregate(const TIntermAggregate&); // disallow copy constructor + TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator + TIntermSequence sequence; + TQualifierList qualifier; + TString name; + bool userDefined; // used for user defined function names + bool optimize; + bool debug; + TPragmaTable* pragmaTable; +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; +#endif +}; + +// +// For if tests. 
+// +class TIntermSelection : public TIntermTyped { +public: + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB) : + TIntermTyped(EbtVoid), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB, const TType& type) : + TIntermTyped(type), condition(cond), trueBlock(trueB), falseBlock(falseB), + shortCircuit(true), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermTyped* getCondition() const { return condition; } + virtual void setCondition(TIntermTyped* c) { condition = c; } + virtual TIntermNode* getTrueBlock() const { return trueBlock; } + virtual void setTrueBlock(TIntermTyped* tb) { trueBlock = tb; } + virtual TIntermNode* getFalseBlock() const { return falseBlock; } + virtual void setFalseBlock(TIntermTyped* fb) { falseBlock = fb; } + virtual TIntermSelection* getAsSelectionNode() { return this; } + virtual const TIntermSelection* getAsSelectionNode() const { return this; } + + void setNoShortCircuit() { shortCircuit = false; } + bool getShortCircuit() const { return shortCircuit; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermNode* trueBlock; + TIntermNode* falseBlock; + bool shortCircuit; // normally all if-then-else and all GLSL ?: short-circuit, but HLSL ?: does not + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +// +// For switch statements. Designed use is that a switch will have sequence of nodes +// that are either case/default nodes or a *single* node that represents all the code +// in between (if any) consecutive case/defaults. So, a traversal need only deal with +// 0 or 1 nodes per case/default statement. +// +class TIntermSwitch : public TIntermNode { +public: + TIntermSwitch(TIntermTyped* cond, TIntermAggregate* b) : condition(cond), body(b), + flatten(false), dontFlatten(false) {} + virtual void traverse(TIntermTraverser*); + virtual TIntermNode* getCondition() const { return condition; } + virtual TIntermAggregate* getBody() const { return body; } + virtual TIntermSwitch* getAsSwitchNode() { return this; } + virtual const TIntermSwitch* getAsSwitchNode() const { return this; } + + void setFlatten() { flatten = true; } + void setDontFlatten() { dontFlatten = true; } + bool getFlatten() const { return flatten; } + bool getDontFlatten() const { return dontFlatten; } + +protected: + TIntermTyped* condition; + TIntermAggregate* body; + bool flatten; // true if flatten requested + bool dontFlatten; // true if requested to not flatten +}; + +enum TVisit +{ + EvPreVisit, + EvInVisit, + EvPostVisit +}; + +// +// For traversing the tree. User should derive from this, +// put their traversal specific data in it, and then pass +// it to a Traverse method. +// +// When using this, just fill in the methods for nodes you want visited. +// Return false from a pre-visit to skip visiting that node's subtree. +// +// Explicitly set postVisit to true if you want post visiting, otherwise, +// filled in methods will only be called at pre-visit time (before processing +// the subtree). Similarly for inVisit for in-order visiting of nodes with +// multiple children. 
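A minimal sketch of the derive-and-visit pattern this comment block describes, counting symbol nodes with the default pre-visit-only configuration. It assumes glslang's per-thread pool allocator is live (e.g. after ShInitialize()), since the traverser's internal path vector uses it:

    #include "glslang/Include/intermediate.h"
    using namespace glslang;

    class TSymbolCounter : public TIntermTraverser {
    public:
        TSymbolCounter() : TIntermTraverser(/*preVisit*/ true), count(0) { }
        void visitSymbol(TIntermSymbol*) override { ++count; }
        bool visitAggregate(TVisit, TIntermAggregate*) override {
            return true; // keep descending into children
        }
        int count;
    };

    // Usage, given an AST root node:
    //   TSymbolCounter counter;
    //   root->traverse(&counter);
    //   int symbolUses = counter.count;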
+// +// If you only want post-visits, explicitly turn off preVisit (and inVisit) +// and turn on postVisit. +// +// In general, for the visit*() methods, return true from interior nodes +// to have the traversal continue on to children. +// +// If you process children yourself, or don't want them processed, return false. +// +class TIntermTraverser { +public: + POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) + TIntermTraverser(bool preVisit = true, bool inVisit = false, bool postVisit = false, bool rightToLeft = false) : + preVisit(preVisit), + inVisit(inVisit), + postVisit(postVisit), + rightToLeft(rightToLeft), + depth(0), + maxDepth(0) { } + virtual ~TIntermTraverser() { } + + virtual void visitSymbol(TIntermSymbol*) { } + virtual void visitConstantUnion(TIntermConstantUnion*) { } + virtual bool visitBinary(TVisit, TIntermBinary*) { return true; } + virtual bool visitUnary(TVisit, TIntermUnary*) { return true; } + virtual bool visitSelection(TVisit, TIntermSelection*) { return true; } + virtual bool visitAggregate(TVisit, TIntermAggregate*) { return true; } + virtual bool visitLoop(TVisit, TIntermLoop*) { return true; } + virtual bool visitBranch(TVisit, TIntermBranch*) { return true; } + virtual bool visitSwitch(TVisit, TIntermSwitch*) { return true; } + + int getMaxDepth() const { return maxDepth; } + + void incrementDepth(TIntermNode *current) + { + depth++; + maxDepth = (std::max)(maxDepth, depth); + path.push_back(current); + } + + void decrementDepth() + { + depth--; + path.pop_back(); + } + + TIntermNode *getParentNode() + { + return path.size() == 0 ? NULL : path.back(); + } + + const bool preVisit; + const bool inVisit; + const bool postVisit; + const bool rightToLeft; + +protected: + TIntermTraverser& operator=(TIntermTraverser&); + + int depth; + int maxDepth; + + // All the nodes from root to the current node's parent during traversing. + TVector<TIntermNode *> path; +}; + +// KHR_vulkan_glsl says "Two arrays sized with specialization constants are the same type only if +// sized with the same symbol, involving no operations" +inline bool SameSpecializationConstants(TIntermTyped* node1, TIntermTyped* node2) +{ + return node1->getAsSymbolNode() && node2->getAsSymbolNode() && + node1->getAsSymbolNode()->getId() == node2->getAsSymbolNode()->getId(); +} + +} // end namespace glslang + +#endif // __INTERMEDIATE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Initialize.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Initialize.h new file mode 100644 index 0000000..ac8ec33 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Initialize.h @@ -0,0 +1,112 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013-2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _INITIALIZE_INCLUDED_ +#define _INITIALIZE_INCLUDED_ + +#include "../Include/ResourceLimits.h" +#include "../Include/Common.h" +#include "../Include/ShHandle.h" +#include "SymbolTable.h" +#include "Versions.h" + +namespace glslang { + +// +// This is made to hold parseable strings for almost all the built-in +// functions and variables for one specific combination of version +// and profile. (Some still need to be added programmatically.) +// This is a base class for language-specific derivations, which +// can be used for language independent builtins. +// +// The strings are organized by +// commonBuiltins: intersection of all stages' built-ins, processed just once +// stageBuiltins[]: anything a stage needs that's not in commonBuiltins +// +class TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltInParseables(); + virtual ~TBuiltInParseables(); + virtual void initialize(int version, EProfile, const SpvVersion& spvVersion) = 0; + virtual void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage) = 0; + virtual const TString& getCommonString() const { return commonBuiltins; } + virtual const TString& getStageString(EShLanguage language) const { return stageBuiltins[language]; } + + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable) = 0; + virtual void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources) = 0; + +protected: + TString commonBuiltins; + TString stageBuiltins[EShLangCount]; +}; + +// +// This is a GLSL specific derivation of TBuiltInParseables. To present a stable +// interface and match other similar code, it is called TBuiltIns, rather +// than TBuiltInParseablesGlsl. 
+// +class TBuiltIns : public TBuiltInParseables { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TBuiltIns(); + virtual ~TBuiltIns(); + void initialize(int version, EProfile, const SpvVersion& spvVersion); + void initialize(const TBuiltInResource& resources, int version, EProfile, const SpvVersion& spvVersion, EShLanguage); + + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable); + void identifyBuiltIns(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language, TSymbolTable& symbolTable, const TBuiltInResource &resources); + +protected: + void addTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion); + void relateTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage, TSymbolTable&); + void add2ndGenerationSamplingImaging(int version, EProfile profile, const SpvVersion& spvVersion); + void addSubpassSampling(TSampler, const TString& typeName, int version, EProfile profile); + void addQueryFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addImageFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addSamplingFunctions(TSampler, const TString& typeName, int version, EProfile profile); + void addGatherFunctions(TSampler, const TString& typeName, int version, EProfile profile); + + // Helpers for making textual representations of the permutations + // of texturing/imaging functions. + const char* postfixes[5]; + const char* prefixes[EbtNumTypes]; + int dimMap[EsdNumDims]; +}; + +} // end namespace glslang + +#endif // _INITIALIZE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/LiveTraverser.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/LiveTraverser.h new file mode 100644 index 0000000..9b39b59 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/LiveTraverser.h @@ -0,0 +1,168 @@ +// +// Copyright (C) 2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#pragma once + +#include "../Include/Common.h" +#include "reflection.h" +#include "localintermediate.h" + +#include "gl_types.h" + +#include <list> +#include <unordered_set> + +namespace glslang { + +// +// The traverser: mostly pass through, except +// - processing function-call nodes to push live functions onto the stack of functions to process +// - processing selection nodes to trim semantically dead code +// +// This is in the glslang namespace directly so it can be a friend of TReflection. +// This can be derived from to implement reflection database traversers or +// binding mappers: anything that wants to traverse the live subset of the tree. +// + +class TLiveTraverser : public TIntermTraverser { +public: + TLiveTraverser(const TIntermediate& i, bool traverseAll = false, + bool preVisit = true, bool inVisit = false, bool postVisit = false) : + TIntermTraverser(preVisit, inVisit, postVisit), + intermediate(i), traverseAll(traverseAll) + { } + + // + // Given a function name, find its subroot in the tree, and push it onto the stack of + // functions left to process. + // + void pushFunction(const TString& name) + { + TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence(); + for (unsigned int f = 0; f < globals.size(); ++f) { + TIntermAggregate* candidate = globals[f]->getAsAggregate(); + if (candidate && candidate->getOp() == EOpFunction && candidate->getName() == name) { + destinations.push_back(candidate); + break; + } + } + } + + void pushGlobalReference(const TString& name) + { + TIntermSequence& globals = intermediate.getTreeRoot()->getAsAggregate()->getSequence(); + for (unsigned int f = 0; f < globals.size(); ++f) { + TIntermAggregate* candidate = globals[f]->getAsAggregate(); + if (candidate && candidate->getOp() == EOpSequence && + candidate->getSequence().size() == 1 && + candidate->getSequence()[0]->getAsBinaryNode()) { + TIntermBinary* binary = candidate->getSequence()[0]->getAsBinaryNode(); + TIntermSymbol* symbol = binary->getLeft()->getAsSymbolNode(); + if (symbol && symbol->getQualifier().storage == EvqGlobal && + symbol->getName() == name) { + destinations.push_back(candidate); + break; + } + } + } + } + + typedef std::list<TIntermAggregate*> TDestinationStack; + TDestinationStack destinations; + +protected: + // To catch which function calls are not dead, and hence which functions must be visited. + virtual bool visitAggregate(TVisit, TIntermAggregate* node) + { + if (!traverseAll) + if (node->getOp() == EOpFunctionCall) + addFunctionCall(node); + + return true; // traverse this subtree + } + + // To prune semantically dead paths.
+ virtual bool visitSelection(TVisit /* visit */, TIntermSelection* node) + { + if (traverseAll) + return true; // traverse all code + + TIntermConstantUnion* constant = node->getCondition()->getAsConstantUnion(); + if (constant) { + // cull the path that is dead + if (constant->getConstArray()[0].getBConst() == true && node->getTrueBlock()) + node->getTrueBlock()->traverse(this); + if (constant->getConstArray()[0].getBConst() == false && node->getFalseBlock()) + node->getFalseBlock()->traverse(this); + + return false; // don't traverse any more, we did it all above + } else + return true; // traverse the whole subtree + } + + // Track live functions as well as uniforms, so that we don't visit dead functions + // and only visit each function once. + void addFunctionCall(TIntermAggregate* call) + { + // just use the map to ensure we process each function at most once + if (liveFunctions.find(call->getName()) == liveFunctions.end()) { + liveFunctions.insert(call->getName()); + pushFunction(call->getName()); + } + } + + void addGlobalReference(const TString& name) + { + // just use the map to ensure we process each global at most once + if (liveGlobals.find(name) == liveGlobals.end()) { + liveGlobals.insert(name); + pushGlobalReference(name); + } + } + + const TIntermediate& intermediate; + typedef std::unordered_set<TString> TLiveFunctions; + TLiveFunctions liveFunctions; + typedef std::unordered_set<TString> TLiveGlobals; + TLiveGlobals liveGlobals; + bool traverseAll; + +private: + // prevent copy & copy construct + TLiveTraverser(TLiveTraverser&); + TLiveTraverser& operator=(TLiveTraverser&); +}; + +} // namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/ParseHelper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/ParseHelper.h new file mode 100644 index 0000000..885fd90 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/ParseHelper.h @@ -0,0 +1,588 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// This header defines a two-level parse-helper hierarchy, derived from +// TParseVersions: +// - TParseContextBase: sharable across multiple parsers +// - TParseContext: GLSL specific helper +// + +#ifndef _PARSER_HELPER_INCLUDED_ +#define _PARSER_HELPER_INCLUDED_ + +#include <array> +#include <functional> + +#include "parseVersions.h" +#include "../Include/ShHandle.h" +#include "SymbolTable.h" +#include "localintermediate.h" +#include "Scan.h" +#include "attribute.h" + +namespace glslang { + +struct TPragma { + TPragma(bool o, bool d) : optimize(o), debug(d) { } + bool optimize; + bool debug; + TPragmaTable pragmaTable; +}; + +class TScanContext; +class TPpContext; + +typedef std::set<long long> TIdSetType; +typedef std::map<const TTypeList*, std::map<size_t, const TType*>> TStructRecord; + +// +// Sharable code (as well as what's in TParseVersions) across +// parse helpers. +// +class TParseContextBase : public TParseVersions { +public: + TParseContextBase(TSymbolTable& symbolTable, TIntermediate& interm, bool parsingBuiltins, int version, + EProfile profile, const SpvVersion& spvVersion, EShLanguage language, + TInfoSink& infoSink, bool forwardCompatible, EShMessages messages, + const TString* entryPoint = nullptr) + : TParseVersions(interm, version, profile, spvVersion, language, infoSink, forwardCompatible, messages), + scopeMangler("::"), + symbolTable(symbolTable), + statementNestingLevel(0), loopNestingLevel(0), structNestingLevel(0), blockNestingLevel(0), controlFlowNestingLevel(0), + currentFunctionType(nullptr), + postEntryPointReturn(false), + contextPragma(true, false), + beginInvocationInterlockCount(0), endInvocationInterlockCount(0), + parsingBuiltins(parsingBuiltins), scanContext(nullptr), ppContext(nullptr), + limits(resources.limits), + globalUniformBlock(nullptr), + globalUniformBinding(TQualifier::layoutBindingEnd), + globalUniformSet(TQualifier::layoutSetEnd), + atomicCounterBlockSet(TQualifier::layoutSetEnd) + { + if (entryPoint != nullptr) + sourceEntryPointName = *entryPoint; + } + virtual ~TParseContextBase() { } + +#if !defined(GLSLANG_WEB) || defined(GLSLANG_WEB_DEVEL) + virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); + virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); + virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); + virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...); +#endif + + virtual void setLimits(const TBuiltInResource&) = 0; + + void checkIndex(const TSourceLoc&, const TType&, int& index); + + EShLanguage getLanguage() const { return language; } + void setScanContext(TScanContext* c) { scanContext = c; } + TScanContext* getScanContext() const { return scanContext; } + void setPpContext(TPpContext* c) { ppContext = c; } + TPpContext* getPpContext() const { return ppContext; } +
+ virtual void setLineCallback(const std::function<void(int, int, bool, int, const char*)>& func) { lineCallback = func; } + virtual void setExtensionCallback(const std::function<void(int, const char*, const char*)>& func) { extensionCallback = func; } + virtual void setVersionCallback(const std::function<void(int, int, const char*)>& func) { versionCallback = func; } + virtual void setPragmaCallback(const std::function<void(int, const TVector<TString>&)>& func) { pragmaCallback = func; } + virtual void setErrorCallback(const std::function<void(int, const char*)>& func) { errorCallback = func; } + + virtual void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) = 0; + virtual bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) = 0; + virtual bool lineDirectiveShouldSetNextLine() const = 0; + virtual void handlePragma(const TSourceLoc&, const TVector<TString>&) = 0; + + virtual bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) = 0; + + virtual void notifyVersion(int line, int version, const char* type_string) + { + if (versionCallback) + versionCallback(line, version, type_string); + } + virtual void notifyErrorDirective(int line, const char* error_message) + { + if (errorCallback) + errorCallback(line, error_message); + } + virtual void notifyLineDirective(int curLineNo, int newLineNo, bool hasSource, int sourceNum, const char* sourceName) + { + if (lineCallback) + lineCallback(curLineNo, newLineNo, hasSource, sourceNum, sourceName); + } + virtual void notifyExtensionDirective(int line, const char* extension, const char* behavior) + { + if (extensionCallback) + extensionCallback(line, extension, behavior); + } + + // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL) + virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr); + + // Manage global buffer (used for backing atomic counters in GLSL when using relaxed Vulkan semantics) + virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr); + + // Potentially rename shader entry point function + void renameShaderFunction(TString*& name) const + { + // Replace the entry point name given in the shader with the real entry point name, + // if there is a substitution.
+ if (name != nullptr && *name == sourceEntryPointName && intermediate.getEntryPointName().size() > 0) + name = NewPoolTString(intermediate.getEntryPointName().c_str()); + } + + virtual bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*); + virtual void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*); + + const char* const scopeMangler; + + // Basic parsing state, easily accessible to the grammar + + TSymbolTable& symbolTable; // symbol table that goes with the current language, version, and profile + int statementNestingLevel; // 0 if outside all flow control or compound statements + int loopNestingLevel; // 0 if outside all loops + int structNestingLevel; // 0 if outside structures + int blockNestingLevel; // 0 if outside blocks + int controlFlowNestingLevel; // 0 if outside all flow control + const TType* currentFunctionType; // the return type of the function that's currently being parsed + bool functionReturnsValue; // true if a non-void function has a return + // if inside a function, true if the function is the entry point and this is after a return statement + bool postEntryPointReturn; + // case, node, case, case, node, ...; ensure only one node between cases; stack of them for nesting + TList<TIntermSequence*> switchSequenceStack; + // the statementNestingLevel the current switch statement is at, which must match the level of its case statements + TList<int> switchLevel; + struct TPragma contextPragma; + int beginInvocationInterlockCount; + int endInvocationInterlockCount; + +protected: + TParseContextBase(TParseContextBase&); + TParseContextBase& operator=(TParseContextBase&); + + const bool parsingBuiltins; // true if parsing built-in symbols/functions + TVector<TSymbol*> linkageSymbols; // will be transferred to 'linkage', after all editing is done, order preserving + TScanContext* scanContext; + TPpContext* ppContext; + TBuiltInResource resources; + TLimits& limits; + TString sourceEntryPointName; + + // These, if set, will be called when a line, pragma ... is preprocessed. + // They will be called with any parameters to the original directive.
+ std::function<void(int, int, bool, int, const char*)> lineCallback; + std::function<void(int, const TVector<TString>&)> pragmaCallback; + std::function<void(int, int, const char*)> versionCallback; + std::function<void(int, const char*, const char*)> extensionCallback; + std::function<void(int, const char*)> errorCallback; + + // see implementation for detail + const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&, + std::function<bool(const TType& from, const TType& to, TOperator op, int arg)>, + std::function<bool(const TType& from, const TType& to1, const TType& to2)>, + /* output */ bool& tie); + + virtual void parseSwizzleSelector(const TSourceLoc&, const TString&, int size, + TSwizzleSelectors<TVectorSelector>&); + + // Manage the global uniform block (default uniforms in GLSL, $Global in HLSL) + TVariable* globalUniformBlock; // the actual block, inserted into the symbol table + unsigned int globalUniformBinding; // the block's binding number + unsigned int globalUniformSet; // the block's set number + int firstNewMember; // the index of the first member not yet inserted into the symbol table + // override this to set the language-specific name + virtual const char* getGlobalUniformBlockName() const { return ""; } + virtual void setUniformBlockDefaults(TType&) const { } + virtual void finalizeGlobalUniformBlockLayout(TVariable&) {} + + // Manage the atomic counter block (used for atomic_uints with Vulkan-Relaxed) + TMap<int, TVariable*> atomicCounterBuffers; + unsigned int atomicCounterBlockSet; + TMap<int, int> atomicCounterBlockFirstNewMember; + // override this to set the language-specific name + virtual const char* getAtomicCounterBlockName() const { return ""; } + virtual void setAtomicCounterBlockDefaults(TType&) const {} + virtual void setInvariant(const TSourceLoc&, const char*) {} + virtual void finalizeAtomicCounterBlockLayout(TVariable&) {} + bool isAtomicCounterBlock(const TSymbol& symbol) { + const TVariable* var = symbol.getAsVariable(); + if (!var) + return false; + const auto& at = atomicCounterBuffers.find(var->getType().getQualifier().layoutBinding); + return (at != atomicCounterBuffers.end() && (*at).second->getType() == var->getType()); + } + + virtual void outputMessage(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, TPrefixType prefix, + va_list args); + virtual void trackLinkage(TSymbol& symbol); + virtual void makeEditable(TSymbol*&); + virtual TVariable* getEditableVariable(const char* name); + virtual void finish(); +}; + +// +// Manage the state for when to respect precision qualifiers and when to warn about +// the defaults being different than might be expected. +// +class TPrecisionManager { +public: + TPrecisionManager() : obey(false), warn(false), explicitIntDefault(false), explicitFloatDefault(false){ } + virtual ~TPrecisionManager() {} + + void respectPrecisionQualifiers() { obey = true; } + bool respectingPrecisionQualifiers() const { return obey; } + bool shouldWarnAboutDefaults() const { return warn; } + void defaultWarningGiven() { warn = false; } + void warnAboutDefaults() { warn = true; } + void explicitIntDefaultSeen() + { + explicitIntDefault = true; + if (explicitFloatDefault) + warn = false; + } + void explicitFloatDefaultSeen() + { + explicitFloatDefault = true; + if (explicitIntDefault) + warn = false; + } + +protected: + bool obey; // respect precision qualifiers + bool warn; // need to give a warning about the defaults + bool explicitIntDefault; // user set the default for int/uint + bool explicitFloatDefault; // user set the default for float +}; + +// +// GLSL-specific parse helper. Should have GLSL in the name, but that's +// too big of a change for comparing branches at the moment, and perhaps +// impacts downstream consumers as well.
+// +class TParseContext : public TParseContextBase { +public: + TParseContext(TSymbolTable&, TIntermediate&, bool parsingBuiltins, int version, EProfile, const SpvVersion& spvVersion, EShLanguage, TInfoSink&, + bool forwardCompatible = false, EShMessages messages = EShMsgDefault, + const TString* entryPoint = nullptr); + virtual ~TParseContext(); + + bool obeyPrecisionQualifiers() const { return precisionManager.respectingPrecisionQualifiers(); } + void setPrecisionDefaults(); + + void setLimits(const TBuiltInResource&) override; + bool parseShaderStrings(TPpContext&, TInputScanner& input, bool versionWillBeError = false) override; + void parserError(const char* s); // for bison's yyerror + + virtual void growGlobalUniformBlock(const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override; + virtual void growAtomicCounterBlock(int binding, const TSourceLoc&, TType&, const TString& memberName, TTypeList* typeList = nullptr) override; + + void reservedErrorCheck(const TSourceLoc&, const TString&); + void reservedPpErrorCheck(const TSourceLoc&, const char* name, const char* op) override; + bool lineContinuationCheck(const TSourceLoc&, bool endOfComment) override; + bool lineDirectiveShouldSetNextLine() const override; + bool builtInName(const TString&); + + void handlePragma(const TSourceLoc&, const TVector<TString>&) override; + TIntermTyped* handleVariable(const TSourceLoc&, TSymbol* symbol, const TString* string); + TIntermTyped* handleBracketDereference(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index); + void handleIndexLimits(const TSourceLoc&, TIntermTyped* base, TIntermTyped* index); + +#ifndef GLSLANG_WEB + void makeEditable(TSymbol*&) override; + void ioArrayCheck(const TSourceLoc&, const TType&, const TString& identifier); +#endif + bool isIoResizeArray(const TType&) const; + void fixIoArraySize(const TSourceLoc&, TType&); + void handleIoResizeArrayAccess(const TSourceLoc&, TIntermTyped* base); + void checkIoArraysConsistency(const TSourceLoc&, bool tailOnly = false); + int getIoArrayImplicitSize(const TQualifier&, TString* featureString = nullptr) const; + void checkIoArrayConsistency(const TSourceLoc&, int requiredSize, const char* feature, TType&, const TString&); + + TIntermTyped* handleBinaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* left, TIntermTyped* right); + TIntermTyped* handleUnaryMath(const TSourceLoc&, const char* str, TOperator op, TIntermTyped* childNode); + TIntermTyped* handleDotDereference(const TSourceLoc&, TIntermTyped* base, const TString& field); + TIntermTyped* handleDotSwizzle(const TSourceLoc&, TIntermTyped* base, const TString& field); + void blockMemberExtensionCheck(const TSourceLoc&, const TIntermTyped* base, int member, const TString& memberName); + TFunction* handleFunctionDeclarator(const TSourceLoc&, TFunction& function, bool prototype); + TIntermAggregate* handleFunctionDefinition(const TSourceLoc&, TFunction&); + TIntermTyped* handleFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + TIntermTyped* handleBuiltInFunctionCall(TSourceLoc, TIntermNode* arguments, const TFunction& function); + void computeBuiltinPrecisions(TIntermTyped&, const TFunction&); + TIntermNode* handleReturnValue(const TSourceLoc&, TIntermTyped*); + void checkLocation(const TSourceLoc&, TOperator); + TIntermTyped* handleLengthMethod(const TSourceLoc&, TFunction*, TIntermNode*); + void addInputArgumentConversions(const TFunction&, TIntermNode*&) const; + TIntermTyped* addOutputArgumentConversions(const
TFunction&, TIntermAggregate&) const; + TIntermTyped* addAssign(const TSourceLoc&, TOperator op, TIntermTyped* left, TIntermTyped* right); + void builtInOpCheck(const TSourceLoc&, const TFunction&, TIntermOperator&); + void nonOpBuiltInCheck(const TSourceLoc&, const TFunction&, TIntermAggregate&); + void userFunctionCallCheck(const TSourceLoc&, TIntermAggregate&); + void samplerConstructorLocationCheck(const TSourceLoc&, const char* token, TIntermNode*); + TFunction* handleConstructorCall(const TSourceLoc&, const TPublicType&); + void handlePrecisionQualifier(const TSourceLoc&, TQualifier&, TPrecisionQualifier); + void checkPrecisionQualifier(const TSourceLoc&, TPrecisionQualifier); + void memorySemanticsCheck(const TSourceLoc&, const TFunction&, const TIntermOperator& callNode); + + TIntermTyped* vkRelaxedRemapFunctionCall(const TSourceLoc&, TFunction*, TIntermNode*); + // returns true if the variable was remapped to something else + bool vkRelaxedRemapUniformVariable(const TSourceLoc&, TString&, const TPublicType&, TArraySizes*, TIntermTyped*, TType&); + + void assignError(const TSourceLoc&, const char* op, TString left, TString right); + void unaryOpError(const TSourceLoc&, const char* op, TString operand); + void binaryOpError(const TSourceLoc&, const char* op, TString left, TString right); + void variableCheck(TIntermTyped*& nodePtr); + bool lValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void rValueErrorCheck(const TSourceLoc&, const char* op, TIntermTyped*) override; + void constantValueCheck(TIntermTyped* node, const char* token); + void integerCheck(const TIntermTyped* node, const char* token); + void globalCheck(const TSourceLoc&, const char* token); + bool constructorError(const TSourceLoc&, TIntermNode*, TFunction&, TOperator, TType&); + bool constructorTextureSamplerError(const TSourceLoc&, const TFunction&); + void arraySizeCheck(const TSourceLoc&, TIntermTyped* expr, TArraySize&, const char *sizeType); + bool arrayQualifierError(const TSourceLoc&, const TQualifier&); + bool arrayError(const TSourceLoc&, const TType&); + void arraySizeRequiredCheck(const TSourceLoc&, const TArraySizes&); + void structArrayCheck(const TSourceLoc&, const TType& structure); + void arraySizesCheck(const TSourceLoc&, const TQualifier&, TArraySizes*, const TIntermTyped* initializer, bool lastMember); + void arrayOfArrayVersionCheck(const TSourceLoc&, const TArraySizes*); + bool voidErrorCheck(const TSourceLoc&, const TString&, TBasicType); + void boolCheck(const TSourceLoc&, const TIntermTyped*); + void boolCheck(const TSourceLoc&, const TPublicType&); + void samplerCheck(const TSourceLoc&, const TType&, const TString& identifier, TIntermTyped* initializer); + void atomicUintCheck(const TSourceLoc&, const TType&, const TString& identifier); + void accStructCheck(const TSourceLoc & loc, const TType & type, const TString & identifier); + void transparentOpaqueCheck(const TSourceLoc&, const TType&, const TString& identifier); + void memberQualifierCheck(glslang::TPublicType&); + void globalQualifierFixCheck(const TSourceLoc&, TQualifier&, bool isMemberCheck = false); + void globalQualifierTypeCheck(const TSourceLoc&, const TQualifier&, const TPublicType&); + bool structQualifierErrorCheck(const TSourceLoc&, const TPublicType& pType); + void mergeQualifiers(const TSourceLoc&, TQualifier& dst, const TQualifier& src, bool force); + void setDefaultPrecision(const TSourceLoc&, TPublicType&, TPrecisionQualifier); + int computeSamplerTypeIndex(TSampler&); + TPrecisionQualifier 
getDefaultPrecision(TPublicType&); + void precisionQualifierCheck(const TSourceLoc&, TBasicType, TQualifier&); + void parameterTypeCheck(const TSourceLoc&, TStorageQualifier qualifier, const TType& type); + bool containsFieldWithBasicType(const TType& type ,TBasicType basicType); + TSymbol* redeclareBuiltinVariable(const TSourceLoc&, const TString&, const TQualifier&, const TShaderQualifiers&); + void redeclareBuiltinBlock(const TSourceLoc&, TTypeList& typeList, const TString& blockName, const TString* instanceName, TArraySizes* arraySizes); + void paramCheckFixStorage(const TSourceLoc&, const TStorageQualifier&, TType& type); + void paramCheckFix(const TSourceLoc&, const TQualifier&, TType& type); + void nestedBlockCheck(const TSourceLoc&); + void nestedStructCheck(const TSourceLoc&); + void arrayObjectCheck(const TSourceLoc&, const TType&, const char* op); + void opaqueCheck(const TSourceLoc&, const TType&, const char* op); + void referenceCheck(const TSourceLoc&, const TType&, const char* op); + void storage16BitAssignmentCheck(const TSourceLoc&, const TType&, const char* op); + void specializationCheck(const TSourceLoc&, const TType&, const char* op); + void structTypeCheck(const TSourceLoc&, TPublicType&); + void inductiveLoopCheck(const TSourceLoc&, TIntermNode* init, TIntermLoop* loop); + void arrayLimitCheck(const TSourceLoc&, const TString&, int size); + void limitCheck(const TSourceLoc&, int value, const char* limit, const char* feature); + + void inductiveLoopBodyCheck(TIntermNode*, long long loopIndexId, TSymbolTable&); + void constantIndexExpressionCheck(TIntermNode*); + + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&); + void setLayoutQualifier(const TSourceLoc&, TPublicType&, TString&, const TIntermTyped*); + void mergeObjectLayoutQualifiers(TQualifier& dest, const TQualifier& src, bool inheritOnly); + void layoutObjectCheck(const TSourceLoc&, const TSymbol&); + void layoutMemberLocationArrayCheck(const TSourceLoc&, bool memberWithLocation, TArraySizes* arraySizes); + void layoutTypeCheck(const TSourceLoc&, const TType&); + void layoutQualifierCheck(const TSourceLoc&, const TQualifier&); + void checkNoShaderLayouts(const TSourceLoc&, const TShaderQualifiers&); + void fixOffset(const TSourceLoc&, TSymbol&); + + const TFunction* findFunction(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExact(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction120(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunction400(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + const TFunction* findFunctionExplicitTypes(const TSourceLoc& loc, const TFunction& call, bool& builtIn); + void declareTypeDefaults(const TSourceLoc&, const TPublicType&); + TIntermNode* declareVariable(const TSourceLoc&, TString& identifier, const TPublicType&, TArraySizes* typeArray = 0, TIntermTyped* initializer = 0); + TIntermTyped* addConstructor(const TSourceLoc&, TIntermNode*, const TType&); + TIntermTyped* constructAggregate(TIntermNode*, const TType&, int, const TSourceLoc&); + TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermTyped*, const TSourceLoc&, bool subset); + void inheritMemoryQualifiers(const TQualifier& from, TQualifier& to); + void declareBlock(const TSourceLoc&, TTypeList& typeList, const TString* instanceName = 0, TArraySizes* arraySizes = 0); + void blockStorageRemap(const TSourceLoc&, const TString*, TQualifier&); + void 
blockStageIoCheck(const TSourceLoc&, const TQualifier&); + void blockQualifierCheck(const TSourceLoc&, const TQualifier&, bool instanceName); + void fixBlockLocations(const TSourceLoc&, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); + void fixXfbOffsets(TQualifier&, TTypeList&); + void fixBlockUniformOffsets(TQualifier&, TTypeList&); + void fixBlockUniformLayoutMatrix(TQualifier&, TTypeList*, TTypeList*); + void fixBlockUniformLayoutPacking(TQualifier&, TTypeList*, TTypeList*); + void addQualifierToExisting(const TSourceLoc&, TQualifier, const TString& identifier); + void addQualifierToExisting(const TSourceLoc&, TQualifier, TIdentifierList&); + void invariantCheck(const TSourceLoc&, const TQualifier&); + void updateStandaloneQualifierDefaults(const TSourceLoc&, const TPublicType&); + void wrapupSwitchSubsequence(TIntermAggregate* statements, TIntermNode* branchNode); + TIntermNode* addSwitch(const TSourceLoc&, TIntermTyped* expression, TIntermAggregate* body); + const TTypeList* recordStructCopy(TStructRecord&, const TType*, const TType*); + +#ifndef GLSLANG_WEB + TAttributeType attributeFromName(const TString& name) const; + TAttributes* makeAttributes(const TString& identifier) const; + TAttributes* makeAttributes(const TString& identifier, TIntermNode* node) const; + TAttributes* mergeAttributes(TAttributes*, TAttributes*) const; + + // Determine selection control from attributes + void handleSelectionAttributes(const TAttributes& attributes, TIntermNode*); + void handleSwitchAttributes(const TAttributes& attributes, TIntermNode*); + // Determine loop control from attributes + void handleLoopAttributes(const TAttributes& attributes, TIntermNode*); + // Function attributes + void handleFunctionAttributes(const TSourceLoc&, const TAttributes&); + + // GL_EXT_spirv_intrinsics + TSpirvRequirement* makeSpirvRequirement(const TSourceLoc& loc, const TString& name, + const TIntermAggregate* extensions, const TIntermAggregate* capabilities); + TSpirvRequirement* mergeSpirvRequirements(const TSourceLoc& loc, TSpirvRequirement* spirvReq1, + TSpirvRequirement* spirvReq2); + TSpirvTypeParameters* makeSpirvTypeParameters(const TSourceLoc& loc, const TIntermConstantUnion* constant); + TSpirvTypeParameters* mergeSpirvTypeParameters(TSpirvTypeParameters* spirvTypeParams1, + TSpirvTypeParameters* spirvTypeParams2); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, const TString& value); + TSpirvInstruction* makeSpirvInstruction(const TSourceLoc& loc, const TString& name, int value); + TSpirvInstruction* mergeSpirvInstruction(const TSourceLoc& loc, TSpirvInstruction* spirvInst1, + TSpirvInstruction* spirvInst2); +#endif + + void checkAndResizeMeshViewDim(const TSourceLoc&, TType&, bool isBlockMember); + +protected: + void nonInitConstCheck(const TSourceLoc&, TString& identifier, TType& type); + void inheritGlobalDefaults(TQualifier& dst) const; + TVariable* makeInternalVariable(const char* name, const TType&) const; + TVariable* declareNonArray(const TSourceLoc&, const TString& identifier, const TType&); + void declareArray(const TSourceLoc&, const TString& identifier, const TType&, TSymbol*&); + void checkRuntimeSizable(const TSourceLoc&, const TIntermTyped&); + bool isRuntimeLength(const TIntermTyped&) const; + TIntermNode* executeInitializer(const TSourceLoc&, TIntermTyped* initializer, TVariable* variable); + TIntermTyped* convertInitializerList(const TSourceLoc&, const TType&, TIntermTyped* initializer); +#ifndef GLSLANG_WEB + void 
finish() override; +#endif + + virtual const char* getGlobalUniformBlockName() const override; + virtual void finalizeGlobalUniformBlockLayout(TVariable&) override; + virtual void setUniformBlockDefaults(TType& block) const override; + + virtual const char* getAtomicCounterBlockName() const override; + virtual void finalizeAtomicCounterBlockLayout(TVariable&) override; + virtual void setAtomicCounterBlockDefaults(TType& block) const override; + virtual void setInvariant(const TSourceLoc& loc, const char* builtin) override; + +public: + // + // Generally, bison productions, the scanner, and the PP need read/write access to these; just give them direct access + // + + // Current state of parsing + bool inMain; // if inside a function, true if the function is main + const TString* blockName; + TQualifier currentBlockQualifier; + TPrecisionQualifier defaultPrecision[EbtNumTypes]; + TBuiltInResource resources; + TLimits& limits; + +protected: + TParseContext(TParseContext&); + TParseContext& operator=(TParseContext&); + + static const int maxSamplerIndex = EsdNumDims * (EbtNumTypes * (2 * 2 * 2 * 2 * 2)); // see computeSamplerTypeIndex() + TPrecisionQualifier defaultSamplerPrecision[maxSamplerIndex]; + TPrecisionManager precisionManager; + TQualifier globalBufferDefaults; + TQualifier globalUniformDefaults; + TQualifier globalInputDefaults; + TQualifier globalOutputDefaults; + TQualifier globalSharedDefaults; + TString currentCaller; // name of last function body entered (not valid when at global scope) +#ifndef GLSLANG_WEB + int* atomicUintOffsets; // to become an array of the right size to hold an offset per binding point + bool anyIndexLimits; + TIdSetType inductiveLoopIds; + TVector<TIntermTyped*> needsIndexLimitationChecking; + TStructRecord matrixFixRecord; + TStructRecord packingFixRecord; + + // + // Geometry shader input arrays: + // - array sizing is based on input primitive and/or explicit size + // + // Tessellation control output arrays: + // - array sizing is based on output layout(vertices=...)
and/or explicit size + // + // Both: + // - array sizing is retroactive + // - built-in block redeclarations interact with this + // + // Design: + // - use a per-context "resize-list", a list of symbols whose array sizes + // can be fixed + // + // - the resize-list starts empty at beginning of user-shader compilation, it does + // not have built-ins in it + // + // - on built-in array use: copyUp() symbol and add it to the resize-list + // + // - on user array declaration: add it to the resize-list + // + // - on block redeclaration: copyUp() symbol and add it to the resize-list + // * note, that appropriately gives an error if redeclaring a block that + // was already used and hence already copied-up + // + // - on seeing a layout declaration that sizes the array, fix everything in the + // resize-list, giving errors for mismatch + // + // - on seeing an array size declaration, give errors on mismatch between it and previous + // array-sizing declarations + // + TVector<TSymbol*> ioArraySymbolResizeList; +#endif +}; + +} // end namespace glslang + +#endif // _PARSER_HELPER_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/RemoveTree.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/RemoveTree.h new file mode 100644 index 0000000..1ed0156 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/RemoveTree.h @@ -0,0 +1,41 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE.
+// + +#pragma once + +namespace glslang { + +void RemoveAllTreeNodes(TIntermNode*); + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Scan.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Scan.h new file mode 100644 index 0000000..24b75cf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Scan.h @@ -0,0 +1,276 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +#ifndef _GLSLANG_SCAN_INCLUDED_ +#define _GLSLANG_SCAN_INCLUDED_ + +#include "Versions.h" + +namespace glslang { + +// Use a global end-of-input character, so no translation is needed across +// layers of encapsulation. Characters are all 8 bit, and positive, so there is +// no aliasing of character 255 onto -1, for example. +const int EndOfInput = -1; + +// +// A character scanner that seamlessly, on read-only strings, reads across an +// array of strings without assuming null termination. +// +class TInputScanner { +public: + TInputScanner(int n, const char* const s[], size_t L[], const char* const* names = nullptr, + int b = 0, int f = 0, bool single = false) : + numSources(n), + // up to this point, common usage is "char*", but now we need positive 8-bit characters + sources(reinterpret_cast<const unsigned char* const *>(s)), + lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f), singleLogical(single), + endOfFileReached(false) + { + loc = new TSourceLoc[numSources]; + for (int i = 0; i < numSources; ++i) { + loc[i].init(i - stringBias); + } + if (names != nullptr) { + for (int i = 0; i < numSources; ++i) + loc[i].name = names[i] != nullptr ?
NewPoolTString(names[i]) : nullptr; + } + loc[currentSource].line = 1; + logicalSourceLoc.init(1); + logicalSourceLoc.name = loc[0].name; + } + + virtual ~TInputScanner() + { + delete [] loc; + } + + // retrieve the next character and advance one character + int get() + { + int ret = peek(); + if (ret == EndOfInput) + return ret; + ++loc[currentSource].column; + ++logicalSourceLoc.column; + if (ret == '\n') { + ++loc[currentSource].line; + ++logicalSourceLoc.line; + logicalSourceLoc.column = 0; + loc[currentSource].column = 0; + } + advance(); + + return ret; + } + + // retrieve the next character, no advance + int peek() + { + if (currentSource >= numSources) { + endOfFileReached = true; + return EndOfInput; + } + // Make sure we do not read off the end of a string. + // N.B. Sources can have a length of 0. + int sourceToRead = currentSource; + size_t charToRead = currentChar; + while(charToRead >= lengths[sourceToRead]) { + charToRead = 0; + sourceToRead += 1; + if (sourceToRead >= numSources) { + return EndOfInput; + } + } + + // Here, we care about making negative valued characters positive + return sources[sourceToRead][charToRead]; + } + + // go back one character + void unget() + { + // Do not roll back once we've reached the end of the file. + if (endOfFileReached) + return; + + if (currentChar > 0) { + --currentChar; + --loc[currentSource].column; + --logicalSourceLoc.column; + if (loc[currentSource].column < 0) { + // We've moved back past a new line. Find the + // previous newline (or start of the file) to compute + // the column count on the now current line. + size_t chIndex = currentChar; + while (chIndex > 0) { + if (sources[currentSource][chIndex] == '\n') { + break; + } + --chIndex; + } + logicalSourceLoc.column = (int)(currentChar - chIndex); + loc[currentSource].column = (int)(currentChar - chIndex); + } + } else { + do { + --currentSource; + } while (currentSource > 0 && lengths[currentSource] == 0); + if (lengths[currentSource] == 0) { + // set to 0 if we've backed up to the start of an empty string + currentChar = 0; + } else + currentChar = lengths[currentSource] - 1; + } + if (peek() == '\n') { + --loc[currentSource].line; + --logicalSourceLoc.line; + } + } + + // for #line override + void setLine(int newLine) + { + logicalSourceLoc.line = newLine; + loc[getLastValidSourceIndex()].line = newLine; + } + + // for #line override in filename based parsing + void setFile(const char* filename) + { + TString* fn_tstr = NewPoolTString(filename); + logicalSourceLoc.name = fn_tstr; + loc[getLastValidSourceIndex()].name = fn_tstr; + } + + void setFile(const char* filename, int i) + { + TString* fn_tstr = NewPoolTString(filename); + if (i == getLastValidSourceIndex()) { + logicalSourceLoc.name = fn_tstr; + } + loc[i].name = fn_tstr; + } + + void setString(int newString) + { + logicalSourceLoc.string = newString; + loc[getLastValidSourceIndex()].string = newString; + logicalSourceLoc.name = nullptr; + loc[getLastValidSourceIndex()].name = nullptr; + } + + // for #include content indentation + void setColumn(int col) + { + logicalSourceLoc.column = col; + loc[getLastValidSourceIndex()].column = col; + } + + void setEndOfInput() + { + endOfFileReached = true; + currentSource = numSources; + } + + bool atEndOfInput() const { return endOfFileReached; } + + const TSourceLoc& getSourceLoc() const + { + if (singleLogical) { + return logicalSourceLoc; + } else { + return loc[std::max(0, std::min(currentSource, numSources - finale - 1))]; + } + } + // Returns the index (starting from 0) 
of the most recent valid source string we are reading from. + int getLastValidSourceIndex() const { return std::min(currentSource, numSources - 1); } + + void consumeWhiteSpace(bool& foundNonSpaceTab); + bool consumeComment(); + void consumeWhitespaceComment(bool& foundNonSpaceTab); + bool scanVersion(int& version, EProfile& profile, bool& notFirstToken); + +protected: + + // advance one character + void advance() + { + ++currentChar; + if (currentChar >= lengths[currentSource]) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + while (currentSource < numSources && lengths[currentSource] == 0) { + ++currentSource; + if (currentSource < numSources) { + loc[currentSource].string = loc[currentSource - 1].string + 1; + loc[currentSource].line = 1; + loc[currentSource].column = 0; + } + } + currentChar = 0; + } + } + + int numSources; // number of strings in source + const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput + const size_t *lengths; // length of each string + int currentSource; + size_t currentChar; + + // This is for reporting what string/line an error occurred on, and can be overridden by #line. + // It remembers the last state of each source string as it is left for the next one, so unget() + // can restore that state. + TSourceLoc* loc; // an array + + int stringBias; // the first string that is the user's string number 0 + int finale; // number of internal strings after user's last string + + TSourceLoc logicalSourceLoc; + bool singleLogical; // treats the strings as a single logical string. + // locations will be reported from the first string. + + // Set to true once peek() returns EndOfFile, so that we won't roll back + // once we've reached EndOfFile. + bool endOfFileReached; +}; + +} // end namespace glslang + +#endif // _GLSLANG_SCAN_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/ScanContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/ScanContext.h new file mode 100644 index 0000000..74b2b3c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/ScanContext.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// +// This holds context specific to the GLSL scanner, which +// sits between the preprocessor scanner and parser. +// + +#pragma once + +#include "ParseHelper.h" + +namespace glslang { + +class TPpContext; +class TPpToken; +class TParserToken; + +class TScanContext { +public: + explicit TScanContext(TParseContextBase& pc) : + parseContext(pc), + afterType(false), afterStruct(false), + field(false), afterBuffer(false) { } + virtual ~TScanContext() { } + + static void fillInKeywordMap(); + static void deleteKeywordMap(); + + int tokenize(TPpContext*, TParserToken&); + +protected: + TScanContext(TScanContext&); + TScanContext& operator=(TScanContext&); + + int tokenizeIdentifier(); + int identifierOrType(); + int reservedWord(); + int identifierOrReserved(bool reserved); + int es30ReservedFromGLSL(int version); + int nonreservedKeyword(int esVersion, int nonEsVersion); + int precisionKeyword(); + int matNxM(); + int dMat(); + int firstGenerationImage(bool inEs310); + int secondGenerationImage(); + + TParseContextBase& parseContext; + bool afterType; // true if we've recognized a type, so can only be looking for an identifier + bool afterStruct; // true if we've recognized the STRUCT keyword, so can only be looking for an identifier + bool field; // true if we're on a field, right after a '.' + bool afterBuffer; // true if we've recognized the BUFFER keyword + TSourceLoc loc; + TParserToken* parserToken; + TPpToken* ppToken; + + const char* tokenText; + int keyword; +}; + +} // end namespace glslang diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/SymbolTable.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/SymbolTable.h new file mode 100644 index 0000000..0d45e48 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/SymbolTable.h @@ -0,0 +1,956 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _SYMBOL_TABLE_INCLUDED_ +#define _SYMBOL_TABLE_INCLUDED_ + +// +// Symbol table for parsing. Has these design characteristics: +// +// * Same symbol table can be used to compile many shaders, to preserve +// effort of creating and loading with the large numbers of built-in +// symbols. +// +// --> This requires a copy mechanism, so initial pools used to create +// the shared information can be popped. Done through "clone" +// methods. +// +// * Name mangling will be used to give each function a unique name +// so that symbol table lookups are never ambiguous. This allows +// a simpler symbol table structure. +// +// * Pushing and popping of scope, so symbol table will really be a stack +// of symbol tables. Searched from the top, with new inserts going into +// the top. +// +// * Constants: Compile time constant symbols will keep their values +// in the symbol table. The parser can substitute constants at parse +// time, including doing constant folding and constant propagation. +// +// * No temporaries: Temporaries made from operations (+, --, .xy, etc.) +// are tracked in the intermediate representation, not the symbol table. +// + +#include "../Include/Common.h" +#include "../Include/intermediate.h" +#include "../Include/InfoSink.h" + +namespace glslang { + +// +// Symbol base class. (Can build functions or variables out of these...) 
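+// +// For orientation, a typical parse-time sequence against the classes below +// looks like this (an illustrative sketch only, with construction of the +// inserted symbol elided; push/pop/insert/find are declared further down): +// +// symbolTable.push(); // enter a nested scope +// symbolTable.insert(symbol); // declare, keyed by mangled name +// TSymbol* s = symbolTable.find(name); // innermost declaration wins +// symbolTable.pop(0); // leave the scope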
+// + +class TVariable; +class TFunction; +class TAnonMember; + +typedef TVector<const char*> TExtensionList; + +class TSymbol { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + explicit TSymbol(const TString *n) : name(n), uniqueId(0), extensions(0), writable(true) { } + virtual TSymbol* clone() const = 0; + virtual ~TSymbol() { } // rely on all symbol owned memory coming from the pool + + virtual const TString& getName() const { return *name; } + virtual void changeName(const TString* newName) { name = newName; } + virtual void addPrefix(const char* prefix) + { + TString newName(prefix); + newName.append(*name); + changeName(NewPoolTString(newName.c_str())); + } + virtual const TString& getMangledName() const { return getName(); } + virtual TFunction* getAsFunction() { return 0; } + virtual const TFunction* getAsFunction() const { return 0; } + virtual TVariable* getAsVariable() { return 0; } + virtual const TVariable* getAsVariable() const { return 0; } + virtual const TAnonMember* getAsAnonMember() const { return 0; } + virtual const TType& getType() const = 0; + virtual TType& getWritableType() = 0; + virtual void setUniqueId(long long id) { uniqueId = id; } + virtual long long getUniqueId() const { return uniqueId; } + virtual void setExtensions(int numExts, const char* const exts[]) + { + assert(extensions == 0); + assert(numExts > 0); + extensions = NewPoolObject(extensions); + for (int e = 0; e < numExts; ++e) + extensions->push_back(exts[e]); + } + virtual int getNumExtensions() const { return extensions == nullptr ? 0 : (int)extensions->size(); } + virtual const char** getExtensions() const { return extensions->data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const = 0; + void dumpExtensions(TInfoSink& infoSink) const; +#endif + + virtual bool isReadOnly() const { return ! writable; } + virtual void makeReadOnly() { writable = false; } + +protected: + explicit TSymbol(const TSymbol&); + TSymbol& operator=(const TSymbol&); + + const TString *name; + unsigned long long uniqueId; // For cross-scope comparing during code generation + + // For tracking what extensions must be present + // (don't use if correct version/profile is present). + TExtensionList* extensions; // an array of pointers to existing constant char strings + + // + // N.B.: Non-const functions that will be generally used should assert on this, + // to avoid overwriting shared symbol-table information. + // + bool writable; +}; + +// +// Variable class, meaning a symbol that's not a function. +// +// There could be a separate class hierarchy for Constant variables; +// Only one of int, bool, or float, (or none) is correct for +// any particular use, but it's easy to do this way, and doesn't +// seem worth having separate classes, and "getConst" can't simply return +// different values for different types polymorphically, so this is +// just simple and pragmatic.
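+// +// Illustrative contrast for the two constant members declared below: a +// folded 'const float x = 2.0;' keeps its value in 'constArray', while a +// specialization constant such as 'layout(constant_id = 0) const int n = 1;' +// keeps its defining expression in 'constSubtree' for later evaluation.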
+// +class TVariable : public TSymbol { +public: + TVariable(const TString *name, const TType& t, bool uT = false ) + : TSymbol(name), + userType(uT), + constSubtree(nullptr), + memberExtensions(nullptr), + anonId(-1) + { type.shallowCopy(t); } + virtual TVariable* clone() const; + virtual ~TVariable() { } + + virtual TVariable* getAsVariable() { return this; } + virtual const TVariable* getAsVariable() const { return this; } + virtual const TType& getType() const { return type; } + virtual TType& getWritableType() { assert(writable); return type; } + virtual bool isUserType() const { return userType; } + virtual const TConstUnionArray& getConstArray() const { return constArray; } + virtual TConstUnionArray& getWritableConstArray() { assert(writable); return constArray; } + virtual void setConstArray(const TConstUnionArray& array) { constArray = array; } + virtual void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } + virtual TIntermTyped* getConstSubtree() const { return constSubtree; } + virtual void setAnonId(int i) { anonId = i; } + virtual int getAnonId() const { return anonId; } + + virtual void setMemberExtensions(int member, int numExts, const char* const exts[]) + { + assert(type.isStruct()); + assert(numExts > 0); + if (memberExtensions == nullptr) { + memberExtensions = NewPoolObject(memberExtensions); + memberExtensions->resize(type.getStruct()->size()); + } + for (int e = 0; e < numExts; ++e) + (*memberExtensions)[member].push_back(exts[e]); + } + virtual bool hasMemberExtensions() const { return memberExtensions != nullptr; } + virtual int getNumMemberExtensions(int member) const + { + return memberExtensions == nullptr ? 0 : (int)(*memberExtensions)[member].size(); + } + virtual const char** getMemberExtensions(int member) const { return (*memberExtensions)[member].data(); } + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + +protected: + explicit TVariable(const TVariable&); + TVariable& operator=(const TVariable&); + + TType type; + bool userType; + + // we are assuming that Pool Allocator will free the memory allocated to unionArray + // when this object is destroyed + + TConstUnionArray constArray; // for compile-time constant value + TIntermTyped* constSubtree; // for specialization constant computation + TVector<TExtensionList>* memberExtensions; // per-member extension list, allocated only when needed + int anonId; // the ID used for anonymous blocks: TODO: see if uniqueId could serve a dual purpose +}; + +// +// The function sub-class of symbols and the parser will need to +// share this definition of a function parameter. +// +struct TParameter { + TString *name; + TType* type; + TIntermTyped* defaultValue; + TParameter& copyParam(const TParameter& param) + { + if (param.name) + name = NewPoolTString(param.name->c_str()); + else + name = 0; + type = param.type->clone(); + defaultValue = param.defaultValue; + return *this; + } + TBuiltInVariable getDeclaredBuiltIn() const { return type->getQualifier().declaredBuiltIn; } +}; + +// +// The function sub-class of a symbol.
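+// +// Overloads are told apart by the mangled name built up below: the function +// name, '(', then one token per parameter appended by addParameter() through +// TType::appendMangledName(); e.g. (illustrative spelling) "max(" grows into +// "max(vf2;vf2;" for a two-parameter vec2 overload.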
+// +class TFunction : public TSymbol { +public: + explicit TFunction(TOperator o) : + TSymbol(0), + op(o), + defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) { } + TFunction(const TString *name, const TType& retType, TOperator tOp = EOpNull) : + TSymbol(name), + mangledName(*name + '('), + op(tOp), + defined(false), prototyped(false), implicitThis(false), illegalImplicitThis(false), defaultParamCount(0) + { + returnType.shallowCopy(retType); + declaredBuiltIn = retType.getQualifier().builtIn; + } + virtual TFunction* clone() const override; + virtual ~TFunction(); + + virtual TFunction* getAsFunction() override { return this; } + virtual const TFunction* getAsFunction() const override { return this; } + + // Install 'p' as the (non-'this') last parameter. + // Non-'this' parameters are reflected in both the list of parameters and the + // mangled name. + virtual void addParameter(TParameter& p) + { + assert(writable); + parameters.push_back(p); + p.type->appendMangledName(mangledName); + + if (p.defaultValue != nullptr) + defaultParamCount++; + } + + // Install 'this' as the first parameter. + // 'this' is reflected in the list of parameters, but not the mangled name. + virtual void addThisParameter(TType& type, const char* name) + { + TParameter p = { NewPoolTString(name), new TType, nullptr }; + p.type->shallowCopy(type); + parameters.insert(parameters.begin(), p); + } + + virtual void addPrefix(const char* prefix) override + { + TSymbol::addPrefix(prefix); + mangledName.insert(0, prefix); + } + + virtual void removePrefix(const TString& prefix) + { + assert(mangledName.compare(0, prefix.size(), prefix) == 0); + mangledName.erase(0, prefix.size()); + } + + virtual const TString& getMangledName() const override { return mangledName; } + virtual const TType& getType() const override { return returnType; } + virtual TBuiltInVariable getDeclaredBuiltInType() const { return declaredBuiltIn; } + virtual TType& getWritableType() override { return returnType; } + virtual void relateToOperator(TOperator o) { assert(writable); op = o; } + virtual TOperator getBuiltInOp() const { return op; } + virtual void setDefined() { assert(writable); defined = true; } + virtual bool isDefined() const { return defined; } + virtual void setPrototyped() { assert(writable); prototyped = true; } + virtual bool isPrototyped() const { return prototyped; } + virtual void setImplicitThis() { assert(writable); implicitThis = true; } + virtual bool hasImplicitThis() const { return implicitThis; } + virtual void setIllegalImplicitThis() { assert(writable); illegalImplicitThis = true; } + virtual bool hasIllegalImplicitThis() const { return illegalImplicitThis; } + + // Return total number of parameters + virtual int getParamCount() const { return static_cast<int>(parameters.size()); } + // Return number of parameters with default values.
+ virtual int getDefaultParamCount() const { return defaultParamCount; } + // Return number of fixed parameters (without default values) + virtual int getFixedParamCount() const { return getParamCount() - getDefaultParamCount(); } + + virtual TParameter& operator[](int i) { assert(writable); return parameters[i]; } + virtual const TParameter& operator[](int i) const { return parameters[i]; } + +#ifndef GLSLANG_WEB + virtual void setSpirvInstruction(const TSpirvInstruction& inst) + { + relateToOperator(EOpSpirvInst); + spirvInst = inst; + } + virtual const TSpirvInstruction& getSpirvInstruction() const { return spirvInst; } +#endif + +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TFunction(const TFunction&); + TFunction& operator=(const TFunction&); + + typedef TVector<TParameter> TParamList; + TParamList parameters; + TType returnType; + TBuiltInVariable declaredBuiltIn; + + TString mangledName; + TOperator op; + bool defined; + bool prototyped; + bool implicitThis; // True if this function is allowed to see all members of 'this' + bool illegalImplicitThis; // True if this function is not supposed to have access to dynamic members of 'this', + // even if it finds member variables in the symbol table. + // This is important for a static member function that has member variables in scope, + // but is not allowed to use them, or see hidden symbols instead. + int defaultParamCount; + +#ifndef GLSLANG_WEB + TSpirvInstruction spirvInst; // SPIR-V instruction qualifiers +#endif +}; + +// +// Members of anonymous blocks are a kind of TSymbol. They are not hidden in +// the symbol table behind a container; rather they are visible and point to +// their anonymous container. (The anonymous container is found through the +// member, not the other way around.)
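+// +// For example (illustrative): given an anonymous block +// 'uniform Transforms { mat4 mvp; };', the member 'mvp' is inserted into the +// table by its own name as a TAnonMember that points back at the block's +// container variable.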
+// +class TAnonMember : public TSymbol { +public: + TAnonMember(const TString* n, unsigned int m, TVariable& a, int an) : TSymbol(n), anonContainer(a), memberNumber(m), anonId(an) { } + virtual TAnonMember* clone() const override; + virtual ~TAnonMember() { } + + virtual const TAnonMember* getAsAnonMember() const override { return this; } + virtual const TVariable& getAnonContainer() const { return anonContainer; } + virtual unsigned int getMemberNumber() const { return memberNumber; } + + virtual const TType& getType() const override + { + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual TType& getWritableType() override + { + assert(writable); + const TTypeList& types = *anonContainer.getType().getStruct(); + return *types[memberNumber].type; + } + + virtual void setExtensions(int numExts, const char* const exts[]) override + { + anonContainer.setMemberExtensions(memberNumber, numExts, exts); + } + virtual int getNumExtensions() const override { return anonContainer.getNumMemberExtensions(memberNumber); } + virtual const char** getExtensions() const override { return anonContainer.getMemberExtensions(memberNumber); } + + virtual int getAnonId() const { return anonId; } +#if !defined(GLSLANG_WEB) + virtual void dump(TInfoSink& infoSink, bool complete = false) const override; +#endif + +protected: + explicit TAnonMember(const TAnonMember&); + TAnonMember& operator=(const TAnonMember&); + + TVariable& anonContainer; + unsigned int memberNumber; + int anonId; +}; + +class TSymbolTableLevel { +public: + POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) + TSymbolTableLevel() : defaultPrecision(0), anonId(0), thisLevel(false) { } + ~TSymbolTableLevel(); + + bool insert(const TString& name, TSymbol* symbol) { + return level.insert(tLevelPair(name, symbol)).second; + } + + bool insert(TSymbol& symbol, bool separateNameSpaces, const TString& forcedKeyName = TString()) + { + // + // returning true means symbol was added to the table with no semantic errors + // + const TString& name = symbol.getName(); + if (forcedKeyName.length()) { + return level.insert(tLevelPair(forcedKeyName, &symbol)).second; + } + else if (name == "") { + symbol.getAsVariable()->setAnonId(anonId++); + // An empty name means an anonymous container, exposing its members to the external scope. + // Give it a name and insert its members in the symbol table, pointing to the container. + char buf[20]; + snprintf(buf, 20, "%s%d", AnonymousPrefix, symbol.getAsVariable()->getAnonId()); + symbol.changeName(NewPoolTString(buf)); + + return insertAnonymousMembers(symbol, 0); + } else { + // Check for redefinition errors: + // - STL itself will tell us if there is a direct name collision, with name mangling, at this level + // - additionally, check for function-redefining-variable name collisions + const TString& insertName = symbol.getMangledName(); + if (symbol.getAsFunction()) { + // make sure there isn't a variable of this name + if (! separateNameSpaces && level.find(name) != level.end()) + return false; + + // insert, and whatever happens is okay + level.insert(tLevelPair(insertName, &symbol)); + + return true; + } else + return level.insert(tLevelPair(insertName, &symbol)).second; + } + } + + // Add more members to an already inserted aggregate object + bool amend(TSymbol& symbol, int firstNewMember) + { + // See insert() for comments on basic explanation of insert. + // This operates similarly, but more simply. 
+ // Only supporting amend of anonymous blocks so far. + if (IsAnonymous(symbol.getName())) + return insertAnonymousMembers(symbol, firstNewMember); + else + return false; + } + + bool insertAnonymousMembers(TSymbol& symbol, int firstMember) + { + const TTypeList& types = *symbol.getAsVariable()->getType().getStruct(); + for (unsigned int m = firstMember; m < types.size(); ++m) { + TAnonMember* member = new TAnonMember(&types[m].type->getFieldName(), m, *symbol.getAsVariable(), symbol.getAsVariable()->getAnonId()); + if (! level.insert(tLevelPair(member->getMangledName(), member)).second) + return false; + } + + return true; + } + + void retargetSymbol(const TString& from, const TString& to) { + tLevel::const_iterator fromIt = level.find(from); + tLevel::const_iterator toIt = level.find(to); + if (fromIt == level.end() || toIt == level.end()) + return; + delete fromIt->second; + level[from] = toIt->second; + retargetedSymbols.push_back({from, to}); + } + + TSymbol* find(const TString& name) const + { + tLevel::const_iterator it = level.find(name); + if (it == level.end()) + return 0; + else + return (*it).second; + } + + void findFunctionNameList(const TString& name, TVector<const TFunction*>& list) + { + size_t parenAt = name.find_first_of('('); + TString base(name, 0, parenAt + 1); + + tLevel::const_iterator begin = level.lower_bound(base); + base[parenAt] = ')'; // assume ')' is lexically after '(' + tLevel::const_iterator end = level.upper_bound(base); + for (tLevel::const_iterator it = begin; it != end; ++it) + list.push_back(it->second->getAsFunction()); + } + + // See if there is already a function in the table having the given non-function-style name. + bool hasFunctionName(const TString& name) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt != candidateName.npos && candidateName.compare(0, parenAt, name) == 0) + + return true; + } + + return false; + } + + // See if there is a variable at this level having the given non-function-style name. + // Return true if name is found, and set variable to true if the name was a variable. + bool findFunctionVariableName(const TString& name, bool& variable) const + { + tLevel::const_iterator candidate = level.lower_bound(name); + if (candidate != level.end()) { + const TString& candidateName = (*candidate).first; + TString::size_type parenAt = candidateName.find_first_of('('); + if (parenAt == candidateName.npos) { + // not a mangled name + if (candidateName == name) { + // found a variable name match + variable = true; + return true; + } + } else { + // a mangled name + if (candidateName.compare(0, parenAt, name) == 0) { + // found a function name match + variable = false; + return true; + } + } + } + + return false; + } + + // Use this to do a lazy 'push' of precision defaults the first time + // a precision statement is seen in a new scope. Leave it at 0 for + // when no push was needed. Thus, it is not the current defaults, + // it is what to restore the defaults to when popping a level.
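+ // For example (illustrative): on the first 'precision mediump float;' seen + // in a scope, the caller latches the enclosing scope's defaults here, and + // TSymbolTable::pop() later reads them back via getPreviousDefaultPrecisions() + // to restore them.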
+ void setPreviousDefaultPrecisions(const TPrecisionQualifier *p) + { + // can call multiple times at one scope, will only latch on first call, + // as we're tracking the previous scope's values, not the current values + if (defaultPrecision != 0) + return; + + defaultPrecision = new TPrecisionQualifier[EbtNumTypes]; + for (int t = 0; t < EbtNumTypes; ++t) + defaultPrecision[t] = p[t]; + } + + void getPreviousDefaultPrecisions(TPrecisionQualifier *p) + { + // can be called for table level pops that didn't set the + // defaults + if (defaultPrecision == 0 || p == 0) + return; + + for (int t = 0; t < EbtNumTypes; ++t) + p[t] = defaultPrecision[t]; + } + + void relateToOperator(const char* name, TOperator op); + void setFunctionExtensions(const char* name, int num, const char* const extensions[]); +#if !defined(GLSLANG_WEB) + void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + TSymbolTableLevel* clone() const; + void readOnly(); + + void setThisLevel() { thisLevel = true; } + bool isThisLevel() const { return thisLevel; } + +protected: + explicit TSymbolTableLevel(TSymbolTableLevel&); + TSymbolTableLevel& operator=(TSymbolTableLevel&); + + typedef std::map<TString, TSymbol*, std::less<TString>, pool_allocator<std::pair<const TString, TSymbol*> > > tLevel; + typedef const tLevel::value_type tLevelPair; + typedef std::pair<tLevel::iterator, bool> tInsertResult; + + tLevel level; // named mappings + TPrecisionQualifier *defaultPrecision; + // pair<FromName, ToName> + TVector<std::pair<TString, TString>> retargetedSymbols; + int anonId; + bool thisLevel; // True if this level of the symbol table is a structure scope containing member functions + // that are supposed to see anonymous access to member variables. +}; + +class TSymbolTable { +public: + TSymbolTable() : uniqueId(0), noBuiltInRedeclarations(false), separateNameSpaces(false), adoptedLevels(0) + { + // + // This symbol table cannot be used until push() is called.
+ // + } + ~TSymbolTable() + { + // this can be called explicitly; safest to code it so it can be called multiple times + + // don't deallocate levels passed in from elsewhere + while (table.size() > adoptedLevels) + pop(0); + } + + void adoptLevels(TSymbolTable& symTable) + { + for (unsigned int level = 0; level < symTable.table.size(); ++level) { + table.push_back(symTable.table[level]); + ++adoptedLevels; + } + uniqueId = symTable.uniqueId; + noBuiltInRedeclarations = symTable.noBuiltInRedeclarations; + separateNameSpaces = symTable.separateNameSpaces; + } + + // + // While level adopting is generic, the methods below enact the following + // convention for levels: + // 0: common built-ins shared across all stages, all compiles, only one copy for all symbol tables + // 1: per-stage built-ins, shared across all compiles, but a different copy per stage + // 2: built-ins specific to a compile, like resources that are context-dependent, or redeclared built-ins + // 3: user-shader globals + // +protected: + static const uint32_t LevelFlagBitOffset = 56; + static const int globalLevel = 3; + static bool isSharedLevel(int level) { return level <= 1; } // exclude all per-compile levels + static bool isBuiltInLevel(int level) { return level <= 2; } // exclude user globals + static bool isGlobalLevel(int level) { return level <= globalLevel; } // include user globals +public: + bool isEmpty() { return table.size() == 0; } + bool atBuiltInLevel() { return isBuiltInLevel(currentLevel()); } + bool atGlobalLevel() { return isGlobalLevel(currentLevel()); } + static bool isBuiltInSymbol(long long uniqueId) { + int level = static_cast<int>(uniqueId >> LevelFlagBitOffset); + return isBuiltInLevel(level); + } + static constexpr uint64_t uniqueIdMask = (1LL << LevelFlagBitOffset) - 1; + static const uint32_t MaxLevelInUniqueID = 127; + void setNoBuiltInRedeclarations() { noBuiltInRedeclarations = true; } + void setSeparateNameSpaces() { separateNameSpaces = true; } + + void push() + { + table.push_back(new TSymbolTableLevel); + updateUniqueIdLevelFlag(); + } + + // Make a new symbol-table level to represent the scope introduced by a structure + // containing member functions, such that the member functions can find anonymous + // references to member variables. + // + // 'thisSymbol' should have a name of "" to trigger anonymous structure-member + // symbol finds. + void pushThis(TSymbol& thisSymbol) + { + assert(thisSymbol.getName().size() == 0); + table.push_back(new TSymbolTableLevel); + updateUniqueIdLevelFlag(); + table.back()->setThisLevel(); + insert(thisSymbol); + } + + void pop(TPrecisionQualifier *p) + { + table[currentLevel()]->getPreviousDefaultPrecisions(p); + delete table.back(); + table.pop_back(); + updateUniqueIdLevelFlag(); + } + + // + // Insert a visible symbol into the symbol table so it can + // be found later by name. + // + // Returns false if there was a name collision. + // + bool insert(TSymbol& symbol) + { + symbol.setUniqueId(++uniqueId); + + // make sure there isn't a function of this variable name + if (! separateNameSpaces && !
symbol.getAsFunction() && table[currentLevel()]->hasFunctionName(symbol.getName())) + return false; + + // check for not overloading or redefining a built-in function + if (noBuiltInRedeclarations) { + if (atGlobalLevel() && currentLevel() > 0) { + if (table[0]->hasFunctionName(symbol.getName())) + return false; + if (currentLevel() > 1 && table[1]->hasFunctionName(symbol.getName())) + return false; + } + } + + return table[currentLevel()]->insert(symbol, separateNameSpaces); + } + + // Add more members to an already inserted aggregate object + bool amend(TSymbol& symbol, int firstNewMember) + { + // See insert() for comments on basic explanation of insert. + // This operates similarly, but more simply. + return table[currentLevel()]->amend(symbol, firstNewMember); + } + + // Update the level info in symbol's unique ID to current level + void amendSymbolIdLevel(TSymbol& symbol) + { + // clamp level to avoid overflow + uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ? MaxLevelInUniqueID : currentLevel(); + uint64_t symbolId = symbol.getUniqueId(); + symbolId &= uniqueIdMask; + symbolId |= (level << LevelFlagBitOffset); + symbol.setUniqueId(symbolId); + } + // + // To allocate an internal temporary, which will need to be uniquely + // identified by the consumer of the AST, but never need to + // be found by doing a symbol table search by name, hence is allowed an + // arbitrary name in the symbol with no worry of collision. + // + void makeInternalVariable(TSymbol& symbol) + { + symbol.setUniqueId(++uniqueId); + } + + // + // Copy a variable or anonymous member's structure from a shared level so that + // it can be added (soon after return) to the symbol table where it can be + // modified without impacting other users of the shared table. + // + TSymbol* copyUpDeferredInsert(TSymbol* shared) + { + if (shared->getAsVariable()) { + TSymbol* copy = shared->clone(); + copy->setUniqueId(shared->getUniqueId()); + return copy; + } else { + const TAnonMember* anon = shared->getAsAnonMember(); + assert(anon); + TVariable* container = anon->getAnonContainer().clone(); + container->changeName(NewPoolTString("")); + container->setUniqueId(anon->getAnonContainer().getUniqueId()); + return container; + } + } + + TSymbol* copyUp(TSymbol* shared) + { + TSymbol* copy = copyUpDeferredInsert(shared); + table[globalLevel]->insert(*copy, separateNameSpaces); + if (shared->getAsVariable()) + return copy; + else { + // return the copy of the anonymous member + return table[globalLevel]->find(shared->getName()); + } + } + + // Normal find of a symbol, that can optionally say whether the symbol was found + // at a built-in level or the current top-scope level. + TSymbol* find(const TString& name, bool* builtIn = 0, bool* currentScope = 0, int* thisDepthP = 0) + { + int level = currentLevel(); + TSymbol* symbol; + int thisDepth = 0; + do { + if (table[level]->isThisLevel()) + ++thisDepth; + symbol = table[level]->find(name); + --level; + } while (symbol == nullptr && level >= 0); + level++; + if (builtIn) + *builtIn = isBuiltInLevel(level); + if (currentScope) + *currentScope = isGlobalLevel(currentLevel()) || level == currentLevel(); // consider shared levels as "current scope" WRT user globals + if (thisDepthP != nullptr) { + if (!
table[level]->isThisLevel()) + thisDepth = 0; + *thisDepthP = thisDepth; + } + + return symbol; + } + + void retargetSymbol(const TString& from, const TString& to) { + int level = currentLevel(); + table[level]->retargetSymbol(from, to); + } + + + // Find of a symbol that returns how many layers of nested + // structures-with-member-functions ('this' scopes) deep the symbol was + // found in. + TSymbol* find(const TString& name, int& thisDepth) + { + int level = currentLevel(); + TSymbol* symbol; + thisDepth = 0; + do { + if (table[level]->isThisLevel()) + ++thisDepth; + symbol = table[level]->find(name); + --level; + } while (symbol == 0 && level >= 0); + + if (! table[level + 1]->isThisLevel()) + thisDepth = 0; + + return symbol; + } + + bool isFunctionNameVariable(const TString& name) const + { + if (separateNameSpaces) + return false; + + int level = currentLevel(); + do { + bool variable; + bool found = table[level]->findFunctionVariableName(name, variable); + if (found) + return variable; + --level; + } while (level >= 0); + + return false; + } + + void findFunctionNameList(const TString& name, TVector<const TFunction*>& list, bool& builtIn) + { + // For user levels, return the set found in the first scope with a match + builtIn = false; + int level = currentLevel(); + do { + table[level]->findFunctionNameList(name, list); + --level; + } while (list.empty() && level >= globalLevel); + + if (! list.empty()) + return; + + // Gather across all built-in levels; they don't hide each other + builtIn = true; + do { + table[level]->findFunctionNameList(name, list); + --level; + } while (level >= 0); + } + + void relateToOperator(const char* name, TOperator op) + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->relateToOperator(name, op); + } + + void setFunctionExtensions(const char* name, int num, const char* const extensions[]) + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->setFunctionExtensions(name, num, extensions); + } + + void setVariableExtensions(const char* name, int numExts, const char* const extensions[]) + { + TSymbol* symbol = find(TString(name)); + if (symbol == nullptr) + return; + + symbol->setExtensions(numExts, extensions); + } + + void setVariableExtensions(const char* blockName, const char* name, int numExts, const char* const extensions[]) + { + TSymbol* symbol = find(TString(blockName)); + if (symbol == nullptr) + return; + TVariable* variable = symbol->getAsVariable(); + assert(variable != nullptr); + + const TTypeList& structure = *variable->getAsVariable()->getType().getStruct(); + for (int member = 0; member < (int)structure.size(); ++member) { + if (structure[member].type->getFieldName().compare(name) == 0) { + variable->setMemberExtensions(member, numExts, extensions); + return; + } + } + } + + long long getMaxSymbolId() { return uniqueId; } +#if !defined(GLSLANG_WEB) + void dump(TInfoSink& infoSink, bool complete = false) const; +#endif + void copyTable(const TSymbolTable& copyOf); + + void setPreviousDefaultPrecisions(TPrecisionQualifier *p) { table[currentLevel()]->setPreviousDefaultPrecisions(p); } + + void readOnly() + { + for (unsigned int level = 0; level < table.size(); ++level) + table[level]->readOnly(); + } + + // Add current level in the high-bits of unique id + void updateUniqueIdLevelFlag() { + // clamp level to avoid overflow + uint64_t level = (uint32_t)currentLevel() > MaxLevelInUniqueID ?
MaxLevelInUniqueID : currentLevel(); + uniqueId &= uniqueIdMask; + uniqueId |= (level << LevelFlagBitOffset); + } + + void overwriteUniqueId(long long id) + { + uniqueId = id; + updateUniqueIdLevelFlag(); + } + +protected: + TSymbolTable(TSymbolTable&); + TSymbolTable& operator=(TSymbolTableLevel&); + + int currentLevel() const { return static_cast<int>(table.size()) - 1; } + std::vector<TSymbolTableLevel*> table; + long long uniqueId; // for unique identification in code generation + bool noBuiltInRedeclarations; + bool separateNameSpaces; + unsigned int adoptedLevels; +}; + +} // end namespace glslang + +#endif // _SYMBOL_TABLE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Versions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Versions.h new file mode 100644 index 0000000..f06abdd --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/Versions.h @@ -0,0 +1,359 @@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2012-2013 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Copyright (C) 2015-2018 Google, Inc. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _VERSIONS_INCLUDED_ +#define _VERSIONS_INCLUDED_ + +#define LAST_ELEMENT_MARKER(x) x + +// +// Help manage multiple profiles, versions, extensions etc. +// + +// +// Profiles are set up for masking operations, so queries can be done on multiple +// profiles at the same time. +// +// Don't maintain an ordinal set of enums (0,1,2,3...) to avoid all possible +// defects from mixing the two different forms.
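+// +// Because each profile below occupies its own bit, a single mask test can +// query several profiles at once, e.g. (illustrative): +// +// if (profile & (ECoreProfile | ECompatibilityProfile)) { ... } +// +// covers both desktop profiles in one operation.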
+// +typedef enum : unsigned { + EBadProfile = 0, + ENoProfile = (1 << 0), // only for desktop, before profiles showed up + ECoreProfile = (1 << 1), + ECompatibilityProfile = (1 << 2), + EEsProfile = (1 << 3), + LAST_ELEMENT_MARKER(EProfileCount), +} EProfile; + +namespace glslang { + +// +// Map from profile enum to externally readable text name. +// +inline const char* ProfileName(EProfile profile) +{ + switch (profile) { + case ENoProfile: return "none"; + case ECoreProfile: return "core"; + case ECompatibilityProfile: return "compatibility"; + case EEsProfile: return "es"; + default: return "unknown profile"; + } +} + +// +// What source rules, validation rules, target language, etc. are needed or +// desired for SPIR-V? +// +// 0 means a target or rule set is not enabled (ignore rules from that entity). +// Non-0 means to apply semantic rules arising from that version of its rule set. +// The union of all requested rule sets will be applied. +// +struct SpvVersion { + SpvVersion() : spv(0), vulkanGlsl(0), vulkan(0), openGl(0), vulkanRelaxed(false) {} + unsigned int spv; // the version of SPIR-V to target, as defined by "word 1" of the SPIR-V binary header + int vulkanGlsl; // the version of GLSL semantics for Vulkan, from GL_KHR_vulkan_glsl, for "#define VULKAN XXX" + int vulkan; // the version of Vulkan, for which SPIR-V execution environment rules to use + int openGl; // the version of GLSL semantics for OpenGL, from GL_ARB_gl_spirv, for "#define GL_SPIRV XXX" + bool vulkanRelaxed; // relax changes to GLSL for Vulkan, allowing some GL-specific to be compiled to Vulkan SPIR-V target +}; + +// +// The behaviors from the GLSL "#extension extension_name : behavior" +// +typedef enum { + EBhMissing = 0, + EBhRequire, + EBhEnable, + EBhWarn, + EBhDisable, + EBhDisablePartial // use as initial state of an extension that is only partially implemented +} TExtensionBehavior; + +// +// Symbolic names for extensions. Strings may be directly used when calling the +// functions, but better to have the compiler do spelling checks. 
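+// +// These constants spell the extension names exactly as they appear in shader +// text, e.g. (illustrative) '#extension GL_OES_texture_3D : enable', whose +// behavior token corresponds to EBhEnable above.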
+// +const char* const E_GL_OES_texture_3D = "GL_OES_texture_3D"; +const char* const E_GL_OES_standard_derivatives = "GL_OES_standard_derivatives"; +const char* const E_GL_EXT_frag_depth = "GL_EXT_frag_depth"; +const char* const E_GL_OES_EGL_image_external = "GL_OES_EGL_image_external"; +const char* const E_GL_OES_EGL_image_external_essl3 = "GL_OES_EGL_image_external_essl3"; +const char* const E_GL_EXT_YUV_target = "GL_EXT_YUV_target"; +const char* const E_GL_EXT_shader_texture_lod = "GL_EXT_shader_texture_lod"; +const char* const E_GL_EXT_shadow_samplers = "GL_EXT_shadow_samplers"; + +const char* const E_GL_ARB_texture_rectangle = "GL_ARB_texture_rectangle"; +const char* const E_GL_3DL_array_objects = "GL_3DL_array_objects"; +const char* const E_GL_ARB_shading_language_420pack = "GL_ARB_shading_language_420pack"; +const char* const E_GL_ARB_texture_gather = "GL_ARB_texture_gather"; +const char* const E_GL_ARB_gpu_shader5 = "GL_ARB_gpu_shader5"; +const char* const E_GL_ARB_separate_shader_objects = "GL_ARB_separate_shader_objects"; +const char* const E_GL_ARB_compute_shader = "GL_ARB_compute_shader"; +const char* const E_GL_ARB_tessellation_shader = "GL_ARB_tessellation_shader"; +const char* const E_GL_ARB_enhanced_layouts = "GL_ARB_enhanced_layouts"; +const char* const E_GL_ARB_texture_cube_map_array = "GL_ARB_texture_cube_map_array"; +const char* const E_GL_ARB_texture_multisample = "GL_ARB_texture_multisample"; +const char* const E_GL_ARB_shader_texture_lod = "GL_ARB_shader_texture_lod"; +const char* const E_GL_ARB_explicit_attrib_location = "GL_ARB_explicit_attrib_location"; +const char* const E_GL_ARB_explicit_uniform_location = "GL_ARB_explicit_uniform_location"; +const char* const E_GL_ARB_shader_image_load_store = "GL_ARB_shader_image_load_store"; +const char* const E_GL_ARB_shader_atomic_counters = "GL_ARB_shader_atomic_counters"; +const char* const E_GL_ARB_shader_atomic_counter_ops = "GL_ARB_shader_atomic_counter_ops"; +const char* const E_GL_ARB_shader_draw_parameters = "GL_ARB_shader_draw_parameters"; +const char* const E_GL_ARB_shader_group_vote = "GL_ARB_shader_group_vote"; +const char* const E_GL_ARB_derivative_control = "GL_ARB_derivative_control"; +const char* const E_GL_ARB_shader_texture_image_samples = "GL_ARB_shader_texture_image_samples"; +const char* const E_GL_ARB_viewport_array = "GL_ARB_viewport_array"; +const char* const E_GL_ARB_gpu_shader_int64 = "GL_ARB_gpu_shader_int64"; +const char* const E_GL_ARB_gpu_shader_fp64 = "GL_ARB_gpu_shader_fp64"; +const char* const E_GL_ARB_shader_ballot = "GL_ARB_shader_ballot"; +const char* const E_GL_ARB_sparse_texture2 = "GL_ARB_sparse_texture2"; +const char* const E_GL_ARB_sparse_texture_clamp = "GL_ARB_sparse_texture_clamp"; +const char* const E_GL_ARB_shader_stencil_export = "GL_ARB_shader_stencil_export"; +// const char* const E_GL_ARB_cull_distance = "GL_ARB_cull_distance"; // present for 4.5, but need extension control over block members +const char* const E_GL_ARB_post_depth_coverage = "GL_ARB_post_depth_coverage"; +const char* const E_GL_ARB_shader_viewport_layer_array = "GL_ARB_shader_viewport_layer_array"; +const char* const E_GL_ARB_fragment_shader_interlock = "GL_ARB_fragment_shader_interlock"; +const char* const E_GL_ARB_shader_clock = "GL_ARB_shader_clock"; +const char* const E_GL_ARB_uniform_buffer_object = "GL_ARB_uniform_buffer_object"; +const char* const E_GL_ARB_sample_shading = "GL_ARB_sample_shading"; +const char* const E_GL_ARB_shader_bit_encoding = "GL_ARB_shader_bit_encoding"; +const char* const 
E_GL_ARB_shader_image_size = "GL_ARB_shader_image_size"; +const char* const E_GL_ARB_shader_storage_buffer_object = "GL_ARB_shader_storage_buffer_object"; +const char* const E_GL_ARB_shading_language_packing = "GL_ARB_shading_language_packing"; +const char* const E_GL_ARB_texture_query_lod = "GL_ARB_texture_query_lod"; +const char* const E_GL_ARB_vertex_attrib_64bit = "GL_ARB_vertex_attrib_64bit"; +const char* const E_GL_ARB_draw_instanced = "GL_ARB_draw_instanced"; +const char* const E_GL_ARB_fragment_coord_conventions = "GL_ARB_fragment_coord_conventions"; + +const char* const E_GL_KHR_shader_subgroup_basic = "GL_KHR_shader_subgroup_basic"; +const char* const E_GL_KHR_shader_subgroup_vote = "GL_KHR_shader_subgroup_vote"; +const char* const E_GL_KHR_shader_subgroup_arithmetic = "GL_KHR_shader_subgroup_arithmetic"; +const char* const E_GL_KHR_shader_subgroup_ballot = "GL_KHR_shader_subgroup_ballot"; +const char* const E_GL_KHR_shader_subgroup_shuffle = "GL_KHR_shader_subgroup_shuffle"; +const char* const E_GL_KHR_shader_subgroup_shuffle_relative = "GL_KHR_shader_subgroup_shuffle_relative"; +const char* const E_GL_KHR_shader_subgroup_clustered = "GL_KHR_shader_subgroup_clustered"; +const char* const E_GL_KHR_shader_subgroup_quad = "GL_KHR_shader_subgroup_quad"; +const char* const E_GL_KHR_memory_scope_semantics = "GL_KHR_memory_scope_semantics"; + +const char* const E_GL_EXT_shader_atomic_int64 = "GL_EXT_shader_atomic_int64"; + +const char* const E_GL_EXT_shader_non_constant_global_initializers = "GL_EXT_shader_non_constant_global_initializers"; +const char* const E_GL_EXT_shader_image_load_formatted = "GL_EXT_shader_image_load_formatted"; + +const char* const E_GL_EXT_shader_16bit_storage = "GL_EXT_shader_16bit_storage"; +const char* const E_GL_EXT_shader_8bit_storage = "GL_EXT_shader_8bit_storage"; + + +// EXT extensions +const char* const E_GL_EXT_device_group = "GL_EXT_device_group"; +const char* const E_GL_EXT_multiview = "GL_EXT_multiview"; +const char* const E_GL_EXT_post_depth_coverage = "GL_EXT_post_depth_coverage"; +const char* const E_GL_EXT_control_flow_attributes = "GL_EXT_control_flow_attributes"; +const char* const E_GL_EXT_nonuniform_qualifier = "GL_EXT_nonuniform_qualifier"; +const char* const E_GL_EXT_samplerless_texture_functions = "GL_EXT_samplerless_texture_functions"; +const char* const E_GL_EXT_scalar_block_layout = "GL_EXT_scalar_block_layout"; +const char* const E_GL_EXT_fragment_invocation_density = "GL_EXT_fragment_invocation_density"; +const char* const E_GL_EXT_buffer_reference = "GL_EXT_buffer_reference"; +const char* const E_GL_EXT_buffer_reference2 = "GL_EXT_buffer_reference2"; +const char* const E_GL_EXT_buffer_reference_uvec2 = "GL_EXT_buffer_reference_uvec2"; +const char* const E_GL_EXT_demote_to_helper_invocation = "GL_EXT_demote_to_helper_invocation"; +const char* const E_GL_EXT_shader_realtime_clock = "GL_EXT_shader_realtime_clock"; +const char* const E_GL_EXT_debug_printf = "GL_EXT_debug_printf"; +const char* const E_GL_EXT_ray_tracing = "GL_EXT_ray_tracing"; +const char* const E_GL_EXT_ray_query = "GL_EXT_ray_query"; +const char* const E_GL_EXT_ray_flags_primitive_culling = "GL_EXT_ray_flags_primitive_culling"; +const char* const E_GL_EXT_ray_cull_mask = "GL_EXT_ray_cull_mask"; +const char* const E_GL_EXT_blend_func_extended = "GL_EXT_blend_func_extended"; +const char* const E_GL_EXT_shader_implicit_conversions = "GL_EXT_shader_implicit_conversions"; +const char* const E_GL_EXT_fragment_shading_rate = "GL_EXT_fragment_shading_rate"; +const char* const 
E_GL_EXT_shader_image_int64 = "GL_EXT_shader_image_int64"; +const char* const E_GL_EXT_null_initializer = "GL_EXT_null_initializer"; +const char* const E_GL_EXT_shared_memory_block = "GL_EXT_shared_memory_block"; +const char* const E_GL_EXT_subgroup_uniform_control_flow = "GL_EXT_subgroup_uniform_control_flow"; +const char* const E_GL_EXT_spirv_intrinsics = "GL_EXT_spirv_intrinsics"; +const char* const E_GL_EXT_fragment_shader_barycentric = "GL_EXT_fragment_shader_barycentric"; +const char* const E_GL_EXT_mesh_shader = "GL_EXT_mesh_shader"; +const char* const E_GL_EXT_opacity_micromap = "GL_EXT_opacity_micromap"; + +// Arrays of extensions for the above post_depth_coverage duplications + +const char* const post_depth_coverageEXTs[] = { E_GL_ARB_post_depth_coverage, E_GL_EXT_post_depth_coverage }; +const int Num_post_depth_coverageEXTs = sizeof(post_depth_coverageEXTs) / sizeof(post_depth_coverageEXTs[0]); + +// Array of extensions to cover both extensions providing ray tracing capabilities. +const char* const ray_tracing_EXTs[] = { E_GL_EXT_ray_query, E_GL_EXT_ray_tracing }; +const int Num_ray_tracing_EXTs = sizeof(ray_tracing_EXTs) / sizeof(ray_tracing_EXTs[0]); + +// OVR extensions +const char* const E_GL_OVR_multiview = "GL_OVR_multiview"; +const char* const E_GL_OVR_multiview2 = "GL_OVR_multiview2"; + +const char* const OVR_multiview_EXTs[] = { E_GL_OVR_multiview, E_GL_OVR_multiview2 }; +const int Num_OVR_multiview_EXTs = sizeof(OVR_multiview_EXTs) / sizeof(OVR_multiview_EXTs[0]); + +// #line and #include +const char* const E_GL_GOOGLE_cpp_style_line_directive = "GL_GOOGLE_cpp_style_line_directive"; +const char* const E_GL_GOOGLE_include_directive = "GL_GOOGLE_include_directive"; + +const char* const E_GL_AMD_shader_ballot = "GL_AMD_shader_ballot"; +const char* const E_GL_AMD_shader_trinary_minmax = "GL_AMD_shader_trinary_minmax"; +const char* const E_GL_AMD_shader_explicit_vertex_parameter = "GL_AMD_shader_explicit_vertex_parameter"; +const char* const E_GL_AMD_gcn_shader = "GL_AMD_gcn_shader"; +const char* const E_GL_AMD_gpu_shader_half_float = "GL_AMD_gpu_shader_half_float"; +const char* const E_GL_AMD_texture_gather_bias_lod = "GL_AMD_texture_gather_bias_lod"; +const char* const E_GL_AMD_gpu_shader_int16 = "GL_AMD_gpu_shader_int16"; +const char* const E_GL_AMD_shader_image_load_store_lod = "GL_AMD_shader_image_load_store_lod"; +const char* const E_GL_AMD_shader_fragment_mask = "GL_AMD_shader_fragment_mask"; +const char* const E_GL_AMD_gpu_shader_half_float_fetch = "GL_AMD_gpu_shader_half_float_fetch"; +const char* const E_GL_AMD_shader_early_and_late_fragment_tests = "GL_AMD_shader_early_and_late_fragment_tests"; + +const char* const E_GL_INTEL_shader_integer_functions2 = "GL_INTEL_shader_integer_functions2"; + +const char* const E_GL_NV_sample_mask_override_coverage = "GL_NV_sample_mask_override_coverage"; +const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometry_shader_passthrough"; +const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2"; +const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering"; +const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes"; +const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64"; +const char* const E_GL_NV_conservative_raster_underestimation = "GL_NV_conservative_raster_underestimation"; +const char* const E_GL_NV_shader_noperspective_interpolation = "GL_NV_shader_noperspective_interpolation"; +const char* const
E_GL_NV_shader_subgroup_partitioned = "GL_NV_shader_subgroup_partitioned"; +const char* const E_GL_NV_shading_rate_image = "GL_NV_shading_rate_image"; +const char* const E_GL_NV_ray_tracing = "GL_NV_ray_tracing"; +const char* const E_GL_NV_ray_tracing_motion_blur = "GL_NV_ray_tracing_motion_blur"; +const char* const E_GL_NV_fragment_shader_barycentric = "GL_NV_fragment_shader_barycentric"; +const char* const E_GL_NV_compute_shader_derivatives = "GL_NV_compute_shader_derivatives"; +const char* const E_GL_NV_shader_texture_footprint = "GL_NV_shader_texture_footprint"; +const char* const E_GL_NV_mesh_shader = "GL_NV_mesh_shader"; + +// Arrays of extensions for the above viewportEXTs duplications + +const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_NV_viewport_array2 }; +const int Num_viewportEXTs = sizeof(viewportEXTs) / sizeof(viewportEXTs[0]); + +const char* const E_GL_NV_cooperative_matrix = "GL_NV_cooperative_matrix"; +const char* const E_GL_NV_shader_sm_builtins = "GL_NV_shader_sm_builtins"; +const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer_cooperative_matrix"; + +// AEP +const char* const E_GL_ANDROID_extension_pack_es31a = "GL_ANDROID_extension_pack_es31a"; +const char* const E_GL_KHR_blend_equation_advanced = "GL_KHR_blend_equation_advanced"; +const char* const E_GL_OES_sample_variables = "GL_OES_sample_variables"; +const char* const E_GL_OES_shader_image_atomic = "GL_OES_shader_image_atomic"; +const char* const E_GL_OES_shader_multisample_interpolation = "GL_OES_shader_multisample_interpolation"; +const char* const E_GL_OES_texture_storage_multisample_2d_array = "GL_OES_texture_storage_multisample_2d_array"; +const char* const E_GL_EXT_geometry_shader = "GL_EXT_geometry_shader"; +const char* const E_GL_EXT_geometry_point_size = "GL_EXT_geometry_point_size"; +const char* const E_GL_EXT_gpu_shader5 = "GL_EXT_gpu_shader5"; +const char* const E_GL_EXT_primitive_bounding_box = "GL_EXT_primitive_bounding_box"; +const char* const E_GL_EXT_shader_io_blocks = "GL_EXT_shader_io_blocks"; +const char* const E_GL_EXT_tessellation_shader = "GL_EXT_tessellation_shader"; +const char* const E_GL_EXT_tessellation_point_size = "GL_EXT_tessellation_point_size"; +const char* const E_GL_EXT_texture_buffer = "GL_EXT_texture_buffer"; +const char* const E_GL_EXT_texture_cube_map_array = "GL_EXT_texture_cube_map_array"; +const char* const E_GL_EXT_shader_integer_mix = "GL_EXT_shader_integer_mix"; + +// OES matching AEP +const char* const E_GL_OES_geometry_shader = "GL_OES_geometry_shader"; +const char* const E_GL_OES_geometry_point_size = "GL_OES_geometry_point_size"; +const char* const E_GL_OES_gpu_shader5 = "GL_OES_gpu_shader5"; +const char* const E_GL_OES_primitive_bounding_box = "GL_OES_primitive_bounding_box"; +const char* const E_GL_OES_shader_io_blocks = "GL_OES_shader_io_blocks"; +const char* const E_GL_OES_tessellation_shader = "GL_OES_tessellation_shader"; +const char* const E_GL_OES_tessellation_point_size = "GL_OES_tessellation_point_size"; +const char* const E_GL_OES_texture_buffer = "GL_OES_texture_buffer"; +const char* const E_GL_OES_texture_cube_map_array = "GL_OES_texture_cube_map_array"; + +// EXT +const char* const E_GL_EXT_shader_explicit_arithmetic_types = "GL_EXT_shader_explicit_arithmetic_types"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int8 = "GL_EXT_shader_explicit_arithmetic_types_int8"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int16 = "GL_EXT_shader_explicit_arithmetic_types_int16"; +const 
char* const E_GL_EXT_shader_explicit_arithmetic_types_int32 = "GL_EXT_shader_explicit_arithmetic_types_int32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_int64 = "GL_EXT_shader_explicit_arithmetic_types_int64"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float16 = "GL_EXT_shader_explicit_arithmetic_types_float16"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float32 = "GL_EXT_shader_explicit_arithmetic_types_float32"; +const char* const E_GL_EXT_shader_explicit_arithmetic_types_float64 = "GL_EXT_shader_explicit_arithmetic_types_float64"; + +const char* const E_GL_EXT_shader_subgroup_extended_types_int8 = "GL_EXT_shader_subgroup_extended_types_int8"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int16 = "GL_EXT_shader_subgroup_extended_types_int16"; +const char* const E_GL_EXT_shader_subgroup_extended_types_int64 = "GL_EXT_shader_subgroup_extended_types_int64"; +const char* const E_GL_EXT_shader_subgroup_extended_types_float16 = "GL_EXT_shader_subgroup_extended_types_float16"; +const char* const E_GL_EXT_terminate_invocation = "GL_EXT_terminate_invocation"; + +const char* const E_GL_EXT_shader_atomic_float = "GL_EXT_shader_atomic_float"; +const char* const E_GL_EXT_shader_atomic_float2 = "GL_EXT_shader_atomic_float2"; + +// Arrays of extensions for the above AEP duplications + +const char* const AEP_geometry_shader[] = { E_GL_EXT_geometry_shader, E_GL_OES_geometry_shader }; +const int Num_AEP_geometry_shader = sizeof(AEP_geometry_shader)/sizeof(AEP_geometry_shader[0]); + +const char* const AEP_geometry_point_size[] = { E_GL_EXT_geometry_point_size, E_GL_OES_geometry_point_size }; +const int Num_AEP_geometry_point_size = sizeof(AEP_geometry_point_size)/sizeof(AEP_geometry_point_size[0]); + +const char* const AEP_gpu_shader5[] = { E_GL_EXT_gpu_shader5, E_GL_OES_gpu_shader5 }; +const int Num_AEP_gpu_shader5 = sizeof(AEP_gpu_shader5)/sizeof(AEP_gpu_shader5[0]); + +const char* const AEP_primitive_bounding_box[] = { E_GL_EXT_primitive_bounding_box, E_GL_OES_primitive_bounding_box }; +const int Num_AEP_primitive_bounding_box = sizeof(AEP_primitive_bounding_box)/sizeof(AEP_primitive_bounding_box[0]); + +const char* const AEP_shader_io_blocks[] = { E_GL_EXT_shader_io_blocks, E_GL_OES_shader_io_blocks }; +const int Num_AEP_shader_io_blocks = sizeof(AEP_shader_io_blocks)/sizeof(AEP_shader_io_blocks[0]); + +const char* const AEP_tessellation_shader[] = { E_GL_EXT_tessellation_shader, E_GL_OES_tessellation_shader }; +const int Num_AEP_tessellation_shader = sizeof(AEP_tessellation_shader)/sizeof(AEP_tessellation_shader[0]); + +const char* const AEP_tessellation_point_size[] = { E_GL_EXT_tessellation_point_size, E_GL_OES_tessellation_point_size }; +const int Num_AEP_tessellation_point_size = sizeof(AEP_tessellation_point_size)/sizeof(AEP_tessellation_point_size[0]); + +const char* const AEP_texture_buffer[] = { E_GL_EXT_texture_buffer, E_GL_OES_texture_buffer }; +const int Num_AEP_texture_buffer = sizeof(AEP_texture_buffer)/sizeof(AEP_texture_buffer[0]); + +const char* const AEP_texture_cube_map_array[] = { E_GL_EXT_texture_cube_map_array, E_GL_OES_texture_cube_map_array }; +const int Num_AEP_texture_cube_map_array = sizeof(AEP_texture_cube_map_array)/sizeof(AEP_texture_cube_map_array[0]); + +const char* const AEP_mesh_shader[] = { E_GL_NV_mesh_shader, E_GL_EXT_mesh_shader }; +const int Num_AEP_mesh_shader = sizeof(AEP_mesh_shader)/sizeof(AEP_mesh_shader[0]); + +} // end namespace glslang + +#endif // _VERSIONS_INCLUDED_ diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/attribute.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/attribute.h new file mode 100644 index 0000000..c5b2917 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/attribute.h @@ -0,0 +1,150 @@ +// +// Copyright (C) 2017 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// + +#ifndef _ATTRIBUTE_INCLUDED_ +#define _ATTRIBUTE_INCLUDED_ + +#include "../Include/Common.h" +#include "../Include/ConstantUnion.h" + +namespace glslang { + + enum TAttributeType { + EatNone, + EatAllow_uav_condition, + EatBranch, + EatCall, + EatDomain, + EatEarlyDepthStencil, + EatFastOpt, + EatFlatten, + EatForceCase, + EatInstance, + EatMaxTessFactor, + EatNumThreads, + EatMaxVertexCount, + EatOutputControlPoints, + EatOutputTopology, + EatPartitioning, + EatPatchConstantFunc, + EatPatchSize, + EatUnroll, + EatLoop, + EatBinding, + EatGlobalBinding, + EatLocation, + EatInputAttachment, + EatBuiltIn, + EatPushConstant, + EatConstantId, + EatDependencyInfinite, + EatDependencyLength, + EatMinIterations, + EatMaxIterations, + EatIterationMultiple, + EatPeelCount, + EatPartialCount, + EatFormatRgba32f, + EatFormatRgba16f, + EatFormatR32f, + EatFormatRgba8, + EatFormatRgba8Snorm, + EatFormatRg32f, + EatFormatRg16f, + EatFormatR11fG11fB10f, + EatFormatR16f, + EatFormatRgba16, + EatFormatRgb10A2, + EatFormatRg16, + EatFormatRg8, + EatFormatR16, + EatFormatR8, + EatFormatRgba16Snorm, + EatFormatRg16Snorm, + EatFormatRg8Snorm, + EatFormatR16Snorm, + EatFormatR8Snorm, + EatFormatRgba32i, + EatFormatRgba16i, + EatFormatRgba8i, + EatFormatR32i, + EatFormatRg32i, + EatFormatRg16i, + EatFormatRg8i, + EatFormatR16i, + EatFormatR8i, + EatFormatRgba32ui, + EatFormatRgba16ui, + EatFormatRgba8ui, + EatFormatR32ui, + EatFormatRgb10a2ui, + EatFormatRg32ui, + EatFormatRg16ui, + EatFormatRg8ui, + EatFormatR16ui, + EatFormatR8ui, + EatFormatUnknown, + EatNonWritable, + EatNonReadable, + EatSubgroupUniformControlFlow, + }; + + class TIntermAggregate; + + struct TAttributeArgs { + TAttributeType name; + const TIntermAggregate* args; + + // Obtain attribute as integer + // Return false if it cannot be obtained + bool getInt(int& value, int argNum = 0) const; + + // Obtain attribute as string, with optional to-lower transform + // Return false if it cannot be obtained + bool getString(TString& value, int argNum = 0, bool convertToLower = true) const; + + // How many arguments were provided to the attribute? + int size() const; + + protected: + const TConstUnion* getConstUnion(TBasicType basicType, int argNum) const; + }; + + typedef TList<TAttributeArgs> TAttributes; + +} // end namespace glslang + +#endif // _ATTRIBUTE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/gl_types.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/gl_types.h new file mode 100644 index 0000000..d6c9393 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/gl_types.h @@ -0,0 +1,218 @@ +/* +** Copyright (c) 2013 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials.
+** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#pragma once + +#define GL_FLOAT 0x1406 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 + +#define GL_DOUBLE 0x140A +#define GL_DOUBLE_VEC2 0x8FFC +#define GL_DOUBLE_VEC3 0x8FFD +#define GL_DOUBLE_VEC4 0x8FFE + +#define GL_INT 0x1404 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 + +#define GL_UNSIGNED_INT 0x1405 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 + +#define GL_INT64_ARB 0x140E +#define GL_INT64_VEC2_ARB 0x8FE9 +#define GL_INT64_VEC3_ARB 0x8FEA +#define GL_INT64_VEC4_ARB 0x8FEB + +#define GL_UNSIGNED_INT64_ARB 0x140F +#define GL_UNSIGNED_INT64_VEC2_ARB 0x8FF5 +#define GL_UNSIGNED_INT64_VEC3_ARB 0x8FF6 +#define GL_UNSIGNED_INT64_VEC4_ARB 0x8FF7 +#define GL_UNSIGNED_INT16_VEC2_NV 0x8FF1 +#define GL_UNSIGNED_INT16_VEC3_NV 0x8FF2 +#define GL_UNSIGNED_INT16_VEC4_NV 0x8FF3 + +#define GL_INT16_NV 0x8FE4 +#define GL_INT16_VEC2_NV 0x8FE5 +#define GL_INT16_VEC3_NV 0x8FE6 +#define GL_INT16_VEC4_NV 0x8FE7 + +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 + +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A + +#define GL_DOUBLE_MAT2 0x8F46 +#define GL_DOUBLE_MAT3 0x8F47 +#define GL_DOUBLE_MAT4 0x8F48 +#define GL_DOUBLE_MAT2x3 0x8F49 +#define GL_DOUBLE_MAT2x4 0x8F4A +#define GL_DOUBLE_MAT3x2 0x8F4B +#define GL_DOUBLE_MAT3x4 0x8F4C +#define GL_DOUBLE_MAT4x2 0x8F4D +#define GL_DOUBLE_MAT4x3 0x8F4E + +// Those constants are borrowed from extension NV_gpu_shader5 +#define GL_FLOAT16_NV 0x8FF8 +#define GL_FLOAT16_VEC2_NV 0x8FF9 +#define GL_FLOAT16_VEC3_NV 0x8FFA +#define GL_FLOAT16_VEC4_NV 0x8FFB + +#define GL_FLOAT16_MAT2_AMD 0x91C5 +#define GL_FLOAT16_MAT3_AMD 0x91C6 +#define GL_FLOAT16_MAT4_AMD 0x91C7 +#define GL_FLOAT16_MAT2x3_AMD 0x91C8 +#define GL_FLOAT16_MAT2x4_AMD 0x91C9 +#define GL_FLOAT16_MAT3x2_AMD 0x91CA +#define GL_FLOAT16_MAT3x4_AMD 0x91CB +#define GL_FLOAT16_MAT4x2_AMD 0x91CC +#define GL_FLOAT16_MAT4x3_AMD 0x91CD + +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C +#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D +#define GL_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900C +#define 
GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW_ARB 0x900D + +#define GL_FLOAT16_SAMPLER_1D_AMD 0x91CE +#define GL_FLOAT16_SAMPLER_2D_AMD 0x91CF +#define GL_FLOAT16_SAMPLER_3D_AMD 0x91D0 +#define GL_FLOAT16_SAMPLER_CUBE_AMD 0x91D1 +#define GL_FLOAT16_SAMPLER_2D_RECT_AMD 0x91D2 +#define GL_FLOAT16_SAMPLER_1D_ARRAY_AMD 0x91D3 +#define GL_FLOAT16_SAMPLER_2D_ARRAY_AMD 0x91D4 +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_AMD 0x91D5 +#define GL_FLOAT16_SAMPLER_BUFFER_AMD 0x91D6 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_AMD 0x91D7 +#define GL_FLOAT16_SAMPLER_2D_MULTISAMPLE_ARRAY_AMD 0x91D8 + +#define GL_FLOAT16_SAMPLER_1D_SHADOW_AMD 0x91D9 +#define GL_FLOAT16_SAMPLER_2D_SHADOW_AMD 0x91DA +#define GL_FLOAT16_SAMPLER_2D_RECT_SHADOW_AMD 0x91DB +#define GL_FLOAT16_SAMPLER_1D_ARRAY_SHADOW_AMD 0x91DC +#define GL_FLOAT16_SAMPLER_2D_ARRAY_SHADOW_AMD 0x91DD +#define GL_FLOAT16_SAMPLER_CUBE_SHADOW_AMD 0x91DE +#define GL_FLOAT16_SAMPLER_CUBE_MAP_ARRAY_SHADOW_AMD 0x91DF + +#define GL_FLOAT16_IMAGE_1D_AMD 0x91E0 +#define GL_FLOAT16_IMAGE_2D_AMD 0x91E1 +#define GL_FLOAT16_IMAGE_3D_AMD 0x91E2 +#define GL_FLOAT16_IMAGE_2D_RECT_AMD 0x91E3 +#define GL_FLOAT16_IMAGE_CUBE_AMD 0x91E4 +#define GL_FLOAT16_IMAGE_1D_ARRAY_AMD 0x91E5 +#define GL_FLOAT16_IMAGE_2D_ARRAY_AMD 0x91E6 +#define GL_FLOAT16_IMAGE_CUBE_MAP_ARRAY_AMD 0x91E7 +#define GL_FLOAT16_IMAGE_BUFFER_AMD 0x91E8 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_AMD 0x91E9 +#define GL_FLOAT16_IMAGE_2D_MULTISAMPLE_ARRAY_AMD 0x91EA + +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E +#define GL_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900E + +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F +#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A + +#define GL_IMAGE_1D 0x904C +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_2D_RECT 0x904F +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_BUFFER 0x9051 +#define GL_IMAGE_1D_ARRAY 0x9052 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054 +#define GL_IMAGE_2D_MULTISAMPLE 0x9055 +#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056 +#define GL_INT_IMAGE_1D 0x9057 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_2D_RECT 0x905A +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_BUFFER 0x905C +#define GL_INT_IMAGE_1D_ARRAY 0x905D +#define GL_INT_IMAGE_2D_ARRAY 0x905E +#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F +#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060 +#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061 +#define GL_UNSIGNED_INT_IMAGE_1D 0x9062 +#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 
+#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067 +#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068 +#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069 +#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B +#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C + +#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/glslang_tab.cpp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/glslang_tab.cpp.h new file mode 100644 index 0000000..18cef46 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/glslang_tab.cpp.h @@ -0,0 +1,571 @@ +/* A Bison parser, made by GNU Bison 3.7.4. */ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +#ifndef YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED +# define YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 1 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token kinds.
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + CONST = 258, /* CONST */ + BOOL = 259, /* BOOL */ + INT = 260, /* INT */ + UINT = 261, /* UINT */ + FLOAT = 262, /* FLOAT */ + BVEC2 = 263, /* BVEC2 */ + BVEC3 = 264, /* BVEC3 */ + BVEC4 = 265, /* BVEC4 */ + IVEC2 = 266, /* IVEC2 */ + IVEC3 = 267, /* IVEC3 */ + IVEC4 = 268, /* IVEC4 */ + UVEC2 = 269, /* UVEC2 */ + UVEC3 = 270, /* UVEC3 */ + UVEC4 = 271, /* UVEC4 */ + VEC2 = 272, /* VEC2 */ + VEC3 = 273, /* VEC3 */ + VEC4 = 274, /* VEC4 */ + MAT2 = 275, /* MAT2 */ + MAT3 = 276, /* MAT3 */ + MAT4 = 277, /* MAT4 */ + MAT2X2 = 278, /* MAT2X2 */ + MAT2X3 = 279, /* MAT2X3 */ + MAT2X4 = 280, /* MAT2X4 */ + MAT3X2 = 281, /* MAT3X2 */ + MAT3X3 = 282, /* MAT3X3 */ + MAT3X4 = 283, /* MAT3X4 */ + MAT4X2 = 284, /* MAT4X2 */ + MAT4X3 = 285, /* MAT4X3 */ + MAT4X4 = 286, /* MAT4X4 */ + SAMPLER2D = 287, /* SAMPLER2D */ + SAMPLER3D = 288, /* SAMPLER3D */ + SAMPLERCUBE = 289, /* SAMPLERCUBE */ + SAMPLER2DSHADOW = 290, /* SAMPLER2DSHADOW */ + SAMPLERCUBESHADOW = 291, /* SAMPLERCUBESHADOW */ + SAMPLER2DARRAY = 292, /* SAMPLER2DARRAY */ + SAMPLER2DARRAYSHADOW = 293, /* SAMPLER2DARRAYSHADOW */ + ISAMPLER2D = 294, /* ISAMPLER2D */ + ISAMPLER3D = 295, /* ISAMPLER3D */ + ISAMPLERCUBE = 296, /* ISAMPLERCUBE */ + ISAMPLER2DARRAY = 297, /* ISAMPLER2DARRAY */ + USAMPLER2D = 298, /* USAMPLER2D */ + USAMPLER3D = 299, /* USAMPLER3D */ + USAMPLERCUBE = 300, /* USAMPLERCUBE */ + USAMPLER2DARRAY = 301, /* USAMPLER2DARRAY */ + SAMPLER = 302, /* SAMPLER */ + SAMPLERSHADOW = 303, /* SAMPLERSHADOW */ + TEXTURE2D = 304, /* TEXTURE2D */ + TEXTURE3D = 305, /* TEXTURE3D */ + TEXTURECUBE = 306, /* TEXTURECUBE */ + TEXTURE2DARRAY = 307, /* TEXTURE2DARRAY */ + ITEXTURE2D = 308, /* ITEXTURE2D */ + ITEXTURE3D = 309, /* ITEXTURE3D */ + ITEXTURECUBE = 310, /* ITEXTURECUBE */ + ITEXTURE2DARRAY = 311, /* ITEXTURE2DARRAY */ + UTEXTURE2D = 312, /* UTEXTURE2D */ + UTEXTURE3D = 313, /* UTEXTURE3D */ + UTEXTURECUBE = 314, /* UTEXTURECUBE */ + UTEXTURE2DARRAY = 315, /* UTEXTURE2DARRAY */ + ATTRIBUTE = 316, /* ATTRIBUTE */ + VARYING = 317, /* VARYING */ + FLOAT16_T = 318, /* FLOAT16_T */ + FLOAT32_T = 319, /* FLOAT32_T */ + DOUBLE = 320, /* DOUBLE */ + FLOAT64_T = 321, /* FLOAT64_T */ + INT64_T = 322, /* INT64_T */ + UINT64_T = 323, /* UINT64_T */ + INT32_T = 324, /* INT32_T */ + UINT32_T = 325, /* UINT32_T */ + INT16_T = 326, /* INT16_T */ + UINT16_T = 327, /* UINT16_T */ + INT8_T = 328, /* INT8_T */ + UINT8_T = 329, /* UINT8_T */ + I64VEC2 = 330, /* I64VEC2 */ + I64VEC3 = 331, /* I64VEC3 */ + I64VEC4 = 332, /* I64VEC4 */ + U64VEC2 = 333, /* U64VEC2 */ + U64VEC3 = 334, /* U64VEC3 */ + U64VEC4 = 335, /* U64VEC4 */ + I32VEC2 = 336, /* I32VEC2 */ + I32VEC3 = 337, /* I32VEC3 */ + I32VEC4 = 338, /* I32VEC4 */ + U32VEC2 = 339, /* U32VEC2 */ + U32VEC3 = 340, /* U32VEC3 */ + U32VEC4 = 341, /* U32VEC4 */ + I16VEC2 = 342, /* I16VEC2 */ + I16VEC3 = 343, /* I16VEC3 */ + I16VEC4 = 344, /* I16VEC4 */ + U16VEC2 = 345, /* U16VEC2 */ + U16VEC3 = 346, /* U16VEC3 */ + U16VEC4 = 347, /* U16VEC4 */ + I8VEC2 = 348, /* I8VEC2 */ + I8VEC3 = 349, /* I8VEC3 */ + I8VEC4 = 350, /* I8VEC4 */ + U8VEC2 = 351, /* U8VEC2 */ + U8VEC3 = 352, /* U8VEC3 */ + U8VEC4 = 353, /* U8VEC4 */ + DVEC2 = 354, /* DVEC2 */ + DVEC3 = 355, /* DVEC3 */ + DVEC4 = 356, /* DVEC4 */ + DMAT2 = 357, /* DMAT2 */ + DMAT3 = 358, /* DMAT3 */ + DMAT4 = 359, /* DMAT4 */ + F16VEC2 = 360, /* F16VEC2 */ + F16VEC3 = 361, /* 
F16VEC3 */ + F16VEC4 = 362, /* F16VEC4 */ + F16MAT2 = 363, /* F16MAT2 */ + F16MAT3 = 364, /* F16MAT3 */ + F16MAT4 = 365, /* F16MAT4 */ + F32VEC2 = 366, /* F32VEC2 */ + F32VEC3 = 367, /* F32VEC3 */ + F32VEC4 = 368, /* F32VEC4 */ + F32MAT2 = 369, /* F32MAT2 */ + F32MAT3 = 370, /* F32MAT3 */ + F32MAT4 = 371, /* F32MAT4 */ + F64VEC2 = 372, /* F64VEC2 */ + F64VEC3 = 373, /* F64VEC3 */ + F64VEC4 = 374, /* F64VEC4 */ + F64MAT2 = 375, /* F64MAT2 */ + F64MAT3 = 376, /* F64MAT3 */ + F64MAT4 = 377, /* F64MAT4 */ + DMAT2X2 = 378, /* DMAT2X2 */ + DMAT2X3 = 379, /* DMAT2X3 */ + DMAT2X4 = 380, /* DMAT2X4 */ + DMAT3X2 = 381, /* DMAT3X2 */ + DMAT3X3 = 382, /* DMAT3X3 */ + DMAT3X4 = 383, /* DMAT3X4 */ + DMAT4X2 = 384, /* DMAT4X2 */ + DMAT4X3 = 385, /* DMAT4X3 */ + DMAT4X4 = 386, /* DMAT4X4 */ + F16MAT2X2 = 387, /* F16MAT2X2 */ + F16MAT2X3 = 388, /* F16MAT2X3 */ + F16MAT2X4 = 389, /* F16MAT2X4 */ + F16MAT3X2 = 390, /* F16MAT3X2 */ + F16MAT3X3 = 391, /* F16MAT3X3 */ + F16MAT3X4 = 392, /* F16MAT3X4 */ + F16MAT4X2 = 393, /* F16MAT4X2 */ + F16MAT4X3 = 394, /* F16MAT4X3 */ + F16MAT4X4 = 395, /* F16MAT4X4 */ + F32MAT2X2 = 396, /* F32MAT2X2 */ + F32MAT2X3 = 397, /* F32MAT2X3 */ + F32MAT2X4 = 398, /* F32MAT2X4 */ + F32MAT3X2 = 399, /* F32MAT3X2 */ + F32MAT3X3 = 400, /* F32MAT3X3 */ + F32MAT3X4 = 401, /* F32MAT3X4 */ + F32MAT4X2 = 402, /* F32MAT4X2 */ + F32MAT4X3 = 403, /* F32MAT4X3 */ + F32MAT4X4 = 404, /* F32MAT4X4 */ + F64MAT2X2 = 405, /* F64MAT2X2 */ + F64MAT2X3 = 406, /* F64MAT2X3 */ + F64MAT2X4 = 407, /* F64MAT2X4 */ + F64MAT3X2 = 408, /* F64MAT3X2 */ + F64MAT3X3 = 409, /* F64MAT3X3 */ + F64MAT3X4 = 410, /* F64MAT3X4 */ + F64MAT4X2 = 411, /* F64MAT4X2 */ + F64MAT4X3 = 412, /* F64MAT4X3 */ + F64MAT4X4 = 413, /* F64MAT4X4 */ + ATOMIC_UINT = 414, /* ATOMIC_UINT */ + ACCSTRUCTNV = 415, /* ACCSTRUCTNV */ + ACCSTRUCTEXT = 416, /* ACCSTRUCTEXT */ + RAYQUERYEXT = 417, /* RAYQUERYEXT */ + FCOOPMATNV = 418, /* FCOOPMATNV */ + ICOOPMATNV = 419, /* ICOOPMATNV */ + UCOOPMATNV = 420, /* UCOOPMATNV */ + SAMPLERCUBEARRAY = 421, /* SAMPLERCUBEARRAY */ + SAMPLERCUBEARRAYSHADOW = 422, /* SAMPLERCUBEARRAYSHADOW */ + ISAMPLERCUBEARRAY = 423, /* ISAMPLERCUBEARRAY */ + USAMPLERCUBEARRAY = 424, /* USAMPLERCUBEARRAY */ + SAMPLER1D = 425, /* SAMPLER1D */ + SAMPLER1DARRAY = 426, /* SAMPLER1DARRAY */ + SAMPLER1DARRAYSHADOW = 427, /* SAMPLER1DARRAYSHADOW */ + ISAMPLER1D = 428, /* ISAMPLER1D */ + SAMPLER1DSHADOW = 429, /* SAMPLER1DSHADOW */ + SAMPLER2DRECT = 430, /* SAMPLER2DRECT */ + SAMPLER2DRECTSHADOW = 431, /* SAMPLER2DRECTSHADOW */ + ISAMPLER2DRECT = 432, /* ISAMPLER2DRECT */ + USAMPLER2DRECT = 433, /* USAMPLER2DRECT */ + SAMPLERBUFFER = 434, /* SAMPLERBUFFER */ + ISAMPLERBUFFER = 435, /* ISAMPLERBUFFER */ + USAMPLERBUFFER = 436, /* USAMPLERBUFFER */ + SAMPLER2DMS = 437, /* SAMPLER2DMS */ + ISAMPLER2DMS = 438, /* ISAMPLER2DMS */ + USAMPLER2DMS = 439, /* USAMPLER2DMS */ + SAMPLER2DMSARRAY = 440, /* SAMPLER2DMSARRAY */ + ISAMPLER2DMSARRAY = 441, /* ISAMPLER2DMSARRAY */ + USAMPLER2DMSARRAY = 442, /* USAMPLER2DMSARRAY */ + SAMPLEREXTERNALOES = 443, /* SAMPLEREXTERNALOES */ + SAMPLEREXTERNAL2DY2YEXT = 444, /* SAMPLEREXTERNAL2DY2YEXT */ + ISAMPLER1DARRAY = 445, /* ISAMPLER1DARRAY */ + USAMPLER1D = 446, /* USAMPLER1D */ + USAMPLER1DARRAY = 447, /* USAMPLER1DARRAY */ + F16SAMPLER1D = 448, /* F16SAMPLER1D */ + F16SAMPLER2D = 449, /* F16SAMPLER2D */ + F16SAMPLER3D = 450, /* F16SAMPLER3D */ + F16SAMPLER2DRECT = 451, /* F16SAMPLER2DRECT */ + F16SAMPLERCUBE = 452, /* F16SAMPLERCUBE */ + F16SAMPLER1DARRAY = 453, /* F16SAMPLER1DARRAY */ + 
F16SAMPLER2DARRAY = 454, /* F16SAMPLER2DARRAY */ + F16SAMPLERCUBEARRAY = 455, /* F16SAMPLERCUBEARRAY */ + F16SAMPLERBUFFER = 456, /* F16SAMPLERBUFFER */ + F16SAMPLER2DMS = 457, /* F16SAMPLER2DMS */ + F16SAMPLER2DMSARRAY = 458, /* F16SAMPLER2DMSARRAY */ + F16SAMPLER1DSHADOW = 459, /* F16SAMPLER1DSHADOW */ + F16SAMPLER2DSHADOW = 460, /* F16SAMPLER2DSHADOW */ + F16SAMPLER1DARRAYSHADOW = 461, /* F16SAMPLER1DARRAYSHADOW */ + F16SAMPLER2DARRAYSHADOW = 462, /* F16SAMPLER2DARRAYSHADOW */ + F16SAMPLER2DRECTSHADOW = 463, /* F16SAMPLER2DRECTSHADOW */ + F16SAMPLERCUBESHADOW = 464, /* F16SAMPLERCUBESHADOW */ + F16SAMPLERCUBEARRAYSHADOW = 465, /* F16SAMPLERCUBEARRAYSHADOW */ + IMAGE1D = 466, /* IMAGE1D */ + IIMAGE1D = 467, /* IIMAGE1D */ + UIMAGE1D = 468, /* UIMAGE1D */ + IMAGE2D = 469, /* IMAGE2D */ + IIMAGE2D = 470, /* IIMAGE2D */ + UIMAGE2D = 471, /* UIMAGE2D */ + IMAGE3D = 472, /* IMAGE3D */ + IIMAGE3D = 473, /* IIMAGE3D */ + UIMAGE3D = 474, /* UIMAGE3D */ + IMAGE2DRECT = 475, /* IMAGE2DRECT */ + IIMAGE2DRECT = 476, /* IIMAGE2DRECT */ + UIMAGE2DRECT = 477, /* UIMAGE2DRECT */ + IMAGECUBE = 478, /* IMAGECUBE */ + IIMAGECUBE = 479, /* IIMAGECUBE */ + UIMAGECUBE = 480, /* UIMAGECUBE */ + IMAGEBUFFER = 481, /* IMAGEBUFFER */ + IIMAGEBUFFER = 482, /* IIMAGEBUFFER */ + UIMAGEBUFFER = 483, /* UIMAGEBUFFER */ + IMAGE1DARRAY = 484, /* IMAGE1DARRAY */ + IIMAGE1DARRAY = 485, /* IIMAGE1DARRAY */ + UIMAGE1DARRAY = 486, /* UIMAGE1DARRAY */ + IMAGE2DARRAY = 487, /* IMAGE2DARRAY */ + IIMAGE2DARRAY = 488, /* IIMAGE2DARRAY */ + UIMAGE2DARRAY = 489, /* UIMAGE2DARRAY */ + IMAGECUBEARRAY = 490, /* IMAGECUBEARRAY */ + IIMAGECUBEARRAY = 491, /* IIMAGECUBEARRAY */ + UIMAGECUBEARRAY = 492, /* UIMAGECUBEARRAY */ + IMAGE2DMS = 493, /* IMAGE2DMS */ + IIMAGE2DMS = 494, /* IIMAGE2DMS */ + UIMAGE2DMS = 495, /* UIMAGE2DMS */ + IMAGE2DMSARRAY = 496, /* IMAGE2DMSARRAY */ + IIMAGE2DMSARRAY = 497, /* IIMAGE2DMSARRAY */ + UIMAGE2DMSARRAY = 498, /* UIMAGE2DMSARRAY */ + F16IMAGE1D = 499, /* F16IMAGE1D */ + F16IMAGE2D = 500, /* F16IMAGE2D */ + F16IMAGE3D = 501, /* F16IMAGE3D */ + F16IMAGE2DRECT = 502, /* F16IMAGE2DRECT */ + F16IMAGECUBE = 503, /* F16IMAGECUBE */ + F16IMAGE1DARRAY = 504, /* F16IMAGE1DARRAY */ + F16IMAGE2DARRAY = 505, /* F16IMAGE2DARRAY */ + F16IMAGECUBEARRAY = 506, /* F16IMAGECUBEARRAY */ + F16IMAGEBUFFER = 507, /* F16IMAGEBUFFER */ + F16IMAGE2DMS = 508, /* F16IMAGE2DMS */ + F16IMAGE2DMSARRAY = 509, /* F16IMAGE2DMSARRAY */ + I64IMAGE1D = 510, /* I64IMAGE1D */ + U64IMAGE1D = 511, /* U64IMAGE1D */ + I64IMAGE2D = 512, /* I64IMAGE2D */ + U64IMAGE2D = 513, /* U64IMAGE2D */ + I64IMAGE3D = 514, /* I64IMAGE3D */ + U64IMAGE3D = 515, /* U64IMAGE3D */ + I64IMAGE2DRECT = 516, /* I64IMAGE2DRECT */ + U64IMAGE2DRECT = 517, /* U64IMAGE2DRECT */ + I64IMAGECUBE = 518, /* I64IMAGECUBE */ + U64IMAGECUBE = 519, /* U64IMAGECUBE */ + I64IMAGEBUFFER = 520, /* I64IMAGEBUFFER */ + U64IMAGEBUFFER = 521, /* U64IMAGEBUFFER */ + I64IMAGE1DARRAY = 522, /* I64IMAGE1DARRAY */ + U64IMAGE1DARRAY = 523, /* U64IMAGE1DARRAY */ + I64IMAGE2DARRAY = 524, /* I64IMAGE2DARRAY */ + U64IMAGE2DARRAY = 525, /* U64IMAGE2DARRAY */ + I64IMAGECUBEARRAY = 526, /* I64IMAGECUBEARRAY */ + U64IMAGECUBEARRAY = 527, /* U64IMAGECUBEARRAY */ + I64IMAGE2DMS = 528, /* I64IMAGE2DMS */ + U64IMAGE2DMS = 529, /* U64IMAGE2DMS */ + I64IMAGE2DMSARRAY = 530, /* I64IMAGE2DMSARRAY */ + U64IMAGE2DMSARRAY = 531, /* U64IMAGE2DMSARRAY */ + TEXTURECUBEARRAY = 532, /* TEXTURECUBEARRAY */ + ITEXTURECUBEARRAY = 533, /* ITEXTURECUBEARRAY */ + UTEXTURECUBEARRAY = 534, /* UTEXTURECUBEARRAY */ + TEXTURE1D = 
535, /* TEXTURE1D */ + ITEXTURE1D = 536, /* ITEXTURE1D */ + UTEXTURE1D = 537, /* UTEXTURE1D */ + TEXTURE1DARRAY = 538, /* TEXTURE1DARRAY */ + ITEXTURE1DARRAY = 539, /* ITEXTURE1DARRAY */ + UTEXTURE1DARRAY = 540, /* UTEXTURE1DARRAY */ + TEXTURE2DRECT = 541, /* TEXTURE2DRECT */ + ITEXTURE2DRECT = 542, /* ITEXTURE2DRECT */ + UTEXTURE2DRECT = 543, /* UTEXTURE2DRECT */ + TEXTUREBUFFER = 544, /* TEXTUREBUFFER */ + ITEXTUREBUFFER = 545, /* ITEXTUREBUFFER */ + UTEXTUREBUFFER = 546, /* UTEXTUREBUFFER */ + TEXTURE2DMS = 547, /* TEXTURE2DMS */ + ITEXTURE2DMS = 548, /* ITEXTURE2DMS */ + UTEXTURE2DMS = 549, /* UTEXTURE2DMS */ + TEXTURE2DMSARRAY = 550, /* TEXTURE2DMSARRAY */ + ITEXTURE2DMSARRAY = 551, /* ITEXTURE2DMSARRAY */ + UTEXTURE2DMSARRAY = 552, /* UTEXTURE2DMSARRAY */ + F16TEXTURE1D = 553, /* F16TEXTURE1D */ + F16TEXTURE2D = 554, /* F16TEXTURE2D */ + F16TEXTURE3D = 555, /* F16TEXTURE3D */ + F16TEXTURE2DRECT = 556, /* F16TEXTURE2DRECT */ + F16TEXTURECUBE = 557, /* F16TEXTURECUBE */ + F16TEXTURE1DARRAY = 558, /* F16TEXTURE1DARRAY */ + F16TEXTURE2DARRAY = 559, /* F16TEXTURE2DARRAY */ + F16TEXTURECUBEARRAY = 560, /* F16TEXTURECUBEARRAY */ + F16TEXTUREBUFFER = 561, /* F16TEXTUREBUFFER */ + F16TEXTURE2DMS = 562, /* F16TEXTURE2DMS */ + F16TEXTURE2DMSARRAY = 563, /* F16TEXTURE2DMSARRAY */ + SUBPASSINPUT = 564, /* SUBPASSINPUT */ + SUBPASSINPUTMS = 565, /* SUBPASSINPUTMS */ + ISUBPASSINPUT = 566, /* ISUBPASSINPUT */ + ISUBPASSINPUTMS = 567, /* ISUBPASSINPUTMS */ + USUBPASSINPUT = 568, /* USUBPASSINPUT */ + USUBPASSINPUTMS = 569, /* USUBPASSINPUTMS */ + F16SUBPASSINPUT = 570, /* F16SUBPASSINPUT */ + F16SUBPASSINPUTMS = 571, /* F16SUBPASSINPUTMS */ + SPIRV_INSTRUCTION = 572, /* SPIRV_INSTRUCTION */ + SPIRV_EXECUTION_MODE = 573, /* SPIRV_EXECUTION_MODE */ + SPIRV_EXECUTION_MODE_ID = 574, /* SPIRV_EXECUTION_MODE_ID */ + SPIRV_DECORATE = 575, /* SPIRV_DECORATE */ + SPIRV_DECORATE_ID = 576, /* SPIRV_DECORATE_ID */ + SPIRV_DECORATE_STRING = 577, /* SPIRV_DECORATE_STRING */ + SPIRV_TYPE = 578, /* SPIRV_TYPE */ + SPIRV_STORAGE_CLASS = 579, /* SPIRV_STORAGE_CLASS */ + SPIRV_BY_REFERENCE = 580, /* SPIRV_BY_REFERENCE */ + SPIRV_LITERAL = 581, /* SPIRV_LITERAL */ + LEFT_OP = 582, /* LEFT_OP */ + RIGHT_OP = 583, /* RIGHT_OP */ + INC_OP = 584, /* INC_OP */ + DEC_OP = 585, /* DEC_OP */ + LE_OP = 586, /* LE_OP */ + GE_OP = 587, /* GE_OP */ + EQ_OP = 588, /* EQ_OP */ + NE_OP = 589, /* NE_OP */ + AND_OP = 590, /* AND_OP */ + OR_OP = 591, /* OR_OP */ + XOR_OP = 592, /* XOR_OP */ + MUL_ASSIGN = 593, /* MUL_ASSIGN */ + DIV_ASSIGN = 594, /* DIV_ASSIGN */ + ADD_ASSIGN = 595, /* ADD_ASSIGN */ + MOD_ASSIGN = 596, /* MOD_ASSIGN */ + LEFT_ASSIGN = 597, /* LEFT_ASSIGN */ + RIGHT_ASSIGN = 598, /* RIGHT_ASSIGN */ + AND_ASSIGN = 599, /* AND_ASSIGN */ + XOR_ASSIGN = 600, /* XOR_ASSIGN */ + OR_ASSIGN = 601, /* OR_ASSIGN */ + SUB_ASSIGN = 602, /* SUB_ASSIGN */ + STRING_LITERAL = 603, /* STRING_LITERAL */ + LEFT_PAREN = 604, /* LEFT_PAREN */ + RIGHT_PAREN = 605, /* RIGHT_PAREN */ + LEFT_BRACKET = 606, /* LEFT_BRACKET */ + RIGHT_BRACKET = 607, /* RIGHT_BRACKET */ + LEFT_BRACE = 608, /* LEFT_BRACE */ + RIGHT_BRACE = 609, /* RIGHT_BRACE */ + DOT = 610, /* DOT */ + COMMA = 611, /* COMMA */ + COLON = 612, /* COLON */ + EQUAL = 613, /* EQUAL */ + SEMICOLON = 614, /* SEMICOLON */ + BANG = 615, /* BANG */ + DASH = 616, /* DASH */ + TILDE = 617, /* TILDE */ + PLUS = 618, /* PLUS */ + STAR = 619, /* STAR */ + SLASH = 620, /* SLASH */ + PERCENT = 621, /* PERCENT */ + LEFT_ANGLE = 622, /* LEFT_ANGLE */ + RIGHT_ANGLE = 623, /* RIGHT_ANGLE */ + 
VERTICAL_BAR = 624, /* VERTICAL_BAR */ + CARET = 625, /* CARET */ + AMPERSAND = 626, /* AMPERSAND */ + QUESTION = 627, /* QUESTION */ + INVARIANT = 628, /* INVARIANT */ + HIGH_PRECISION = 629, /* HIGH_PRECISION */ + MEDIUM_PRECISION = 630, /* MEDIUM_PRECISION */ + LOW_PRECISION = 631, /* LOW_PRECISION */ + PRECISION = 632, /* PRECISION */ + PACKED = 633, /* PACKED */ + RESOURCE = 634, /* RESOURCE */ + SUPERP = 635, /* SUPERP */ + FLOATCONSTANT = 636, /* FLOATCONSTANT */ + INTCONSTANT = 637, /* INTCONSTANT */ + UINTCONSTANT = 638, /* UINTCONSTANT */ + BOOLCONSTANT = 639, /* BOOLCONSTANT */ + IDENTIFIER = 640, /* IDENTIFIER */ + TYPE_NAME = 641, /* TYPE_NAME */ + CENTROID = 642, /* CENTROID */ + IN = 643, /* IN */ + OUT = 644, /* OUT */ + INOUT = 645, /* INOUT */ + STRUCT = 646, /* STRUCT */ + VOID = 647, /* VOID */ + WHILE = 648, /* WHILE */ + BREAK = 649, /* BREAK */ + CONTINUE = 650, /* CONTINUE */ + DO = 651, /* DO */ + ELSE = 652, /* ELSE */ + FOR = 653, /* FOR */ + IF = 654, /* IF */ + DISCARD = 655, /* DISCARD */ + RETURN = 656, /* RETURN */ + SWITCH = 657, /* SWITCH */ + CASE = 658, /* CASE */ + DEFAULT = 659, /* DEFAULT */ + TERMINATE_INVOCATION = 660, /* TERMINATE_INVOCATION */ + TERMINATE_RAY = 661, /* TERMINATE_RAY */ + IGNORE_INTERSECTION = 662, /* IGNORE_INTERSECTION */ + UNIFORM = 663, /* UNIFORM */ + SHARED = 664, /* SHARED */ + BUFFER = 665, /* BUFFER */ + FLAT = 666, /* FLAT */ + SMOOTH = 667, /* SMOOTH */ + LAYOUT = 668, /* LAYOUT */ + DOUBLECONSTANT = 669, /* DOUBLECONSTANT */ + INT16CONSTANT = 670, /* INT16CONSTANT */ + UINT16CONSTANT = 671, /* UINT16CONSTANT */ + FLOAT16CONSTANT = 672, /* FLOAT16CONSTANT */ + INT32CONSTANT = 673, /* INT32CONSTANT */ + UINT32CONSTANT = 674, /* UINT32CONSTANT */ + INT64CONSTANT = 675, /* INT64CONSTANT */ + UINT64CONSTANT = 676, /* UINT64CONSTANT */ + SUBROUTINE = 677, /* SUBROUTINE */ + DEMOTE = 678, /* DEMOTE */ + PAYLOADNV = 679, /* PAYLOADNV */ + PAYLOADINNV = 680, /* PAYLOADINNV */ + HITATTRNV = 681, /* HITATTRNV */ + CALLDATANV = 682, /* CALLDATANV */ + CALLDATAINNV = 683, /* CALLDATAINNV */ + PAYLOADEXT = 684, /* PAYLOADEXT */ + PAYLOADINEXT = 685, /* PAYLOADINEXT */ + HITATTREXT = 686, /* HITATTREXT */ + CALLDATAEXT = 687, /* CALLDATAEXT */ + CALLDATAINEXT = 688, /* CALLDATAINEXT */ + PATCH = 689, /* PATCH */ + SAMPLE = 690, /* SAMPLE */ + NONUNIFORM = 691, /* NONUNIFORM */ + COHERENT = 692, /* COHERENT */ + VOLATILE = 693, /* VOLATILE */ + RESTRICT = 694, /* RESTRICT */ + READONLY = 695, /* READONLY */ + WRITEONLY = 696, /* WRITEONLY */ + DEVICECOHERENT = 697, /* DEVICECOHERENT */ + QUEUEFAMILYCOHERENT = 698, /* QUEUEFAMILYCOHERENT */ + WORKGROUPCOHERENT = 699, /* WORKGROUPCOHERENT */ + SUBGROUPCOHERENT = 700, /* SUBGROUPCOHERENT */ + NONPRIVATE = 701, /* NONPRIVATE */ + SHADERCALLCOHERENT = 702, /* SHADERCALLCOHERENT */ + NOPERSPECTIVE = 703, /* NOPERSPECTIVE */ + EXPLICITINTERPAMD = 704, /* EXPLICITINTERPAMD */ + PERVERTEXEXT = 705, /* PERVERTEXEXT */ + PERVERTEXNV = 706, /* PERVERTEXNV */ + PERPRIMITIVENV = 707, /* PERPRIMITIVENV */ + PERVIEWNV = 708, /* PERVIEWNV */ + PERTASKNV = 709, /* PERTASKNV */ + PERPRIMITIVEEXT = 710, /* PERPRIMITIVEEXT */ + TASKPAYLOADWORKGROUPEXT = 711, /* TASKPAYLOADWORKGROUPEXT */ + PRECISE = 712 /* PRECISE */ + }; + typedef enum yytokentype yytoken_kind_t; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +union YYSTYPE +{ +#line 97 "MachineIndependent/glslang.y" + + struct { + glslang::TSourceLoc loc; + union { + glslang::TString *string; + int i; + unsigned int u; + long long i64; + unsigned long long u64; + bool b; + double d; + }; + glslang::TSymbol* symbol; + } lex; + struct { + glslang::TSourceLoc loc; + glslang::TOperator op; + union { + TIntermNode* intermNode; + glslang::TIntermNodePair nodePair; + glslang::TIntermTyped* intermTypedNode; + glslang::TAttributes* attributes; + glslang::TSpirvRequirement* spirvReq; + glslang::TSpirvInstruction* spirvInst; + glslang::TSpirvTypeParameters* spirvTypeParams; + }; + union { + glslang::TPublicType type; + glslang::TFunction* function; + glslang::TParameter param; + glslang::TTypeLoc typeLine; + glslang::TTypeList* typeList; + glslang::TArraySizes* arraySizes; + glslang::TIdentifierList* identifierList; + }; + glslang::TArraySizes* typeParameters; + } interm; + +#line 560 "MachineIndependent/glslang_tab.cpp.h" + +}; +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + + +int yyparse (glslang::TParseContext* pParseContext); + +#endif /* !YY_YY_MACHINEINDEPENDENT_GLSLANG_TAB_CPP_H_INCLUDED */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/iomapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/iomapper.h new file mode 100644 index 0000000..0003a74 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/iomapper.h @@ -0,0 +1,361 @@ +// +// Copyright (C) 2016 LunarG, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#if !defined(GLSLANG_WEB) + +#ifndef _IOMAPPER_INCLUDED +#define _IOMAPPER_INCLUDED + +#include <cstdint> +#include "LiveTraverser.h" +#include <unordered_map> +#include <unordered_set> +// +// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+// + +class TInfoSink; + +namespace glslang { + +class TIntermediate; +struct TVarEntryInfo { + long long id; + TIntermSymbol* symbol; + bool live; + int newBinding; + int newSet; + int newLocation; + int newComponent; + int newIndex; + EShLanguage stage; + + void clearNewAssignments() { + newBinding = -1; + newSet = -1; + newLocation = -1; + newComponent = -1; + newIndex = -1; + } + + struct TOrderById { + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { return l.id < r.id; } + }; + + struct TOrderByPriority { + // ordering: + // 1) has both binding and set + // 2) has binding but no set + // 3) has no binding but set + // 4) has no binding and no set + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { + const TQualifier& lq = l.symbol->getQualifier(); + const TQualifier& rq = r.symbol->getQualifier(); + + // simple rules: + // has binding gives 2 points + // has set gives 1 point + // who has the most points is more important. + int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0); + int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0); + + if (lPoints == rPoints) + return l.id < r.id; + return lPoints > rPoints; + } + }; + + struct TOrderByPriorityAndLive { + // ordering: + // 1) do live variables first + // 2) has both binding and set + // 3) has binding but no set + // 4) has no binding but set + // 5) has no binding and no set + inline bool operator()(const TVarEntryInfo& l, const TVarEntryInfo& r) { + + const TQualifier& lq = l.symbol->getQualifier(); + const TQualifier& rq = r.symbol->getQualifier(); + + // simple rules: + // has binding gives 2 points + // has set gives 1 point + // who has the most points is more important. + int lPoints = (lq.hasBinding() ? 2 : 0) + (lq.hasSet() ? 1 : 0); + int rPoints = (rq.hasBinding() ? 2 : 0) + (rq.hasSet() ? 1 : 0); + + if (l.live != r.live) + return l.live > r.live; + + if (lPoints != rPoints) + return lPoints > rPoints; + + return l.id < r.id; + } + }; +}; + +// Base class for shared TIoMapResolver services, used by several derivations. 
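The TOrderByPriority comparator above encodes a simple scoring rule: an explicit binding is worth 2 points, an explicit set 1 point, higher scores sort first, and ties fall back to the declaration id. Before the resolver base class below, here is a standalone sketch of that rule; the Entry struct and its field names are illustrative stand-ins, not part of glslang or of this patch:

```cpp
// Standalone sketch of the TOrderByPriority scoring used above:
// an explicit binding counts 2 points, an explicit set 1 point,
// the higher score sorts first, ties fall back to declaration id.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Entry {            // hypothetical stand-in for TVarEntryInfo
    long long id;
    bool hasBinding;
    bool hasSet;
};

static bool byPriority(const Entry& l, const Entry& r) {
    int lPoints = (l.hasBinding ? 2 : 0) + (l.hasSet ? 1 : 0);
    int rPoints = (r.hasBinding ? 2 : 0) + (r.hasSet ? 1 : 0);
    if (lPoints == rPoints)
        return l.id < r.id;       // deterministic tie-break by id
    return lPoints > rPoints;     // most points first
}

int main() {
    std::vector<Entry> vars = {
        {3, false, false}, {1, true, true}, {2, true, false}, {4, false, true}
    };
    std::sort(vars.begin(), vars.end(), byPriority);
    for (const Entry& e : vars)
        std::printf("id=%lld binding=%d set=%d\n", e.id, (int)e.hasBinding, (int)e.hasSet);
    // Resulting order: id=1 (binding+set), id=2 (binding only), id=4 (set only), id=3 (neither).
    return 0;
}
```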
+struct TDefaultIoResolverBase : public glslang::TIoMapResolver { +public: + TDefaultIoResolverBase(const TIntermediate& intermediate); + typedef std::vector<int> TSlotSet; + typedef std::unordered_map<int, TSlotSet> TSlotSetMap; + + // grow the reflection stage by stage + void notifyBinding(EShLanguage, TVarEntryInfo& /*ent*/) override {} + void notifyInOut(EShLanguage, TVarEntryInfo& /*ent*/) override {} + void beginNotifications(EShLanguage) override {} + void endNotifications(EShLanguage) override {} + void beginResolve(EShLanguage) override {} + void endResolve(EShLanguage) override {} + void beginCollect(EShLanguage) override {} + void endCollect(EShLanguage) override {} + void reserverResourceSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {} + void reserverStorageSlot(TVarEntryInfo& /*ent*/, TInfoSink& /*infoSink*/) override {} + int getBaseBinding(EShLanguage stage, TResourceType res, unsigned int set) const; + const std::vector<std::string>& getResourceSetBinding(EShLanguage stage) const; + virtual TResourceType getResourceType(const glslang::TType& type) = 0; + bool doAutoBindingMapping() const; + bool doAutoLocationMapping() const; + TSlotSet::iterator findSlot(int set, int slot); + bool checkEmpty(int set, int slot); + bool validateInOut(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; } + int reserveSlot(int set, int slot, int size = 1); + int getFreeSlot(int set, int base, int size = 1); + int resolveSet(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override; + int resolveInOutComponent(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveInOutIndex(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + void addStage(EShLanguage stage, TIntermediate& stageIntermediate) override { + if (stage < EShLangCount) { + stageMask[stage] = true; + stageIntermediates[stage] = &stageIntermediate; + } + } + uint32_t computeTypeLocationSize(const TType& type, EShLanguage stage); + + TSlotSetMap slots; + bool hasError = false; + +protected: + TDefaultIoResolverBase(TDefaultIoResolverBase&); + TDefaultIoResolverBase& operator=(TDefaultIoResolverBase&); + const TIntermediate& referenceIntermediate; + int nextUniformLocation; + int nextInputLocation; + int nextOutputLocation; + bool stageMask[EShLangCount + 1]; + const TIntermediate* stageIntermediates[EShLangCount]; + + // Return descriptor set specific base if there is one, and the generic base otherwise. + int selectBaseBinding(int base, int descriptorSetBase) const { + return descriptorSetBase != -1 ?
descriptorSetBase : base; + } + + static int getLayoutSet(const glslang::TType& type) { + if (type.getQualifier().hasSet()) + return type.getQualifier().layoutSet; + else + return 0; + } + + static bool isSamplerType(const glslang::TType& type) { + return type.getBasicType() == glslang::EbtSampler && type.getSampler().isPureSampler(); + } + + static bool isTextureType(const glslang::TType& type) { + return (type.getBasicType() == glslang::EbtSampler && + (type.getSampler().isTexture() || type.getSampler().isSubpass())); + } + + static bool isUboType(const glslang::TType& type) { + return type.getQualifier().storage == EvqUniform; + } + + static bool isImageType(const glslang::TType& type) { + return type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage(); + } + + static bool isSsboType(const glslang::TType& type) { + return type.getQualifier().storage == EvqBuffer; + } + + // Return true if this is a SRV (shader resource view) type: + static bool isSrvType(const glslang::TType& type) { + return isTextureType(type) || type.getQualifier().storage == EvqBuffer; + } + + // Return true if this is a UAV (unordered access view) type: + static bool isUavType(const glslang::TType& type) { + if (type.getQualifier().isReadOnly()) + return false; + return (type.getBasicType() == glslang::EbtSampler && type.getSampler().isImage()) || + (type.getQualifier().storage == EvqBuffer); + } +}; + +// Default I/O resolver for OpenGL +struct TDefaultGlslIoResolver : public TDefaultIoResolverBase { +public: + typedef std::map<TString, int> TVarSlotMap; // <resourceName, location/binding> + typedef std::map<int, TVarSlotMap> TSlotMap; // <resourceKey, TVarSlotMap> + TDefaultGlslIoResolver(const TIntermediate& intermediate); + bool validateBinding(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; } + TResourceType getResourceType(const glslang::TType& type) override; + int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override; + int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + int resolveBinding(EShLanguage /*stage*/, TVarEntryInfo& ent) override; + void beginResolve(EShLanguage /*stage*/) override; + void endResolve(EShLanguage stage) override; + void beginCollect(EShLanguage) override; + void endCollect(EShLanguage) override; + void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override; + void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) override; + // In/out symbols and uniform symbols are stored in the same resourceSlotMap; the storage key identifies each kind of symbol. + // The stage and storage qualifier are combined into a storage key, which lets us identify the same storage resource used in different stages. + // If a resource is a program resource and we don't need to know its usage stage, the same stage can be used to build the storage key. + // Note: both stage and type must be less than 0xffff. + int buildStorageKey(EShLanguage stage, TStorageQualifier type) { + assert(static_cast<uint32_t>(stage) <= 0x0000ffff && static_cast<uint32_t>(type) <= 0x0000ffff); + return (stage << 16) | type; + } + +protected: + // Used to mark the previous stage, to get more interface symbol information. + EShLanguage preStage; + // Used to mark the current shader stage for the resolver. + EShLanguage currentStage; + // Slot map for storage resources (locations of uniforms and interface symbols); it's a program-wide shared slot map. + TSlotMap resourceSlotMap; + // Slot map for other resources (images, UBOs, SSBOs); it's a program-wide shared slot map.
+ TSlotMap storageSlotMap; +}; + +typedef std::map<const TString, TVarEntryInfo> TVarLiveMap; + +// Override "operator=": when a vector of these pairs is sorted, +// the VC++ sort implementation calls: +// pair& operator=(const pair<_Other1, _Other2>& _Right) +// { +// first = _Right.first; +// second = _Right.second; +// return (*this); +// } +// which assigns to a const-qualified first member on the left-hand side. +// Overriding this function avoids that compiler error. +// In the future, if the vc++ compiler can handle such a situation, +// this part of the code will be removed. +struct TVarLivePair : std::pair<const TString, TVarEntryInfo> { + TVarLivePair(const std::pair<const TString, TVarEntryInfo>& _Right) : pair(_Right.first, _Right.second) {} + TVarLivePair& operator=(const TVarLivePair& _Right) { + const_cast<TString&>(first) = _Right.first; + second = _Right.second; + return (*this); + } + TVarLivePair(const TVarLivePair& src) : pair(src) { } +}; +typedef std::vector<TVarLivePair> TVarLiveVector; + +// I/O mapper +class TIoMapper { +public: + TIoMapper() {} + virtual ~TIoMapper() {} + // grow the reflection stage by stage + bool virtual addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*); + bool virtual doMap(TIoMapResolver*, TInfoSink&) { return true; } +}; + +// I/O mapper for GLSL +class TGlslIoMapper : public TIoMapper { +public: + TGlslIoMapper() { + memset(inVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1)); + memset(outVarMaps, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1)); + memset(uniformVarMap, 0, sizeof(TVarLiveMap*) * (EShLangCount + 1)); + memset(intermediates, 0, sizeof(TIntermediate*) * (EShLangCount + 1)); + profile = ENoProfile; + version = 0; + autoPushConstantMaxSize = 128; + autoPushConstantBlockPacking = ElpStd430; + } + virtual ~TGlslIoMapper() { + for (size_t stage = 0; stage < EShLangCount; stage++) { + if (inVarMaps[stage] != nullptr) { + delete inVarMaps[stage]; + inVarMaps[stage] = nullptr; + } + if (outVarMaps[stage] != nullptr) { + delete outVarMaps[stage]; + outVarMaps[stage] = nullptr; + } + if (uniformVarMap[stage] != nullptr) { + delete uniformVarMap[stage]; + uniformVarMap[stage] = nullptr; + } + if (intermediates[stage] != nullptr) + intermediates[stage] = nullptr; + } + } + // If set, the uniform block with the given name will be changed to be backed by + // push_constant if its size is <= maxSize + void setAutoPushConstantBlock(const char* name, unsigned int maxSize, TLayoutPacking packing) { + autoPushConstantBlockName = name; + autoPushConstantMaxSize = maxSize; + autoPushConstantBlockPacking = packing; + } + // grow the reflection stage by stage + bool addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*) override; + bool doMap(TIoMapResolver*, TInfoSink&) override; + TVarLiveMap *inVarMaps[EShLangCount], *outVarMaps[EShLangCount], + *uniformVarMap[EShLangCount]; + TIntermediate* intermediates[EShLangCount]; + bool hadError = false; + EProfile profile; + int version; + +private: + TString autoPushConstantBlockName; + unsigned int autoPushConstantMaxSize; + TLayoutPacking autoPushConstantBlockPacking; +}; + +} // end namespace glslang + +#endif // _IOMAPPER_INCLUDED + +#endif // !GLSLANG_WEB diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/localintermediate.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/localintermediate.h new file mode 100644 index 0000000..1bb4e97 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/localintermediate.h @@ -0,0 +1,1222
@@ +// +// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. +// Copyright (C) 2016 LunarG, Inc. +// Copyright (C) 2017 ARM Limited. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef _LOCAL_INTERMEDIATE_INCLUDED_ +#define _LOCAL_INTERMEDIATE_INCLUDED_ + +#include "../Include/intermediate.h" +#include "../Public/ShaderLang.h" +#include "Versions.h" + +#include <string> +#include <vector> +#include <algorithm> +#include <set> +#include <array> + +class TInfoSink; + +namespace glslang { + +struct TMatrixSelector { + int coord1; // stay agnostic about column/row; this is parse order + int coord2; +}; + +typedef int TVectorSelector; + +const int MaxSwizzleSelectors = 4; + +template<class selectorType> +class TSwizzleSelectors { +public: + TSwizzleSelectors() : size_(0) { } + + void push_back(selectorType comp) + { + if (size_ < MaxSwizzleSelectors) + components[size_++] = comp; + } + void resize(int s) + { + assert(s <= size_); + size_ = s; + } + int size() const { return size_; } + selectorType operator[](int i) const + { + assert(i < MaxSwizzleSelectors); + return components[i]; + } + +private: + int size_; + selectorType components[MaxSwizzleSelectors]; +}; + +// +// Some helper structures for TIntermediate. Their contents are encapsulated +// by TIntermediate. +// + +// Used for call-graph algorithms for detecting recursion, missing bodies, and dead bodies. +// A "call" is a pair: <caller, callee>. +// There can be duplicates. General assumption is the list is small. +struct TCall { + TCall(const TString& pCaller, const TString& pCallee) : caller(pCaller), callee(pCallee) { } + TString caller; + TString callee; + bool visited; + bool currentPath; + bool errorGiven; + int calleeBodyPosition; +};
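TSwizzleSelectors above is a tiny fixed-capacity buffer: push_back silently ignores selectors beyond MaxSwizzleSelectors (4), matching the at-most-four components of a GLSL swizzle such as `.xyzx`. A minimal standalone sketch of the same behavior follows; it is a simplified copy for illustration, not the vendored class itself:

```cpp
#include <cassert>
#include <cstdio>

const int kMaxSwizzleSelectors = 4;   // mirrors MaxSwizzleSelectors above

template <class selectorType>
class SwizzleSelectors {              // simplified copy of TSwizzleSelectors
public:
    SwizzleSelectors() : size_(0) {}
    void push_back(selectorType comp) {
        if (size_ < kMaxSwizzleSelectors)  // extra selectors are silently dropped
            components_[size_++] = comp;
    }
    int size() const { return size_; }
    selectorType operator[](int i) const {
        assert(i < size_);
        return components_[i];
    }
private:
    int size_;
    selectorType components_[kMaxSwizzleSelectors];
};

int main() {
    SwizzleSelectors<int> s;               // e.g. ".xyzx" on a vec4: 0, 1, 2, 0
    const int comps[] = {0, 1, 2, 0, 3};   // a fifth selector exceeds capacity
    for (int comp : comps)
        s.push_back(comp);
    std::printf("kept %d selectors:", s.size());   // prints: kept 4 selectors
    for (int i = 0; i < s.size(); ++i)
        std::printf(" %d", s[i]);
    std::printf("\n");
    return 0;
}
```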
+ +// A generic 1-D range. +struct TRange { + TRange(int start, int last) : start(start), last(last) { } + bool overlap(const TRange& rhs) const + { + return last >= rhs.start && start <= rhs.last; + } + int start; + int last; +}; + +// An IO range is a 3-D rectangle; the set of (location, component, index) triples all lying +// within the same location range, component range, and index value. Locations don't alias unless +// all other dimensions of their range overlap. +struct TIoRange { + TIoRange(TRange location, TRange component, TBasicType basicType, int index) + : location(location), component(component), basicType(basicType), index(index) { } + bool overlap(const TIoRange& rhs) const + { + return location.overlap(rhs.location) && component.overlap(rhs.component) && index == rhs.index; + } + TRange location; + TRange component; + TBasicType basicType; + int index; +}; + +// An offset range is a 2-D rectangle; the set of (binding, offset) pairs all lying +// within the same binding and offset range. +struct TOffsetRange { + TOffsetRange(TRange binding, TRange offset) + : binding(binding), offset(offset) { } + bool overlap(const TOffsetRange& rhs) const + { + return binding.overlap(rhs.binding) && offset.overlap(rhs.offset); + } + TRange binding; + TRange offset; +}; + +#ifndef GLSLANG_WEB +// Things that need to be tracked per xfb buffer. +struct TXfbBuffer { + TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), contains64BitType(false), + contains32BitType(false), contains16BitType(false) { } + std::vector<TRange> ranges; // byte offsets that have already been assigned + unsigned int stride; + unsigned int implicitStride; + bool contains64BitType; + bool contains32BitType; + bool contains16BitType; +}; +#endif + +// Track a set of strings describing how the module was processed. +// This includes command line options, transforms, etc., ideally inclusive enough +// to reproduce the steps used to transform the input source to the output. +// E.g., see SPIR-V OpModuleProcessed. +// Each "process" or "transform" used is expressed in the form: +// process arg0 arg1 arg2 ... +// process arg0 arg1 arg2 ... +// where everything is textual, and there can be zero or more arguments +class TProcesses { +public: + TProcesses() {} + ~TProcesses() {} + + void addProcess(const char* process) + { + processes.push_back(process); + } + void addProcess(const std::string& process) + { + processes.push_back(process); + } + void addArgument(int arg) + { + processes.back().append(" "); + std::string argString = std::to_string(arg); + processes.back().append(argString); + } + void addArgument(const char* arg) + { + processes.back().append(" "); + processes.back().append(arg); + } + void addArgument(const std::string& arg) + { + processes.back().append(" "); + processes.back().append(arg); + } + void addIfNonZero(const char* process, int value) + { + if (value != 0) { + addProcess(process); + addArgument(value); + } + } + + const std::vector<std::string>& getProcesses() const { return processes; } + +private: + std::vector<std::string> processes; +}; + +class TSymbolTable; +class TSymbol; +class TVariable;
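The overlap tests above use inclusive interval logic: two TRanges overlap exactly when `last >= rhs.start && start <= rhs.last`, and a TIoRange only aliases another when the location rectangle and the component rectangle both overlap and the index matches. A small self-contained check of that logic, using simplified structs purely for illustration:

```cpp
#include <cstdio>

struct Range {                        // mirrors TRange: inclusive [start, last]
    int start, last;
    bool overlap(const Range& rhs) const {
        return last >= rhs.start && start <= rhs.last;
    }
};

struct IoRange {                      // mirrors TIoRange, minus the basic type
    Range location, component;
    int index;
    bool overlap(const IoRange& rhs) const {
        // locations alias only if every dimension of the rectangle overlaps
        return location.overlap(rhs.location) &&
               component.overlap(rhs.component) &&
               index == rhs.index;
    }
};

int main() {
    IoRange a{{0, 1}, {0, 3}, 0};     // locations 0..1, components 0..3
    IoRange b{{1, 2}, {0, 1}, 0};     // shares location 1 and components 0..1
    IoRange c{{1, 2}, {0, 1}, 1};     // same rectangle, but a different index
    std::printf("a/b overlap: %d\n", (int)a.overlap(b));  // 1: conflict
    std::printf("a/c overlap: %d\n", (int)a.overlap(c));  // 0: distinct index
    return 0;
}
```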
+ +// +// Texture and Sampler transformation mode. +// +enum ComputeDerivativeMode { + LayoutDerivativeNone, // default layout as SPV_NV_compute_shader_derivatives not enabled + LayoutDerivativeGroupQuads, // derivative_group_quadsNV + LayoutDerivativeGroupLinear, // derivative_group_linearNV +}; + +class TIdMaps { +public: + TMap<TString, long long>& operator[](long long i) { return maps[i]; } + const TMap<TString, long long>& operator[](long long i) const { return maps[i]; } +private: + TMap<TString, long long> maps[EsiCount]; +}; + +class TNumericFeatures { +public: + TNumericFeatures() : features(0) { } + TNumericFeatures(const TNumericFeatures&) = delete; + TNumericFeatures& operator=(const TNumericFeatures&) = delete; + typedef enum : unsigned int { + shader_explicit_arithmetic_types = 1 << 0, + shader_explicit_arithmetic_types_int8 = 1 << 1, + shader_explicit_arithmetic_types_int16 = 1 << 2, + shader_explicit_arithmetic_types_int32 = 1 << 3, + shader_explicit_arithmetic_types_int64 = 1 << 4, + shader_explicit_arithmetic_types_float16 = 1 << 5, + shader_explicit_arithmetic_types_float32 = 1 << 6, + shader_explicit_arithmetic_types_float64 = 1 << 7, + shader_implicit_conversions = 1 << 8, + gpu_shader_fp64 = 1 << 9, + gpu_shader_int16 = 1 << 10, + gpu_shader_half_float = 1 << 11, + } feature; + void insert(feature f) { features |= f; } + void erase(feature f) { features &= ~f; } + bool contains(feature f) const { return (features & f) != 0; } +private: + unsigned int features; +}; + +// MustBeAssigned wraps a T, asserting that it has been assigned with +// operator =() before attempting to read with operator T() or operator ->(). +// Used to catch cases where fields are read before they have been assigned. +template <typename T> +class MustBeAssigned +{ +public: + MustBeAssigned() = default; + MustBeAssigned(const T& v) : value(v) {} + operator const T&() const { assert(isSet); return value; } + const T* operator ->() const { assert(isSet); return &value; } + MustBeAssigned& operator = (const T& v) { value = v; isSet = true; return *this; } +private: + T value; + bool isSet = false; +}; + +// +// Set of helper functions to help parse and build the tree.
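Before the TIntermediate helpers below, one aside on the MustBeAssigned wrapper just defined: it is a read-before-write guard, where the implicit conversion and operator-> assert unless operator= has run first. A minimal sketch of the same idea outside glslang (an assumed simplified copy, not part of the patch):

```cpp
#include <cassert>

// Simplified copy of MustBeAssigned: reading before assignment trips an assert.
template <typename T>
class MustBeAssigned {
public:
    MustBeAssigned() = default;
    operator const T&() const { assert(isSet); return value; }                  // read path
    MustBeAssigned& operator=(const T& v) { value = v; isSet = true; return *this; }
private:
    T value{};
    bool isSet = false;
};

int main() {
    MustBeAssigned<int> entryPointCount;   // hypothetical field name
    // int bad = entryPointCount;          // would assert: read before assignment
    entryPointCount = 1;                   // operator= marks the value as set
    int ok = entryPointCount;              // fine after assignment
    return ok == 1 ? 0 : 1;
}
```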
+// +class TIntermediate { +public: + explicit TIntermediate(EShLanguage l, int v = 0, EProfile p = ENoProfile) : + language(l), + profile(p), version(v), + treeRoot(0), + resources(TBuiltInResource{}), + numEntryPoints(0), numErrors(0), numPushConstants(0), recursive(false), + invertY(false), + dxPositionW(false), + enhancedMsgs(false), + debugInfo(false), + useStorageBuffer(false), + invariantAll(false), + nanMinMaxClamp(false), + depthReplacing(false), + stencilReplacing(false), + uniqueId(0), + globalUniformBlockName(""), + atomicCounterBlockName(""), + globalUniformBlockSet(TQualifier::layoutSetEnd), + globalUniformBlockBinding(TQualifier::layoutBindingEnd), + atomicCounterBlockSet(TQualifier::layoutSetEnd) +#ifndef GLSLANG_WEB + , + implicitThisName("@this"), implicitCounterName("@count"), + source(EShSourceNone), + useVulkanMemoryModel(false), + invocations(TQualifier::layoutNotSet), vertices(TQualifier::layoutNotSet), + inputPrimitive(ElgNone), outputPrimitive(ElgNone), + pixelCenterInteger(false), originUpperLeft(false),texCoordBuiltinRedeclared(false), + vertexSpacing(EvsNone), vertexOrder(EvoNone), interlockOrdering(EioNone), pointMode(false), earlyFragmentTests(false), + postDepthCoverage(false), earlyAndLateFragmentTestsAMD(false), depthLayout(EldNone), stencilLayout(ElsNone), + hlslFunctionality1(false), + blendEquations(0), xfbMode(false), multiStream(false), + layoutOverrideCoverage(false), + geoPassthroughEXT(false), + numShaderRecordBlocks(0), + computeDerivativeMode(LayoutDerivativeNone), + primitives(TQualifier::layoutNotSet), + numTaskNVBlocks(0), + layoutPrimitiveCulling(false), + numTaskEXTPayloads(0), + autoMapBindings(false), + autoMapLocations(false), + flattenUniformArrays(false), + useUnknownFormat(false), + hlslOffsets(false), + hlslIoMapping(false), + useVariablePointers(false), + textureSamplerTransformMode(EShTexSampTransKeep), + needToLegalize(false), + binaryDoubleOutput(false), + subgroupUniformControlFlow(false), + usePhysicalStorageBuffer(false), + spirvRequirement(nullptr), + spirvExecutionMode(nullptr), + uniformLocationBase(0) +#endif + { + localSize[0] = 1; + localSize[1] = 1; + localSize[2] = 1; + localSizeNotDefault[0] = false; + localSizeNotDefault[1] = false; + localSizeNotDefault[2] = false; + localSizeSpecId[0] = TQualifier::layoutNotSet; + localSizeSpecId[1] = TQualifier::layoutNotSet; + localSizeSpecId[2] = TQualifier::layoutNotSet; +#ifndef GLSLANG_WEB + xfbBuffers.resize(TQualifier::layoutXfbBufferEnd); + shiftBinding.fill(0); +#endif + } + + void setVersion(int v) + { + version = v; + } + void setProfile(EProfile p) + { + profile = p; + } + + int getVersion() const { return version; } + EProfile getProfile() const { return profile; } + void setSpv(const SpvVersion& s) + { + spvVersion = s; + + // client processes + if (spvVersion.vulkan > 0) + processes.addProcess("client vulkan100"); + if (spvVersion.openGl > 0) + processes.addProcess("client opengl100"); + + // target SPV + switch (spvVersion.spv) { + case 0: + break; + case EShTargetSpv_1_0: + break; + case EShTargetSpv_1_1: + processes.addProcess("target-env spirv1.1"); + break; + case EShTargetSpv_1_2: + processes.addProcess("target-env spirv1.2"); + break; + case EShTargetSpv_1_3: + processes.addProcess("target-env spirv1.3"); + break; + case EShTargetSpv_1_4: + processes.addProcess("target-env spirv1.4"); + break; + case EShTargetSpv_1_5: + processes.addProcess("target-env spirv1.5"); + break; + case EShTargetSpv_1_6: + processes.addProcess("target-env spirv1.6"); + break; + 
default: + processes.addProcess("target-env spirvUnknown"); + break; + } + + // target-environment processes + switch (spvVersion.vulkan) { + case 0: + break; + case EShTargetVulkan_1_0: + processes.addProcess("target-env vulkan1.0"); + break; + case EShTargetVulkan_1_1: + processes.addProcess("target-env vulkan1.1"); + break; + case EShTargetVulkan_1_2: + processes.addProcess("target-env vulkan1.2"); + break; + case EShTargetVulkan_1_3: + processes.addProcess("target-env vulkan1.3"); + break; + default: + processes.addProcess("target-env vulkanUnknown"); + break; + } + if (spvVersion.openGl > 0) + processes.addProcess("target-env opengl"); + } + const SpvVersion& getSpv() const { return spvVersion; } + EShLanguage getStage() const { return language; } + void addRequestedExtension(const char* extension) { requestedExtensions.insert(extension); } + const std::set<std::string>& getRequestedExtensions() const { return requestedExtensions; } + bool isRayTracingStage() const { + return language >= EShLangRayGen && language <= EShLangCallableNV; + } + + void setTreeRoot(TIntermNode* r) { treeRoot = r; } + TIntermNode* getTreeRoot() const { return treeRoot; } + void incrementEntryPointCount() { ++numEntryPoints; } + int getNumEntryPoints() const { return numEntryPoints; } + int getNumErrors() const { return numErrors; } + void addPushConstantCount() { ++numPushConstants; } + void setLimits(const TBuiltInResource& r) { resources = r; } + const TBuiltInResource& getLimits() const { return resources; } + + bool postProcess(TIntermNode*, EShLanguage); + void removeTree(); + + void setEntryPointName(const char* ep) + { + entryPointName = ep; + processes.addProcess("entry-point"); + processes.addArgument(entryPointName); + } + void setEntryPointMangledName(const char* ep) { entryPointMangledName = ep; } + const std::string& getEntryPointName() const { return entryPointName; } + const std::string& getEntryPointMangledName() const { return entryPointMangledName; } + + void setDebugInfo(bool debuginfo) + { + debugInfo = debuginfo; + } + bool getDebugInfo() const { return debugInfo; } + + void setInvertY(bool invert) + { + invertY = invert; + if (invertY) + processes.addProcess("invert-y"); + } + bool getInvertY() const { return invertY; } + + void setDxPositionW(bool dxPosW) + { + dxPositionW = dxPosW; + if (dxPositionW) + processes.addProcess("dx-position-w"); + } + bool getDxPositionW() const { return dxPositionW; } + + void setEnhancedMsgs() + { + enhancedMsgs = true; + } + bool getEnhancedMsgs() const { return enhancedMsgs && getSource() == EShSourceGlsl; } + +#ifdef ENABLE_HLSL + void setSource(EShSource s) { source = s; } + EShSource getSource() const { return source; } +#else + void setSource(EShSource s) { assert(s == EShSourceGlsl); (void)s; } + EShSource getSource() const { return EShSourceGlsl; } +#endif + + bool isRecursive() const { return recursive; } + + TIntermSymbol* addSymbol(const TVariable&); + TIntermSymbol* addSymbol(const TVariable&, const TSourceLoc&); + TIntermSymbol* addSymbol(const TType&, const TSourceLoc&); + TIntermSymbol* addSymbol(const TIntermSymbol&); + TIntermTyped* addConversion(TOperator, const TType&, TIntermTyped*); + std::tuple<TIntermTyped*, TIntermTyped*> addPairConversion(TOperator op, TIntermTyped* node0, TIntermTyped* node1); + TIntermTyped* addUniShapeConversion(TOperator, const TType&, TIntermTyped*); + TIntermTyped* addConversion(TBasicType convertTo, TIntermTyped* node) const; + void addBiShapeConversion(TOperator, TIntermTyped*& lhsNode, TIntermTyped*& rhsNode); + TIntermTyped*
addShapeConversion(const TType&, TIntermTyped*); + TIntermTyped* addBinaryMath(TOperator, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addAssign(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addIndex(TOperator op, TIntermTyped* base, TIntermTyped* index, const TSourceLoc&); + TIntermTyped* addUnaryMath(TOperator, TIntermTyped* child, const TSourceLoc&); + TIntermTyped* addBuiltInFunctionCall(const TSourceLoc& line, TOperator, bool unary, TIntermNode*, const TType& returnType); + bool canImplicitlyPromote(TBasicType from, TBasicType to, TOperator op = EOpNull) const; + bool isIntegralPromotion(TBasicType from, TBasicType to) const; + bool isFPPromotion(TBasicType from, TBasicType to) const; + bool isIntegralConversion(TBasicType from, TBasicType to) const; + bool isFPConversion(TBasicType from, TBasicType to) const; + bool isFPIntegralConversion(TBasicType from, TBasicType to) const; + TOperator mapTypeToConstructorOp(const TType&) const; + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right); + TIntermAggregate* growAggregate(TIntermNode* left, TIntermNode* right, const TSourceLoc&); + TIntermAggregate* makeAggregate(TIntermNode* node); + TIntermAggregate* makeAggregate(TIntermNode* node, const TSourceLoc&); + TIntermAggregate* makeAggregate(const TSourceLoc&); + TIntermTyped* setAggregateOperator(TIntermNode*, TOperator, const TType& type, const TSourceLoc&); + bool areAllChildConst(TIntermAggregate* aggrNode); + TIntermSelection* addSelection(TIntermTyped* cond, TIntermNodePair code, const TSourceLoc&); + TIntermTyped* addSelection(TIntermTyped* cond, TIntermTyped* trueBlock, TIntermTyped* falseBlock, const TSourceLoc&); + TIntermTyped* addComma(TIntermTyped* left, TIntermTyped* right, const TSourceLoc&); + TIntermTyped* addMethod(TIntermTyped*, const TType&, const TString*, const TSourceLoc&); + TIntermConstantUnion* addConstantUnion(const TConstUnionArray&, const TType&, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned char, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(signed short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned short, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned int, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(unsigned long long, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(bool, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(double, TBasicType, const TSourceLoc&, bool literal = false) const; + TIntermConstantUnion* addConstantUnion(const TString*, const TSourceLoc&, bool literal = false) const; + TIntermTyped* promoteConstantUnion(TBasicType, TIntermConstantUnion*) const; + bool parseConstTree(TIntermNode*, TConstUnionArray, TOperator, const TType&, bool singleConstantParam = false); + TIntermLoop* addLoop(TIntermNode*, TIntermTyped*, TIntermTyped*, bool testFirst, const TSourceLoc&); + TIntermAggregate* addForLoop(TIntermNode*, TIntermNode*, TIntermTyped*, 
TIntermTyped*, bool testFirst, + const TSourceLoc&, TIntermLoop*&); + TIntermBranch* addBranch(TOperator, const TSourceLoc&); + TIntermBranch* addBranch(TOperator, TIntermTyped*, const TSourceLoc&); + template<typename selectorType> TIntermTyped* addSwizzle(TSwizzleSelectors<selectorType>&, const TSourceLoc&); + + // Low level functions to add nodes (no conversions or other higher level transformations) + // If a type is provided, the node's type will be set to it. + TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&) const; + TIntermBinary* addBinaryNode(TOperator op, TIntermTyped* left, TIntermTyped* right, const TSourceLoc&, + const TType&) const; + TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&) const; + TIntermUnary* addUnaryNode(TOperator op, TIntermTyped* child, const TSourceLoc&, const TType&) const; + + // Constant folding (in Constant.cpp) + TIntermTyped* fold(TIntermAggregate* aggrNode); + TIntermTyped* foldConstructor(TIntermAggregate* aggrNode); + TIntermTyped* foldDereference(TIntermTyped* node, int index, const TSourceLoc&); + TIntermTyped* foldSwizzle(TIntermTyped* node, TSwizzleSelectors<TVectorSelector>& fields, const TSourceLoc&); + + // Tree ops + static const TIntermTyped* findLValueBase(const TIntermTyped*, bool swizzleOkay, bool BufferReferenceOk = false); + + // Linkage related + void addSymbolLinkageNodes(TIntermAggregate*& linkage, EShLanguage, TSymbolTable&); + void addSymbolLinkageNode(TIntermAggregate*& linkage, const TSymbol&); + TIntermAggregate* findLinkerObjects() const; + + void setGlobalUniformBlockName(const char* name) { globalUniformBlockName = std::string(name); } + const char* getGlobalUniformBlockName() const { return globalUniformBlockName.c_str(); } + void setGlobalUniformSet(unsigned int set) { globalUniformBlockSet = set; } + unsigned int getGlobalUniformSet() const { return globalUniformBlockSet; } + void setGlobalUniformBinding(unsigned int binding) { globalUniformBlockBinding = binding; } + unsigned int getGlobalUniformBinding() const { return globalUniformBlockBinding; } + + void setAtomicCounterBlockName(const char* name) { atomicCounterBlockName = std::string(name); } + const char* getAtomicCounterBlockName() const { return atomicCounterBlockName.c_str(); } + void setAtomicCounterBlockSet(unsigned int set) { atomicCounterBlockSet = set; } + unsigned int getAtomicCounterBlockSet() const { return atomicCounterBlockSet; } + + + void setUseStorageBuffer() { useStorageBuffer = true; } + bool usingStorageBuffer() const { return useStorageBuffer; } + void setInvariantAll() { invariantAll = true; } + bool isInvariantAll() const { return invariantAll; } + void setDepthReplacing() { depthReplacing = true; } + bool isDepthReplacing() const { return depthReplacing; } + void setStencilReplacing() { stencilReplacing = true; } + bool isStencilReplacing() const { return stencilReplacing; } + bool setLocalSize(int dim, int size) + { + if (localSizeNotDefault[dim]) + return size == localSize[dim]; + localSizeNotDefault[dim] = true; + localSize[dim] = size; + return true; + } + unsigned int getLocalSize(int dim) const { return localSize[dim]; } + bool isLocalSizeSet() const + { + // Return true if any component has been set (i.e. any component is not default).
+ return localSizeNotDefault[0] || localSizeNotDefault[1] || localSizeNotDefault[2]; + } + bool setLocalSizeSpecId(int dim, int id) + { + if (localSizeSpecId[dim] != TQualifier::layoutNotSet) + return id == localSizeSpecId[dim]; + localSizeSpecId[dim] = id; + return true; + } + int getLocalSizeSpecId(int dim) const { return localSizeSpecId[dim]; } + bool isLocalSizeSpecialized() const + { + // Return true if any component has been specialized. + return localSizeSpecId[0] != TQualifier::layoutNotSet || + localSizeSpecId[1] != TQualifier::layoutNotSet || + localSizeSpecId[2] != TQualifier::layoutNotSet; + } +#ifdef GLSLANG_WEB + void output(TInfoSink&, bool tree) { } + + bool isEsProfile() const { return false; } + bool getXfbMode() const { return false; } + bool isMultiStream() const { return false; } + TLayoutGeometry getOutputPrimitive() const { return ElgNone; } + bool getPostDepthCoverage() const { return false; } + bool getEarlyFragmentTests() const { return false; } + TLayoutDepth getDepth() const { return EldNone; } + bool getPixelCenterInteger() const { return false; } + void setOriginUpperLeft() { } + bool getOriginUpperLeft() const { return true; } + TInterlockOrdering getInterlockOrdering() const { return EioNone; } + + bool getAutoMapBindings() const { return false; } + bool getAutoMapLocations() const { return false; } + int getNumPushConstants() const { return 0; } + void addShaderRecordCount() { } + void addTaskNVCount() { } + void addTaskPayloadEXTCount() { } + void setUseVulkanMemoryModel() { } + bool usingVulkanMemoryModel() const { return false; } + bool usingPhysicalStorageBuffer() const { return false; } + bool usingVariablePointers() const { return false; } + unsigned getXfbStride(int buffer) const { return 0; } + bool hasLayoutDerivativeModeNone() const { return false; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return LayoutDerivativeNone; } +#else + void output(TInfoSink&, bool tree); + + bool isEsProfile() const { return profile == EEsProfile; } + + void setShiftBinding(TResourceType res, unsigned int shift) + { + shiftBinding[res] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) + processes.addIfNonZero(name, shift); + } + + unsigned int getShiftBinding(TResourceType res) const { return shiftBinding[res]; } + + void setShiftBindingForSet(TResourceType res, unsigned int shift, unsigned int set) + { + if (shift == 0) // ignore if there's no shift: it's a no-op. + return; + + shiftBindingForSet[res][set] = shift; + + const char* name = getResourceName(res); + if (name != nullptr) { + processes.addProcess(name); + processes.addArgument(shift); + processes.addArgument(set); + } + } + + int getShiftBindingForSet(TResourceType res, unsigned int set) const + { + const auto shift = shiftBindingForSet[res].find(set); + return shift == shiftBindingForSet[res].end() ? 
-1 : shift->second; + } + bool hasShiftBindingForSet(TResourceType res) const { return !shiftBindingForSet[res].empty(); } + + void setResourceSetBinding(const std::vector<std::string>& shift) + { + resourceSetBinding = shift; + if (shift.size() > 0) { + processes.addProcess("resource-set-binding"); + for (int s = 0; s < (int)shift.size(); ++s) + processes.addArgument(shift[s]); + } + } + const std::vector<std::string>& getResourceSetBinding() const { return resourceSetBinding; } + void setAutoMapBindings(bool map) + { + autoMapBindings = map; + if (autoMapBindings) + processes.addProcess("auto-map-bindings"); + } + bool getAutoMapBindings() const { return autoMapBindings; } + void setAutoMapLocations(bool map) + { + autoMapLocations = map; + if (autoMapLocations) + processes.addProcess("auto-map-locations"); + } + bool getAutoMapLocations() const { return autoMapLocations; } + +#ifdef ENABLE_HLSL + void setFlattenUniformArrays(bool flatten) + { + flattenUniformArrays = flatten; + if (flattenUniformArrays) + processes.addProcess("flatten-uniform-arrays"); + } + bool getFlattenUniformArrays() const { return flattenUniformArrays; } +#endif + void setNoStorageFormat(bool b) + { + useUnknownFormat = b; + if (useUnknownFormat) + processes.addProcess("no-storage-format"); + } + bool getNoStorageFormat() const { return useUnknownFormat; } + void setUseVulkanMemoryModel() + { + useVulkanMemoryModel = true; + processes.addProcess("use-vulkan-memory-model"); + } + bool usingVulkanMemoryModel() const { return useVulkanMemoryModel; } + void setUsePhysicalStorageBuffer() + { + usePhysicalStorageBuffer = true; + } + bool usingPhysicalStorageBuffer() const { return usePhysicalStorageBuffer; } + void setUseVariablePointers() + { + useVariablePointers = true; + processes.addProcess("use-variable-pointers"); + } + bool usingVariablePointers() const { return useVariablePointers; } + +#ifdef ENABLE_HLSL + template<class T> T addCounterBufferName(const T& name) const { return name + implicitCounterName; } + bool hasCounterBufferName(const TString& name) const { + size_t len = strlen(implicitCounterName); + return name.size() > len && + name.compare(name.size() - len, len, implicitCounterName) == 0; + } +#endif + + void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode) { textureSamplerTransformMode = mode; } + int getNumPushConstants() const { return numPushConstants; } + void addShaderRecordCount() { ++numShaderRecordBlocks; } + void addTaskNVCount() { ++numTaskNVBlocks; } + void addTaskPayloadEXTCount() { ++numTaskEXTPayloads; } + + bool setInvocations(int i) + { + if (invocations != TQualifier::layoutNotSet) + return invocations == i; + invocations = i; + return true; + } + int getInvocations() const { return invocations; } + bool setVertices(int m) + { + if (vertices != TQualifier::layoutNotSet) + return vertices == m; + vertices = m; + return true; + } + int getVertices() const { return vertices; } + bool setInputPrimitive(TLayoutGeometry p) + { + if (inputPrimitive != ElgNone) + return inputPrimitive == p; + inputPrimitive = p; + return true; + } + TLayoutGeometry getInputPrimitive() const { return inputPrimitive; } + bool setVertexSpacing(TVertexSpacing s) + { + if (vertexSpacing != EvsNone) + return vertexSpacing == s; + vertexSpacing = s; + return true; + } + TVertexSpacing getVertexSpacing() const { return vertexSpacing; } + bool setVertexOrder(TVertexOrder o) + { + if (vertexOrder != EvoNone) + return vertexOrder == o; + vertexOrder = o; + return true; + } + TVertexOrder getVertexOrder() const { return vertexOrder; }
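+ + // Usage sketch (illustrative only; not upstream glslang text). The setInvocations/setVertices/ + // setInputPrimitive/setVertexSpacing/setVertexOrder family above follows a "first declaration wins" + // pattern: the first call records the value and returns true, while later calls return whether the + // redeclaration matches, so a false return signals a conflict for the caller to diagnose: + // TIntermediate interm(EShLangGeometry); + // bool ok = interm.setInputPrimitive(ElgTriangles); // true: first declaration + // ok = interm.setInputPrimitive(ElgTriangles); // true: consistent redeclaration + // ok = interm.setInputPrimitive(ElgLines); // false: conflicting redeclaration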
+ void setPointMode() { pointMode = true; } + bool getPointMode() const { return pointMode; } + + bool setInterlockOrdering(TInterlockOrdering o) + { + if (interlockOrdering != EioNone) + return interlockOrdering == o; + interlockOrdering = o; + return true; + } + TInterlockOrdering getInterlockOrdering() const { return interlockOrdering; } + + void setXfbMode() { xfbMode = true; } + bool getXfbMode() const { return xfbMode; } + void setMultiStream() { multiStream = true; } + bool isMultiStream() const { return multiStream; } + bool setOutputPrimitive(TLayoutGeometry p) + { + if (outputPrimitive != ElgNone) + return outputPrimitive == p; + outputPrimitive = p; + return true; + } + TLayoutGeometry getOutputPrimitive() const { return outputPrimitive; } + void setPostDepthCoverage() { postDepthCoverage = true; } + bool getPostDepthCoverage() const { return postDepthCoverage; } + void setEarlyFragmentTests() { earlyFragmentTests = true; } + void setEarlyAndLateFragmentTestsAMD() { earlyAndLateFragmentTestsAMD = true; } + bool getEarlyFragmentTests() const { return earlyFragmentTests; } + bool getEarlyAndLateFragmentTestsAMD() const { return earlyAndLateFragmentTestsAMD; } + bool setDepth(TLayoutDepth d) + { + if (depthLayout != EldNone) + return depthLayout == d; + depthLayout = d; + return true; + } + bool setStencil(TLayoutStencil s) + { + if (stencilLayout != ElsNone) + return stencilLayout == s; + stencilLayout = s; + return true; + } + TLayoutDepth getDepth() const { return depthLayout; } + TLayoutStencil getStencil() const { return stencilLayout; } + void setOriginUpperLeft() { originUpperLeft = true; } + bool getOriginUpperLeft() const { return originUpperLeft; } + void setPixelCenterInteger() { pixelCenterInteger = true; } + bool getPixelCenterInteger() const { return pixelCenterInteger; } + void setTexCoordRedeclared() { texCoordBuiltinRedeclared = true; } + bool getTexCoordRedeclared() const { return texCoordBuiltinRedeclared; } + void addBlendEquation(TBlendEquationShift b) { blendEquations |= (1 << b); } + unsigned int getBlendEquations() const { return blendEquations; } + bool setXfbBufferStride(int buffer, unsigned stride) + { + if (xfbBuffers[buffer].stride != TQualifier::layoutXfbStrideEnd) + return xfbBuffers[buffer].stride == stride; + xfbBuffers[buffer].stride = stride; + return true; + } + unsigned getXfbStride(int buffer) const { return xfbBuffers[buffer].stride; } + int addXfbBufferOffset(const TType&); + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType, bool& contains32BitType, bool& contains16BitType) const; + unsigned int computeTypeXfbSize(const TType&, bool& contains64BitType) const; + void setLayoutOverrideCoverage() { layoutOverrideCoverage = true; } + bool getLayoutOverrideCoverage() const { return layoutOverrideCoverage; } + void setGeoPassthroughEXT() { geoPassthroughEXT = true; } + bool getGeoPassthroughEXT() const { return geoPassthroughEXT; } + void setLayoutDerivativeMode(ComputeDerivativeMode mode) { computeDerivativeMode = mode; } + bool hasLayoutDerivativeModeNone() const { return computeDerivativeMode != LayoutDerivativeNone; } + ComputeDerivativeMode getLayoutDerivativeModeNone() const { return computeDerivativeMode; } + void setLayoutPrimitiveCulling() { layoutPrimitiveCulling = true; } + bool getLayoutPrimitiveCulling() const { return layoutPrimitiveCulling; } + bool setPrimitives(int m) + { + if (primitives != TQualifier::layoutNotSet) + return primitives == m; + primitives = m; + return true; + } + int getPrimitives() const { 
return primitives; } + const char* addSemanticName(const TString& name) + { + return semanticNameSet.insert(name).first->c_str(); + } + void addUniformLocationOverride(const char* nameStr, int location) + { + std::string name = nameStr; + uniformLocationOverrides[name] = location; + } + + int getUniformLocationOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = uniformLocationOverrides.find(name); + if (pos == uniformLocationOverrides.end()) + return -1; + else + return pos->second; + } + + void setUniformLocationBase(int base) { uniformLocationBase = base; } + int getUniformLocationBase() const { return uniformLocationBase; } + + void setNeedsLegalization() { needToLegalize = true; } + bool needsLegalization() const { return needToLegalize; } + + void setBinaryDoubleOutput() { binaryDoubleOutput = true; } + bool getBinaryDoubleOutput() { return binaryDoubleOutput; } + + void setSubgroupUniformControlFlow() { subgroupUniformControlFlow = true; } + bool getSubgroupUniformControlFlow() const { return subgroupUniformControlFlow; } + + // GL_EXT_spirv_intrinsics + void insertSpirvRequirement(const TSpirvRequirement* spirvReq); + bool hasSpirvRequirement() const { return spirvRequirement != nullptr; } + const TSpirvRequirement& getSpirvRequirement() const { return *spirvRequirement; } + void insertSpirvExecutionMode(int executionMode, const TIntermAggregate* args = nullptr); + void insertSpirvExecutionModeId(int executionMode, const TIntermAggregate* args); + bool hasSpirvExecutionMode() const { return spirvExecutionMode != nullptr; } + const TSpirvExecutionMode& getSpirvExecutionMode() const { return *spirvExecutionMode; } +#endif // GLSLANG_WEB + + void addBlockStorageOverride(const char* nameStr, TBlockStorageClass backing) + { + std::string name(nameStr); + blockBackingOverrides[name] = backing; + } + TBlockStorageClass getBlockStorageOverride(const char* nameStr) const + { + std::string name = nameStr; + auto pos = blockBackingOverrides.find(name); + if (pos == blockBackingOverrides.end()) + return EbsNone; + else + return pos->second; + } +#ifdef ENABLE_HLSL + void setHlslFunctionality1() { hlslFunctionality1 = true; } + bool getHlslFunctionality1() const { return hlslFunctionality1; } + void setHlslOffsets() + { + hlslOffsets = true; + if (hlslOffsets) + processes.addProcess("hlsl-offsets"); + } + bool usingHlslOffsets() const { return hlslOffsets; } + void setHlslIoMapping(bool b) + { + hlslIoMapping = b; + if (hlslIoMapping) + processes.addProcess("hlsl-iomap"); + } + bool usingHlslIoMapping() { return hlslIoMapping; } +#else + bool getHlslFunctionality1() const { return false; } + bool usingHlslOffsets() const { return false; } + bool usingHlslIoMapping() { return false; } +#endif + + bool usingScalarBlockLayout() const { + for (auto extIt = requestedExtensions.begin(); extIt != requestedExtensions.end(); ++extIt) { + if (*extIt == E_GL_EXT_scalar_block_layout) + return true; + } + return false; + } + + bool IsRequestedExtension(const char* extension) const + { + return (requestedExtensions.find(extension) != requestedExtensions.end()); + } + + void addToCallGraph(TInfoSink&, const TString& caller, const TString& callee); + void merge(TInfoSink&, TIntermediate&); + void finalCheck(TInfoSink&, bool keepUncalled); + + void mergeGlobalUniformBlocks(TInfoSink& infoSink, TIntermediate& unit, bool mergeExistingOnly); + void mergeUniformObjects(TInfoSink& infoSink, TIntermediate& unit); + void checkStageIO(TInfoSink&, TIntermediate&); + + bool 
buildConvertOp(TBasicType dst, TBasicType src, TOperator& convertOp) const; + TIntermTyped* createConversion(TBasicType convertTo, TIntermTyped* node) const; + + void addIoAccessed(const TString& name) { ioAccessed.insert(name); } + bool inIoAccessed(const TString& name) const { return ioAccessed.find(name) != ioAccessed.end(); } + + int addUsedLocation(const TQualifier&, const TType&, bool& typeCollision); + int checkLocationRange(int set, const TIoRange& range, const TType&, bool& typeCollision); + int checkLocationRT(int set, int location); + int addUsedOffsets(int binding, int offset, int numOffsets); + bool addUsedConstantId(int id); + static int computeTypeLocationSize(const TType&, EShLanguage); + static int computeTypeUniformLocationSize(const TType&); + + static int getBaseAlignmentScalar(const TType&, int& size); + static int getBaseAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor); + static int getScalarAlignment(const TType&, int& size, int& stride, bool rowMajor); + static int getMemberAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor); + static bool improperStraddle(const TType& type, int size, int offset); + static void updateOffset(const TType& parentType, const TType& memberType, int& offset, int& memberSize); + static int getOffset(const TType& type, int index); + static int getBlockSize(const TType& blockType); + static int computeBufferReferenceTypeSize(const TType&); + static bool isIoResizeArray(const TType& type, EShLanguage language); + + bool promote(TIntermOperator*); + void setNanMinMaxClamp(bool setting) { nanMinMaxClamp = setting; } + bool getNanMinMaxClamp() const { return nanMinMaxClamp; } + + void setSourceFile(const char* file) { if (file != nullptr) sourceFile = file; } + const std::string& getSourceFile() const { return sourceFile; } + void addSourceText(const char* text, size_t len) { sourceText.append(text, len); } + const std::string& getSourceText() const { return sourceText; } + const std::map<std::string, std::string>& getIncludeText() const { return includeText; } + void addIncludeText(const char* name, const char* text, size_t len) { includeText[name].assign(text,len); } + void addProcesses(const std::vector<std::string>& p) + { + for (int i = 0; i < (int)p.size(); ++i) + processes.addProcess(p[i]); + } + void addProcess(const std::string& process) { processes.addProcess(process); } + void addProcessArgument(const std::string& arg) { processes.addArgument(arg); } + const std::vector<std::string>& getProcesses() const { return processes.getProcesses(); } + unsigned long long getUniqueId() const { return uniqueId; } + void setUniqueId(unsigned long long id) { uniqueId = id; } + + // Certain explicit conversions are allowed conditionally +#ifdef GLSLANG_WEB + bool getArithemeticInt8Enabled() const { return false; } + bool getArithemeticInt16Enabled() const { return false; } + bool getArithemeticFloat16Enabled() const { return false; } + void updateNumericFeature(TNumericFeatures::feature f, bool on) { } +#else + bool getArithemeticInt8Enabled() const { + return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int8); + } + bool getArithemeticInt16Enabled() const { + return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::gpu_shader_int16) || + numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_int16); + } + +
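// Illustrative note (sketch, not upstream text): each getArithemetic*Enabled() query above ORs the + // umbrella shader_explicit_arithmetic_types feature with the width-specific features, so either form + // of extension declaration enables the corresponding arithmetic types, e.g.: + // interm.updateNumericFeature(TNumericFeatures::shader_explicit_arithmetic_types_int16, true); + // assert(interm.getArithemeticInt16Enabled()); // enabled via the int16-specific feature + +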
bool getArithemeticFloat16Enabled() const { + return numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types) || + numericFeatures.contains(TNumericFeatures::gpu_shader_half_float) || + numericFeatures.contains(TNumericFeatures::shader_explicit_arithmetic_types_float16); + } + void updateNumericFeature(TNumericFeatures::feature f, bool on) + { on ? numericFeatures.insert(f) : numericFeatures.erase(f); } +#endif + +protected: + TIntermSymbol* addSymbol(long long Id, const TString&, const TType&, const TConstUnionArray&, TIntermTyped* subtree, const TSourceLoc&); + void error(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount); + void warn(TInfoSink& infoSink, const char*, EShLanguage unitStage = EShLangCount); + void mergeCallGraphs(TInfoSink&, TIntermediate&); + void mergeModes(TInfoSink&, TIntermediate&); + void mergeTrees(TInfoSink&, TIntermediate&); + void seedIdMap(TIdMaps& idMaps, long long& IdShift); + void remapIds(const TIdMaps& idMaps, long long idShift, TIntermediate&); + void mergeBodies(TInfoSink&, TIntermSequence& globals, const TIntermSequence& unitGlobals); + void mergeLinkerObjects(TInfoSink&, TIntermSequence& linkerObjects, const TIntermSequence& unitLinkerObjects, EShLanguage); + void mergeBlockDefinitions(TInfoSink&, TIntermSymbol* block, TIntermSymbol* unitBlock, TIntermediate* unitRoot); + void mergeImplicitArraySizes(TType&, const TType&); + void mergeErrorCheck(TInfoSink&, const TIntermSymbol&, const TIntermSymbol&, EShLanguage); + void checkCallGraphCycles(TInfoSink&); + void checkCallGraphBodies(TInfoSink&, bool keepUncalled); + void inOutLocationCheck(TInfoSink&); + void sharedBlockCheck(TInfoSink&); + bool userOutputUsed() const; + bool isSpecializationOperation(const TIntermOperator&) const; + bool isNonuniformPropagating(TOperator) const; + bool promoteUnary(TIntermUnary&); + bool promoteBinary(TIntermBinary&); + void addSymbolLinkageNode(TIntermAggregate*& linkage, TSymbolTable&, const TString&); + bool promoteAggregate(TIntermAggregate&); + void pushSelector(TIntermSequence&, const TVectorSelector&, const TSourceLoc&); + void pushSelector(TIntermSequence&, const TMatrixSelector&, const TSourceLoc&); + bool specConstantPropagates(const TIntermTyped&, const TIntermTyped&); + void performTextureUpgradeAndSamplerRemovalTransformation(TIntermNode* root); + bool isConversionAllowed(TOperator op, TIntermTyped* node) const; + std::tuple<TBasicType, TBasicType> getConversionDestinationType(TBasicType type0, TBasicType type1, TOperator op) const; + + static const char* getResourceName(TResourceType); + + const EShLanguage language; // stage, known at construction time + std::string entryPointName; + std::string entryPointMangledName; + typedef std::list<TCall> TGraph; + TGraph callGraph; + + EProfile profile; // source profile + int version; // source version + SpvVersion spvVersion; + TIntermNode* treeRoot; + std::set<std::string> requestedExtensions; // cumulation of all enabled or required extensions; not connected to what subset of the shader used them + MustBeAssigned<TBuiltInResource> resources; + int numEntryPoints; + int numErrors; + int numPushConstants; + bool recursive; + bool invertY; + bool dxPositionW; + bool enhancedMsgs; + bool debugInfo; + bool useStorageBuffer; + bool invariantAll; + bool nanMinMaxClamp; // true if desiring min/max/clamp to favor non-NaN over NaN + bool depthReplacing; + bool stencilReplacing; + int localSize[3]; + bool localSizeNotDefault[3]; + int localSizeSpecId[3]; + unsigned long long uniqueId; + + std::string globalUniformBlockName; + std::string
atomicCounterBlockName; + unsigned int globalUniformBlockSet; + unsigned int globalUniformBlockBinding; + unsigned int atomicCounterBlockSet; + +#ifndef GLSLANG_WEB +public: + const char* const implicitThisName; + const char* const implicitCounterName; +protected: + EShSource source; // source language, known a bit later + bool useVulkanMemoryModel; + int invocations; + int vertices; + TLayoutGeometry inputPrimitive; + TLayoutGeometry outputPrimitive; + bool pixelCenterInteger; + bool originUpperLeft; + bool texCoordBuiltinRedeclared; + TVertexSpacing vertexSpacing; + TVertexOrder vertexOrder; + TInterlockOrdering interlockOrdering; + bool pointMode; + bool earlyFragmentTests; + bool postDepthCoverage; + bool earlyAndLateFragmentTestsAMD; + TLayoutDepth depthLayout; + TLayoutStencil stencilLayout; + bool hlslFunctionality1; + int blendEquations; // an 'or'ing of masks of shifts of TBlendEquationShift + bool xfbMode; + std::vector<TXfbBuffer> xfbBuffers; // all the data we need to track per xfb buffer + bool multiStream; + bool layoutOverrideCoverage; + bool geoPassthroughEXT; + int numShaderRecordBlocks; + ComputeDerivativeMode computeDerivativeMode; + int primitives; + int numTaskNVBlocks; + bool layoutPrimitiveCulling; + int numTaskEXTPayloads; + + // Base shift values + std::array<unsigned int, EResCount> shiftBinding; + + // Per-descriptor-set shift values + std::array<std::map<unsigned int, unsigned int>, EResCount> shiftBindingForSet; + + std::vector<std::string> resourceSetBinding; + bool autoMapBindings; + bool autoMapLocations; + bool flattenUniformArrays; + bool useUnknownFormat; + bool hlslOffsets; + bool hlslIoMapping; + bool useVariablePointers; + + std::set<TString> semanticNameSet; + + EShTextureSamplerTransformMode textureSamplerTransformMode; + + bool needToLegalize; + bool binaryDoubleOutput; + bool subgroupUniformControlFlow; + bool usePhysicalStorageBuffer; + + TSpirvRequirement* spirvRequirement; + TSpirvExecutionMode* spirvExecutionMode; + + std::unordered_map<std::string, int> uniformLocationOverrides; + int uniformLocationBase; + TNumericFeatures numericFeatures; +#endif + std::unordered_map<std::string, TBlockStorageClass> blockBackingOverrides; + + std::unordered_set<int> usedConstantId; // specialization constant ids used + std::vector<TRange> usedAtomics; // sets of bindings used by atomic counters + std::vector<TIoRange> usedIo[4]; // sets of used locations, one for each of in, out, uniform, and buffers + std::vector<TRange> usedIoRT[2]; // sets of used locations, one for rayPayload/rayPayloadIN and other + // for callableData/callableDataIn + // set of names of statically read/written I/O that might need extra checking + std::set<TString> ioAccessed; + + // source code of shader, useful as part of debug information + std::string sourceFile; + std::string sourceText; + + // Included text. First string is a name, second is the included text + std::map<std::string, std::string> includeText; + + // for OpModuleProcessed, or equivalent + TProcesses processes; + +private: + void operator=(TIntermediate&); // prevent assignments +}; + +} // end namespace glslang + +#endif // _LOCAL_INTERMEDIATE_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/parseVersions.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/parseVersions.h new file mode 100644 index 0000000..3c52ff1 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/parseVersions.h @@ -0,0 +1,240 @@ +// +// Copyright (C) 2015-2018 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// +// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +// This is implemented in Versions.cpp + +#ifndef _PARSE_VERSIONS_INCLUDED_ +#define _PARSE_VERSIONS_INCLUDED_ + +#include "../Public/ShaderLang.h" +#include "../Include/InfoSink.h" +#include "Scan.h" + +#include <map> + +namespace glslang { + +// +// Base class for parse helpers. +// This just has version-related information and checking. +// This class should be sufficient for preprocessing. +// +class TParseVersions { +public: + TParseVersions(TIntermediate& interm, int version, EProfile profile, + const SpvVersion& spvVersion, EShLanguage language, TInfoSink& infoSink, + bool forwardCompatible, EShMessages messages) + : +#if !defined(GLSLANG_WEB) + forwardCompatible(forwardCompatible), + profile(profile), +#endif + infoSink(infoSink), version(version), + language(language), + spvVersion(spvVersion), + intermediate(interm), messages(messages), numErrors(0), currentScanner(0) { } + virtual ~TParseVersions() { } + void requireStage(const TSourceLoc&, EShLanguageMask, const char* featureDesc); + void requireStage(const TSourceLoc&, EShLanguage, const char* featureDesc); +#ifdef GLSLANG_WEB + const EProfile profile = EEsProfile; + bool isEsProfile() const { return true; } + void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc) + { + if (! (EEsProfile & profileMask)) + error(loc, "not supported with this profile:", featureDesc, ProfileName(profile)); + } + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions, + const char* const extensions[], const char* featureDesc) + { + if ((EEsProfile & profileMask) && (minVersion == 0 || version < minVersion)) + error(loc, "not supported for this version or the enabled extensions", featureDesc, ""); + } + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension, + const char* featureDesc) + { + profileRequires(loc, profileMask, minVersion, extension ?
1 : 0, &extension, featureDesc); + } + void initializeExtensionBehavior() { } + void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc) { } + void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc) { } + void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc) { } + TExtensionBehavior getExtensionBehavior(const char*) { return EBhMissing; } + bool extensionTurnedOn(const char* const extension) { return false; } + bool extensionsTurnedOn(int numExtensions, const char* const extensions[]) { return false; } + void updateExtensionBehavior(int line, const char* const extension, const char* behavior) { } + void updateExtensionBehavior(const char* const extension, TExtensionBehavior) { } + void checkExtensionStage(const TSourceLoc&, const char* const extension) { } + void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior) { } + void fullIntegerCheck(const TSourceLoc&, const char* op) { } + void doubleCheck(const TSourceLoc&, const char* op) { } + bool float16Arithmetic() { return false; } + void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int16Arithmetic() { return false; } + void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + bool int8Arithmetic() { return false; } + void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc) { } + void int64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false) { } + bool relaxedErrors() const { return false; } + bool suppressWarnings() const { return true; } + bool isForwardCompatible() const { return false; } +#else + bool forwardCompatible; // true if errors are to be given for use of deprecated features + EProfile profile; // the declared profile in the shader (core by default) + bool isEsProfile() const { return profile == EEsProfile; } + void requireProfile(const TSourceLoc& loc, int profileMask, const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, int numExtensions, + const char* const extensions[], const char* featureDesc); + void profileRequires(const TSourceLoc& loc, int profileMask, int minVersion, const char* extension, + const char* featureDesc); + virtual void initializeExtensionBehavior(); + virtual void checkDeprecated(const TSourceLoc&, int queryProfiles, int depVersion, const char* featureDesc); + virtual void requireNotRemoved(const TSourceLoc&, int queryProfiles, int removedVersion, const char* featureDesc); + virtual void requireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void ppRequireExtensions(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual TExtensionBehavior getExtensionBehavior(const char*); + virtual bool extensionTurnedOn(const char* const extension); + virtual bool extensionsTurnedOn(int numExtensions, const char* const extensions[]); + virtual void updateExtensionBehavior(int line, const char* const extension, const 
char* behavior); + virtual void updateExtensionBehavior(const char* const extension, TExtensionBehavior); + virtual bool checkExtensionsRequested(const TSourceLoc&, int numExtensions, const char* const extensions[], + const char* featureDesc); + virtual void checkExtensionStage(const TSourceLoc&, const char* const extension); + virtual void extensionRequires(const TSourceLoc&, const char* const extension, const char* behavior); + virtual void fullIntegerCheck(const TSourceLoc&, const char* op); + + virtual void unimplemented(const TSourceLoc&, const char* featureDesc); + virtual void doubleCheck(const TSourceLoc&, const char* op); + virtual void float16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void float16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool float16Arithmetic(); + virtual void requireFloat16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int16ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int16Arithmetic(); + virtual void requireInt16Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void int8ScalarVectorCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual bool int8Arithmetic(); + virtual void requireInt8Arithmetic(const TSourceLoc& loc, const char* op, const char* featureDesc); + virtual void float16OpaqueCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void int64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt8Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt16Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitInt32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat32Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void explicitFloat64Check(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void fcoopmatCheck(const TSourceLoc&, const char* op, bool builtIn = false); + virtual void intcoopmatCheck(const TSourceLoc&, const char *op, bool builtIn = false); + bool relaxedErrors() const { return (messages & EShMsgRelaxedErrors) != 0; } + bool suppressWarnings() const { return (messages & EShMsgSuppressWarnings) != 0; } + bool isForwardCompatible() const { return forwardCompatible; } +#endif // GLSLANG_WEB + virtual void spvRemoved(const TSourceLoc&, const char* op); + virtual void vulkanRemoved(const TSourceLoc&, const char* op); + virtual void requireVulkan(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char* op); + virtual void requireSpv(const TSourceLoc&, const char *op, unsigned int version); + + +#if defined(GLSLANG_WEB) && !defined(GLSLANG_WEB_DEVEL) + void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } + void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { addError(); } + void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) { } +#else + virtual void C_DECL error(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) 
= 0; + virtual void C_DECL warn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; + virtual void C_DECL ppError(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; + virtual void C_DECL ppWarn(const TSourceLoc&, const char* szReason, const char* szToken, + const char* szExtraInfoFormat, ...) = 0; +#endif + + void addError() { ++numErrors; } + int getNumErrors() const { return numErrors; } + + void setScanner(TInputScanner* scanner) { currentScanner = scanner; } + TInputScanner* getScanner() const { return currentScanner; } + const TSourceLoc& getCurrentLoc() const { return currentScanner->getSourceLoc(); } + void setCurrentLine(int line) { currentScanner->setLine(line); } + void setCurrentColumn(int col) { currentScanner->setColumn(col); } + void setCurrentSourceName(const char* name) { currentScanner->setFile(name); } + void setCurrentString(int string) { currentScanner->setString(string); } + + void getPreamble(std::string&); +#ifdef ENABLE_HLSL + bool isReadingHLSL() const { return (messages & EShMsgReadHlsl) == EShMsgReadHlsl; } + bool hlslEnable16BitTypes() const { return (messages & EShMsgHlslEnable16BitTypes) != 0; } + bool hlslDX9Compatible() const { return (messages & EShMsgHlslDX9Compatible) != 0; } +#else + bool isReadingHLSL() const { return false; } +#endif + + TInfoSink& infoSink; + + // compilation mode + int version; // version, updated by #version in the shader + EShLanguage language; // really the stage + SpvVersion spvVersion; + TIntermediate& intermediate; // helper for making and hooking up pieces of the parse tree + +protected: + TMap<TString, TExtensionBehavior> extensionBehavior; // for each extension string, what its current behavior is + TMap<TString, unsigned int> extensionMinSpv; // for each extension string, store minimum spirv required + EShMessages messages; // errors/warnings/rule-sets + int numErrors; // number of compile-time errors encountered + TInputScanner* currentScanner; + +private: + explicit TParseVersions(const TParseVersions&); + TParseVersions& operator=(const TParseVersions&); +}; + +} // end namespace glslang + +#endif // _PARSE_VERSIONS_INCLUDED_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/preprocessor/PpContext.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/preprocessor/PpContext.h new file mode 100644 index 0000000..714b5ea --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/preprocessor/PpContext.h @@ -0,0 +1,703 @@ +// +// Copyright (C) 2013 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. + +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+\****************************************************************************/ + +#ifndef PPCONTEXT_H +#define PPCONTEXT_H + +#include <stack> +#include <unordered_map> +#include <sstream> + +#include "../ParseHelper.h" +#include "PpTokens.h" + +/* windows only pragma */ +#ifdef _MSC_VER + #pragma warning(disable : 4127) +#endif + +namespace glslang { + +class TPpToken { +public: + TPpToken() { clear(); } + void clear() + { + space = false; + i64val = 0; + loc.init(); + name[0] = 0; + } + + // Used for comparing macro definitions, so checks what is relevant for that. + bool operator==(const TPpToken& right) const + { + return space == right.space && + ival == right.ival && dval == right.dval && i64val == right.i64val && + strncmp(name, right.name, MaxTokenLength) == 0; + } + bool operator!=(const TPpToken& right) const { return ! operator==(right); } + + TSourceLoc loc; + // True if a space (for white space or a removed comment) should also be + // recognized, in front of the token returned: + bool space; + // Numeric value of the token: + union { + int ival; + double dval; + long long i64val; + }; + // Text string of the token: + char name[MaxTokenLength + 1]; +}; + +class TStringAtomMap { +// +// Implementation is in PpAtom.cpp +// +// Maintain a bi-directional mapping between relevant preprocessor strings and +// "atoms", which are unique integers (small, contiguous, not hash-like), one per string. +// +public: + TStringAtomMap(); + + // Map string -> atom. + // Return 0 if no existing string. + int getAtom(const char* s) const + { + auto it = atomMap.find(s); + return it == atomMap.end() ? 0 : it->second; + } + + // Map a new or existing string -> atom, inventing a new atom if necessary. + int getAddAtom(const char* s) + { + int atom = getAtom(s); + if (atom == 0) { + atom = nextAtom++; + addAtomFixed(s, atom); + } + return atom; + } + + // Map atom -> string. + const char* getString(int atom) const { return stringMap[atom]->c_str(); } + +protected: + TStringAtomMap(TStringAtomMap&); + TStringAtomMap& operator=(TStringAtomMap&); + + TUnorderedMap<TString, int> atomMap; + TVector<const TString*> stringMap; // these point into the TString in atomMap + int nextAtom; + + // Bad source characters can lead to bad atoms, so gracefully handle those by + // pre-filling the table with them (to avoid if tests later). + TString badToken; + + // Add bi-directional mappings: + // - string -> atom + // - atom -> string + void addAtomFixed(const char* s, int atom) + { + auto it = atomMap.insert(std::pair<TString, int>(s, atom)).first; + if (stringMap.size() < (size_t)atom + 1) + stringMap.resize(atom + 100, &badToken); + stringMap[atom] = &it->first; + } +}; + +class TInputScanner; + +enum MacroExpandResult { + MacroExpandNotStarted, // macro not expanded, which might not be an error + MacroExpandError, // a clear error occurred while expanding, no expansion + MacroExpandStarted, // macro expansion process has started + MacroExpandUndef // macro is undefined and will be expanded +}; + +// This class is the result of turning a huge pile of C code communicating through globals +// into a class. This was done to allow instancing to attain thread safety. +// Don't expect too much in terms of OO design.
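+ +// Driving-loop sketch (illustrative only, not upstream text; assumes a TParseContextBase, a +// TShader::Includer, and an initialized TInputScanner, which the interfaces below require): +// TPpContext pp(parseContext, "shader.frag", includer); +// pp.setInput(scanner, /* versionWillBeError */ false); +// TPpToken ppToken; +// int token; +// while ((token = pp.tokenize(ppToken)) != EndOfInput) { +// /* hand token/ppToken to the parser */ +// }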
+class TPpContext { +public: + TPpContext(TParseContextBase&, const std::string& rootFileName, TShader::Includer&); + virtual ~TPpContext(); + + void setPreamble(const char* preamble, size_t length); + + int tokenize(TPpToken& ppToken); + int tokenPaste(int token, TPpToken&); + + class tInput { + public: + tInput(TPpContext* p) : done(false), pp(p) { } + virtual ~tInput() { } + + virtual int scan(TPpToken*) = 0; + virtual int getch() = 0; + virtual void ungetch() = 0; + virtual bool peekPasting() { return false; } // true when about to see ## + virtual bool peekContinuedPasting(int) { return false; } // true when non-spaced tokens can paste + virtual bool endOfReplacementList() { return false; } // true when at the end of a macro replacement list (RHS of #define) + virtual bool isMacroInput() { return false; } + + // Will be called when we start reading tokens from this instance + virtual void notifyActivated() {} + // Will be called when we do not read tokens from this instance anymore + virtual void notifyDeleted() {} + protected: + bool done; + TPpContext* pp; + }; + + void setInput(TInputScanner& input, bool versionWillBeError); + + void pushInput(tInput* in) + { + inputStack.push_back(in); + in->notifyActivated(); + } + void popInput() + { + inputStack.back()->notifyDeleted(); + delete inputStack.back(); + inputStack.pop_back(); + } + + // + // From PpTokens.cpp + // + + // Capture the needed parts of a token stream for macro recording/playback. + class TokenStream { + public: + // Manage a stream of these 'Token', which capture the relevant parts + // of a TPpToken, plus its atom. + class Token { + public: + Token(int atom, const TPpToken& ppToken) : + atom(atom), + space(ppToken.space), + i64val(ppToken.i64val), + name(ppToken.name) { } + int get(TPpToken& ppToken) + { + ppToken.clear(); + ppToken.space = space; + ppToken.i64val = i64val; + snprintf(ppToken.name, sizeof(ppToken.name), "%s", name.c_str()); + return atom; + } + bool isAtom(int a) const { return atom == a; } + int getAtom() const { return atom; } + bool nonSpaced() const { return !space; } + protected: + Token() {} + int atom; + bool space; // did a space precede the token? + long long i64val; + TString name; + }; + + TokenStream() : currentPos(0) { } + + void putToken(int token, TPpToken* ppToken); + bool peekToken(int atom) { return !atEnd() && stream[currentPos].isAtom(atom); } + bool peekContinuedPasting(int atom) + { + // This is basically necessary because, for example, the PP + // tokenizer only accepts valid numeric-literals plus suffixes, so + // separates numeric-literals plus bad suffix into two tokens, which + // should get both pasted together as one token when token pasting. + // + // The following code is a bit more generalized than the above example. 
+ if (!atEnd() && atom == PpAtomIdentifier && stream[currentPos].nonSpaced()) { + switch(stream[currentPos].getAtom()) { + case PpAtomConstInt: + case PpAtomConstUint: + case PpAtomConstInt64: + case PpAtomConstUint64: + case PpAtomConstInt16: + case PpAtomConstUint16: + case PpAtomConstFloat: + case PpAtomConstDouble: + case PpAtomConstFloat16: + case PpAtomConstString: + case PpAtomIdentifier: + return true; + default: + break; + } + } + + return false; + } + int getToken(TParseContextBase&, TPpToken*); + bool atEnd() { return currentPos >= stream.size(); } + bool peekTokenizedPasting(bool lastTokenPastes); + bool peekUntokenizedPasting(); + void reset() { currentPos = 0; } + + protected: + TVector<Token> stream; + size_t currentPos; + }; + + // + // From Pp.cpp + // + + struct MacroSymbol { + MacroSymbol() : functionLike(0), busy(0), undef(0) { } + TVector<int> args; + TokenStream body; + unsigned functionLike : 1; // 0 means object-like, 1 means function-like + unsigned busy : 1; + unsigned undef : 1; + }; + + typedef TMap<int, MacroSymbol> TSymbolMap; + TSymbolMap macroDefs; // map atoms to macro definitions + MacroSymbol* lookupMacroDef(int atom) + { + auto existingMacroIt = macroDefs.find(atom); + return (existingMacroIt == macroDefs.end()) ? nullptr : &(existingMacroIt->second); + } + void addMacroDef(int atom, MacroSymbol& macroDef) { macroDefs[atom] = macroDef; } + +protected: + TPpContext(TPpContext&); + TPpContext& operator=(TPpContext&); + + TStringAtomMap atomStrings; + char* preamble; // string to parse, all before line 1 of string 0; it is 0 if no preamble + int preambleLength; + char** strings; // official strings of shader, starting at string 0 line 1 + size_t* lengths; + int numStrings; // how many official strings there are + int currentString; // which string we're currently parsing (-1 for preamble) + + // Scanner data: + int previous_token; + TParseContextBase& parseContext; + + // Get the next token from the *stack* of input sources, popping input sources + // that are out of tokens, down until an input source is found that has a token. + // Return EndOfInput when there are no more tokens to be found by doing this. + int scanToken(TPpToken* ppToken) + { + int token = EndOfInput; + + while (! inputStack.empty()) { + token = inputStack.back()->scan(ppToken); + if (token != EndOfInput || inputStack.empty()) + break; + popInput(); + } + + return token; + } + int getChar() { return inputStack.back()->getch(); } + void ungetChar() { inputStack.back()->ungetch(); } + bool peekPasting() { return !inputStack.empty() && inputStack.back()->peekPasting(); } + bool peekContinuedPasting(int a) + { + return !inputStack.empty() && inputStack.back()->peekContinuedPasting(a); + } + bool endOfReplacementList() { return inputStack.empty() || inputStack.back()->endOfReplacementList(); } + bool isMacroInput() { return inputStack.size() > 0 && inputStack.back()->isMacroInput(); } + + static const int maxIfNesting = 65; + + int ifdepth; // current #if-#else-#endif nesting in the cpp.c file (pre-processor) + bool elseSeen[maxIfNesting]; // keep track of whether an else has been seen at a particular depth + int elsetracker; // counter for #if-#else and #endif constructs
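+ + // Sketch of how the atom and macro tables above cooperate during #define handling + // (illustrative flow only; the real logic lives in Pp.cpp): + // int atom = atomStrings.getAddAtom("MY_MACRO"); // string -> atom + // MacroSymbol mac; // object-like: functionLike == 0 + // /* fill mac.body with replacement-list tokens via putToken() */ + // addMacroDef(atom, mac); + // MacroSymbol* def = lookupMacroDef(atom); // nullptr if never #defined + // bool isDefined = (def != nullptr && def->undef == 0);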
+
+    class tMacroInput : public tInput {
+    public:
+        tMacroInput(TPpContext* pp) : tInput(pp), prepaste(false), postpaste(false) { }
+        virtual ~tMacroInput()
+        {
+            for (size_t i = 0; i < args.size(); ++i)
+                delete args[i];
+            for (size_t i = 0; i < expandedArgs.size(); ++i)
+                delete expandedArgs[i];
+        }
+
+        virtual int scan(TPpToken*) override;
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+        bool peekPasting() override { return prepaste; }
+        bool peekContinuedPasting(int a) override { return mac->body.peekContinuedPasting(a); }
+        bool endOfReplacementList() override { return mac->body.atEnd(); }
+        bool isMacroInput() override { return true; }
+
+        MacroSymbol *mac;
+        TVector<TokenStream*> args;
+        TVector<TokenStream*> expandedArgs;
+
+    protected:
+        bool prepaste;   // true if we are just before ##
+        bool postpaste;  // true if we are right after ##
+    };
+
+    class tMarkerInput : public tInput {
+    public:
+        tMarkerInput(TPpContext* pp) : tInput(pp) { }
+        virtual int scan(TPpToken*) override
+        {
+            if (done)
+                return EndOfInput;
+            done = true;
+
+            return marker;
+        }
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+        static const int marker = -3;
+    };
+
+    class tZeroInput : public tInput {
+    public:
+        tZeroInput(TPpContext* pp) : tInput(pp) { }
+        virtual int scan(TPpToken*) override;
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+    };
+
+    std::vector<tInput*> inputStack;
+    bool errorOnVersion;
+    bool versionSeen;
+
+    //
+    // from Pp.cpp
+    //
+
+    // Used to obtain #include content.
+    TShader::Includer& includer;
+
+    int CPPdefine(TPpToken * ppToken);
+    int CPPundef(TPpToken * ppToken);
+    int CPPelse(int matchelse, TPpToken * ppToken);
+    int extraTokenCheck(int atom, TPpToken* ppToken, int token);
+    int eval(int token, int precedence, bool shortCircuit, int& res, bool& err, TPpToken * ppToken);
+    int evalToToken(int token, bool shortCircuit, int& res, bool& err, TPpToken * ppToken);
+    int CPPif (TPpToken * ppToken);
+    int CPPifdef(int defined, TPpToken * ppToken);
+    int CPPinclude(TPpToken * ppToken);
+    int CPPline(TPpToken * ppToken);
+    int CPPerror(TPpToken * ppToken);
+    int CPPpragma(TPpToken * ppToken);
+    int CPPversion(TPpToken * ppToken);
+    int CPPextension(TPpToken * ppToken);
+    int readCPPline(TPpToken * ppToken);
+    int scanHeaderName(TPpToken* ppToken, char delimit);
+    TokenStream* PrescanMacroArg(TokenStream&, TPpToken*, bool newLineOkay);
+    MacroExpandResult MacroExpand(TPpToken* ppToken, bool expandUndef, bool newLineOkay);
+
+    //
+    // From PpTokens.cpp
+    //
+    void pushTokenStreamInput(TokenStream&, bool pasting = false);
+    void UngetToken(int token, TPpToken*);
+
+    class tTokenInput : public tInput {
+    public:
+        tTokenInput(TPpContext* pp, TokenStream* t, bool prepasting) :
+            tInput(pp),
+            tokens(t),
+            lastTokenPastes(prepasting) { }
+        virtual int scan(TPpToken *ppToken) override { return tokens->getToken(pp->parseContext, ppToken); }
+        virtual int getch() override { assert(0); return EndOfInput; }
+        virtual void ungetch() override { assert(0); }
+        virtual bool peekPasting() override { return tokens->peekTokenizedPasting(lastTokenPastes); }
+        bool peekContinuedPasting(int a) override { return tokens->peekContinuedPasting(a); }
+    protected:
+        TokenStream* tokens;
+        bool lastTokenPastes;  // true if the last token in the input is to be pasted, rather than consumed as a token
+    };
+
+    class tUngotTokenInput : public tInput {
+
public: + tUngotTokenInput(TPpContext* pp, int t, TPpToken* p) : tInput(pp), token(t), lval(*p) { } + virtual int scan(TPpToken *) override; + virtual int getch() override { assert(0); return EndOfInput; } + virtual void ungetch() override { assert(0); } + protected: + int token; + TPpToken lval; + }; + + // + // From PpScanner.cpp + // + class tStringInput : public tInput { + public: + tStringInput(TPpContext* pp, TInputScanner& i) : tInput(pp), input(&i) { } + virtual int scan(TPpToken*) override; + + // Scanner used to get source stream characters. + // - Escaped newlines are handled here, invisibly to the caller. + // - All forms of newline are handled, and turned into just a '\n'. + int getch() override + { + int ch = input->get(); + + if (ch == '\\') { + // Move past escaped newlines, as many as sequentially exist + do { + if (input->peek() == '\r' || input->peek() == '\n') { + bool allowed = pp->parseContext.lineContinuationCheck(input->getSourceLoc(), pp->inComment); + if (! allowed && pp->inComment) + return '\\'; + + // escape one newline now + ch = input->get(); + int nextch = input->get(); + if (ch == '\r' && nextch == '\n') + ch = input->get(); + else + ch = nextch; + } else + return '\\'; + } while (ch == '\\'); + } + + // handle any non-escaped newline + if (ch == '\r' || ch == '\n') { + if (ch == '\r' && input->peek() == '\n') + input->get(); + return '\n'; + } + + return ch; + } + + // Scanner used to backup the source stream characters. Newlines are + // handled here, invisibly to the caller, meaning have to undo exactly + // what getch() above does (e.g., don't leave things in the middle of a + // sequence of escaped newlines). + void ungetch() override + { + input->unget(); + + do { + int ch = input->peek(); + if (ch == '\r' || ch == '\n') { + if (ch == '\n') { + // correct for two-character newline + input->unget(); + if (input->peek() != '\r') + input->get(); + } + // now in front of a complete newline, move past an escape character + input->unget(); + if (input->peek() == '\\') + input->unget(); + else { + input->get(); + break; + } + } else + break; + } while (true); + } + + protected: + TInputScanner* input; + }; + + // Holds a reference to included file data, as well as a + // prologue and an epilogue string. This can be scanned using the tInput + // interface and acts as a single source string. + class TokenizableIncludeFile : public tInput { + public: + // Copies prologue and epilogue. The includedFile must remain valid + // until this TokenizableIncludeFile is no longer used. 
+ TokenizableIncludeFile(const TSourceLoc& startLoc, + const std::string& prologue, + TShader::Includer::IncludeResult* includedFile, + const std::string& epilogue, + TPpContext* pp) + : tInput(pp), + prologue_(prologue), + epilogue_(epilogue), + includedFile_(includedFile), + scanner(3, strings, lengths, nullptr, 0, 0, true), + prevScanner(nullptr), + stringInput(pp, scanner) + { + strings[0] = prologue_.data(); + strings[1] = includedFile_->headerData; + strings[2] = epilogue_.data(); + + lengths[0] = prologue_.size(); + lengths[1] = includedFile_->headerLength; + lengths[2] = epilogue_.size(); + + scanner.setLine(startLoc.line); + scanner.setString(startLoc.string); + + scanner.setFile(startLoc.getFilenameStr(), 0); + scanner.setFile(startLoc.getFilenameStr(), 1); + scanner.setFile(startLoc.getFilenameStr(), 2); + } + + // tInput methods: + int scan(TPpToken* t) override { return stringInput.scan(t); } + int getch() override { return stringInput.getch(); } + void ungetch() override { stringInput.ungetch(); } + + void notifyActivated() override + { + prevScanner = pp->parseContext.getScanner(); + pp->parseContext.setScanner(&scanner); + pp->push_include(includedFile_); + } + + void notifyDeleted() override + { + pp->parseContext.setScanner(prevScanner); + pp->pop_include(); + } + + private: + TokenizableIncludeFile& operator=(const TokenizableIncludeFile&); + + // Stores the prologue for this string. + const std::string prologue_; + + // Stores the epilogue for this string. + const std::string epilogue_; + + // Points to the IncludeResult that this TokenizableIncludeFile represents. + TShader::Includer::IncludeResult* includedFile_; + + // Will point to prologue_, includedFile_->headerData and epilogue_ + // This is passed to scanner constructor. + // These do not own the storage and it must remain valid until this + // object has been destroyed. + const char* strings[3]; + // Length of str_, passed to scanner constructor. + size_t lengths[3]; + // Scans over str_. + TInputScanner scanner; + // The previous effective scanner before the scanner in this instance + // has been activated. + TInputScanner* prevScanner; + // Delegate object implementing the tInput interface. 
+        tStringInput stringInput;
+    };
+
+    int ScanFromString(char* s);
+    void missingEndifCheck();
+    int lFloatConst(int len, int ch, TPpToken* ppToken);
+    int characterLiteral(TPpToken* ppToken);
+
+    void push_include(TShader::Includer::IncludeResult* result)
+    {
+        currentSourceFile = result->headerName;
+        includeStack.push(result);
+    }
+
+    void pop_include()
+    {
+        TShader::Includer::IncludeResult* include = includeStack.top();
+        includeStack.pop();
+        includer.releaseInclude(include);
+        if (includeStack.empty()) {
+            currentSourceFile = rootFileName;
+        } else {
+            currentSourceFile = includeStack.top()->headerName;
+        }
+    }
+
+    bool inComment;
+    std::string rootFileName;
+    std::stack<TShader::Includer::IncludeResult*> includeStack;
+    std::string currentSourceFile;
+
+    std::istringstream strtodStream;
+    bool disableEscapeSequences;
+};
+
+} // end namespace glslang
+
+#endif // PPCONTEXT_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/preprocessor/PpTokens.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/preprocessor/PpTokens.h
new file mode 100644
index 0000000..7b0f815
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/preprocessor/PpTokens.h
@@ -0,0 +1,179 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+/****************************************************************************\
+Copyright (c) 2002, NVIDIA Corporation.
+
+NVIDIA Corporation("NVIDIA") supplies this software to you in
+consideration of your agreement to the following terms, and your use,
+installation, modification or redistribution of this NVIDIA software
+constitutes acceptance of these terms. If you do not agree with these
+terms, please do not use, install, modify or redistribute this NVIDIA
+software.
+ +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. + +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\****************************************************************************/ + +#ifndef PARSER_H +#define PARSER_H + +namespace glslang { + +// Multi-character tokens +enum EFixedAtoms { + // single character tokens get their own char value as their token; start here for multi-character tokens + PpAtomMaxSingle = 127, + + // replace bad character tokens with this, to avoid accidental aliasing with the below + PpAtomBadToken, + + // Operators + + PPAtomAddAssign, + PPAtomSubAssign, + PPAtomMulAssign, + PPAtomDivAssign, + PPAtomModAssign, + + PpAtomRight, + PpAtomLeft, + + PpAtomRightAssign, + PpAtomLeftAssign, + PpAtomAndAssign, + PpAtomOrAssign, + PpAtomXorAssign, + + PpAtomAnd, + PpAtomOr, + PpAtomXor, + + PpAtomEQ, + PpAtomNE, + PpAtomGE, + PpAtomLE, + + PpAtomDecrement, + PpAtomIncrement, + + PpAtomColonColon, + + PpAtomPaste, + + // Constants + + PpAtomConstInt, + PpAtomConstUint, + PpAtomConstInt64, + PpAtomConstUint64, + PpAtomConstInt16, + PpAtomConstUint16, + PpAtomConstFloat, + PpAtomConstDouble, + PpAtomConstFloat16, + PpAtomConstString, + + // Identifiers + PpAtomIdentifier, + + // preprocessor "keywords" + + PpAtomDefine, + PpAtomUndef, + + PpAtomIf, + PpAtomIfdef, + PpAtomIfndef, + PpAtomElse, + PpAtomElif, + PpAtomEndif, + + PpAtomLine, + PpAtomPragma, + PpAtomError, + + // #version ... 
+ PpAtomVersion, + PpAtomCore, + PpAtomCompatibility, + PpAtomEs, + + // #extension + PpAtomExtension, + + // __LINE__, __FILE__, __VERSION__ + + PpAtomLineMacro, + PpAtomFileMacro, + PpAtomVersionMacro, + + // #include + PpAtomInclude, + + PpAtomLast, +}; + +} // end namespace glslang + +#endif /* not PARSER_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/propagateNoContraction.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/propagateNoContraction.h new file mode 100644 index 0000000..8521ad7 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/propagateNoContraction.h @@ -0,0 +1,55 @@ +// +// Copyright (C) 2015-2016 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// Visit the nodes in the glslang intermediate tree representation to +// propagate 'noContraction' qualifier. +// + +#pragma once + +#include "../Include/intermediate.h" + +namespace glslang { + +// Propagates the 'precise' qualifier for objects (objects marked with +// 'noContraction' qualifier) from the shader source specified 'precise' +// variables to all the involved objects, and add 'noContraction' qualifier for +// the involved arithmetic operations. +// Note that the same qualifier: 'noContraction' is used in both object nodes +// and arithmetic operation nodes, but has different meaning. For object nodes, +// 'noContraction' means the object is 'precise'; and for arithmetic operation +// nodes, it means the operation should not be contracted. 
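+// For example (illustration), given
+//
+//     precise out vec4 pos;
+//     ...
+//     pos = a * b + c;
+//
+// the multiply and add feeding 'pos' must not be fused into a single fma, so
+// the corresponding arithmetic operation nodes are marked 'noContraction'.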
+void PropagateNoContraction(const glslang::TIntermediate& intermediate);
+};
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/reflection.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/reflection.h
new file mode 100644
index 0000000..bfd5452
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/MachineIndependent/reflection.h
@@ -0,0 +1,223 @@
+//
+// Copyright (C) 2013-2016 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#if !defined(GLSLANG_WEB)
+
+#ifndef _REFLECTION_INCLUDED
+#define _REFLECTION_INCLUDED
+
+#include "../Public/ShaderLang.h"
+#include "../Include/Types.h"
+
+#include <list>
+#include <set>
+
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
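+// Typical use (a sketch, not part of the upstream header): populate the
+// database one stage at a time with addStage(), then query it, e.g.:
+//
+//     TReflection refl(EShReflectionDefault, EShLangVertex, EShLangFragment);
+//     refl.addStage(EShLangVertex, *vertexIntermediate); // a parsed TIntermediate
+//     for (int i = 0; i < refl.getNumUniforms(); ++i)
+//         refl.getUniform(i).dump();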
+//
+
+namespace glslang {
+
+class TIntermediate;
+class TIntermAggregate;
+class TReflectionTraverser;
+
+// The full reflection database
+class TReflection {
+public:
+    TReflection(EShReflectionOptions opts, EShLanguage first, EShLanguage last)
+        : options(opts), firstStage(first), lastStage(last), badReflection(TObjectReflection::badReflection())
+    {
+        for (int dim=0; dim<3; ++dim)
+            localSize[dim] = 0;
+    }
+
+    virtual ~TReflection() {}
+
+    // grow the reflection stage by stage
+    bool addStage(EShLanguage, const TIntermediate&);
+
+    // for mapping a uniform index to a uniform object's description
+    int getNumUniforms() { return (int)indexToUniform.size(); }
+    const TObjectReflection& getUniform(int i) const
+    {
+        if (i >= 0 && i < (int)indexToUniform.size())
+            return indexToUniform[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a block index to the block's description
+    int getNumUniformBlocks() const { return (int)indexToUniformBlock.size(); }
+    const TObjectReflection& getUniformBlock(int i) const
+    {
+        if (i >= 0 && i < (int)indexToUniformBlock.size())
+            return indexToUniformBlock[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a pipeline input index to the input's description
+    int getNumPipeInputs() { return (int)indexToPipeInput.size(); }
+    const TObjectReflection& getPipeInput(int i) const
+    {
+        if (i >= 0 && i < (int)indexToPipeInput.size())
+            return indexToPipeInput[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a pipeline output index to the output's description
+    int getNumPipeOutputs() { return (int)indexToPipeOutput.size(); }
+    const TObjectReflection& getPipeOutput(int i) const
+    {
+        if (i >= 0 && i < (int)indexToPipeOutput.size())
+            return indexToPipeOutput[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping from an atomic counter to the uniform index
+    int getNumAtomicCounters() const { return (int)atomicCounterUniformIndices.size(); }
+    const TObjectReflection& getAtomicCounter(int i) const
+    {
+        if (i >= 0 && i < (int)atomicCounterUniformIndices.size())
+            return getUniform(atomicCounterUniformIndices[i]);
+        else
+            return badReflection;
+    }
+
+    // for mapping a buffer variable index to a buffer variable object's description
+    int getNumBufferVariables() { return (int)indexToBufferVariable.size(); }
+    const TObjectReflection& getBufferVariable(int i) const
+    {
+        if (i >= 0 && i < (int)indexToBufferVariable.size())
+            return indexToBufferVariable[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping a storage block index to the storage block's description
+    int getNumStorageBuffers() const { return (int)indexToBufferBlock.size(); }
+    const TObjectReflection& getStorageBufferBlock(int i) const
+    {
+        if (i >= 0 && i < (int)indexToBufferBlock.size())
+            return indexToBufferBlock[i];
+        else
+            return badReflection;
+    }
+
+    // for mapping any name to its index (block names, uniform names and input/output names)
+    int getIndex(const char* name) const
+    {
+        TNameToIndex::const_iterator it = nameToIndex.find(name);
+        if (it == nameToIndex.end())
+            return -1;
+        else
+            return it->second;
+    }
+
+    // see getIndex(const char*)
+    int getIndex(const TString& name) const { return getIndex(name.c_str()); }
+
+    // for mapping any name to its index (only pipe input/output names)
+    int getPipeIOIndex(const char* name, const bool inOrOut) const
+    {
+        TNameToIndex::const_iterator it = inOrOut ? pipeInNameToIndex.find(name) : pipeOutNameToIndex.find(name);
+        if (it == (inOrOut ? pipeInNameToIndex.end() : pipeOutNameToIndex.end()))
+            return -1;
+        else
+            return it->second;
+    }
+
+    // see getPipeIOIndex(const char*, const bool)
+    int getPipeIOIndex(const TString& name, const bool inOrOut) const { return getPipeIOIndex(name.c_str(), inOrOut); }
+
+    // Thread local size
+    unsigned getLocalSize(int dim) const { return dim <= 2 ? localSize[dim] : 0; }
+
+    void dump();
+
+protected:
+    friend class glslang::TReflectionTraverser;
+
+    void buildCounterIndices(const TIntermediate&);
+    void buildUniformStageMask(const TIntermediate& intermediate);
+    void buildAttributeReflection(EShLanguage, const TIntermediate&);
+
+    // Need a TString hash: typedef std::unordered_map<TString, int> TNameToIndex;
+    typedef std::map<TString, int> TNameToIndex;
+    typedef std::vector<TObjectReflection> TMapIndexToReflection;
+    typedef std::vector<int> TIndices;
+
+    TMapIndexToReflection& GetBlockMapForStorage(TStorageQualifier storage)
+    {
+        if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer)
+            return indexToBufferBlock;
+        return indexToUniformBlock;
+    }
+    TMapIndexToReflection& GetVariableMapForStorage(TStorageQualifier storage)
+    {
+        if ((options & EShReflectionSeparateBuffers) && storage == EvqBuffer)
+            return indexToBufferVariable;
+        return indexToUniform;
+    }
+
+    EShReflectionOptions options;
+
+    EShLanguage firstStage;
+    EShLanguage lastStage;
+
+    TObjectReflection badReflection;  // returned for queries of -1 or generally out-of-range indices; carries the expected descriptions
+    TNameToIndex nameToIndex;         // maps names to indexes; can hold all types of data: uniform/buffer and which function names have been processed
+    TNameToIndex pipeInNameToIndex;   // maps pipe in names to indexes; this is a fix to separate pipe I/O from uniforms and buffers
+    TNameToIndex pipeOutNameToIndex;  // maps pipe out names to indexes; this is a fix to separate pipe I/O from uniforms and buffers
+    TMapIndexToReflection indexToUniform;
+    TMapIndexToReflection indexToUniformBlock;
+    TMapIndexToReflection indexToBufferVariable;
+    TMapIndexToReflection indexToBufferBlock;
+    TMapIndexToReflection indexToPipeInput;
+    TMapIndexToReflection indexToPipeOutput;
+    TIndices atomicCounterUniformIndices;
+
+    unsigned int localSize[3];
+};
+
+} // end namespace glslang
+
+#endif // _REFLECTION_INCLUDED
+
+#endif // !GLSLANG_WEB
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/ResourceLimits.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/ResourceLimits.h
new file mode 100644
index 0000000..f70be81
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/ResourceLimits.h
@@ -0,0 +1,57 @@
+//
+// Copyright (C) 2016 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
+#define _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
+
+#include <string>
+
+#include "../Include/ResourceLimits.h"
+
+// Return a pointer to a user-writable Resource, to pass through the API in a
+// future-proof way.
+extern TBuiltInResource* GetResources();
+
+// These are the default resources for TBuiltInResources, used for both
+//  - parsing this string for the case where the user didn't supply one,
+//  - dumping out a template for user construction of a config file.
+extern const TBuiltInResource* GetDefaultResources();
+
+// Returns the DefaultTBuiltInResource as a human-readable string.
+std::string GetDefaultTBuiltInResourceString();
+
+// Decodes the resource limits from |config| to |resources|.
+void DecodeResourceLimits(TBuiltInResource* resources, char* config);
+
+#endif // _STAND_ALONE_RESOURCE_LIMITS_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/ShaderLang.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/ShaderLang.h
new file mode 100644
index 0000000..90a5302
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/ShaderLang.h
@@ -0,0 +1,987 @@
+//
+// Copyright (C) 2002-2005 3Dlabs Inc. Ltd.
+// Copyright (C) 2013-2016 LunarG, Inc.
+// Copyright (C) 2015-2018 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+#ifndef _COMPILER_INTERFACE_INCLUDED_
+#define _COMPILER_INTERFACE_INCLUDED_
+
+#include "../Include/ResourceLimits.h"
+#include "../MachineIndependent/Versions.h"
+
+#include <cstring>
+#include <vector>
+
+#ifdef _WIN32
+    #define C_DECL __cdecl
+#else
+    #define C_DECL
+#endif
+
+#ifdef GLSLANG_IS_SHARED_LIBRARY
+    #ifdef _WIN32
+        #ifdef GLSLANG_EXPORTING
+            #define GLSLANG_EXPORT __declspec(dllexport)
+        #else
+            #define GLSLANG_EXPORT __declspec(dllimport)
+        #endif
+    #elif __GNUC__ >= 4
+        #define GLSLANG_EXPORT __attribute__((visibility("default")))
+    #endif
+#endif // GLSLANG_IS_SHARED_LIBRARY
+
+#ifndef GLSLANG_EXPORT
+#define GLSLANG_EXPORT
+#endif
+
+//
+// This is the platform independent interface between an OGL driver
+// and the shading language compiler/linker.
+//
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+//
+// Call before doing any other compiler/linker operations.
+//
+// (Call once per process, not once per thread.)
+//
+GLSLANG_EXPORT int ShInitialize();
+
+//
+// Call this at process shutdown to clean up memory.
+//
+GLSLANG_EXPORT int ShFinalize();
+
+//
+// Types of languages the compiler can consume.
+//
+typedef enum {
+    EShLangVertex,
+    EShLangTessControl,
+    EShLangTessEvaluation,
+    EShLangGeometry,
+    EShLangFragment,
+    EShLangCompute,
+    EShLangRayGen,
+    EShLangRayGenNV = EShLangRayGen,
+    EShLangIntersect,
+    EShLangIntersectNV = EShLangIntersect,
+    EShLangAnyHit,
+    EShLangAnyHitNV = EShLangAnyHit,
+    EShLangClosestHit,
+    EShLangClosestHitNV = EShLangClosestHit,
+    EShLangMiss,
+    EShLangMissNV = EShLangMiss,
+    EShLangCallable,
+    EShLangCallableNV = EShLangCallable,
+    EShLangTask,
+    EShLangTaskNV = EShLangTask,
+    EShLangMesh,
+    EShLangMeshNV = EShLangMesh,
+    LAST_ELEMENT_MARKER(EShLangCount),
+} EShLanguage;  // would be better as stage, but this is ancient now
+
+typedef enum : unsigned {
+    EShLangVertexMask         = (1 << EShLangVertex),
+    EShLangTessControlMask    = (1 << EShLangTessControl),
+    EShLangTessEvaluationMask = (1 << EShLangTessEvaluation),
+    EShLangGeometryMask       = (1 << EShLangGeometry),
+    EShLangFragmentMask       = (1 << EShLangFragment),
+    EShLangComputeMask        = (1 << EShLangCompute),
+    EShLangRayGenMask         = (1 << EShLangRayGen),
+    EShLangRayGenNVMask       = EShLangRayGenMask,
+    EShLangIntersectMask      = (1 << EShLangIntersect),
+    EShLangIntersectNVMask    = EShLangIntersectMask,
+    EShLangAnyHitMask         = (1 << EShLangAnyHit),
+    EShLangAnyHitNVMask       = EShLangAnyHitMask,
+    EShLangClosestHitMask     = (1 << EShLangClosestHit),
+    EShLangClosestHitNVMask   = EShLangClosestHitMask,
+    EShLangMissMask           = (1 << EShLangMiss),
+    EShLangMissNVMask         = EShLangMissMask,
+    EShLangCallableMask       = (1 << EShLangCallable),
+    EShLangCallableNVMask     = EShLangCallableMask,
+    EShLangTaskMask           = (1 << EShLangTask),
+    EShLangTaskNVMask         = EShLangTaskMask,
+    EShLangMeshMask           = (1 << EShLangMesh),
+    EShLangMeshNVMask         = EShLangMeshMask,
+    LAST_ELEMENT_MARKER(EShLanguageMaskCount),
+} EShLanguageMask;
+
+namespace glslang {
+
+class TType;
+
+typedef enum {
+    EShSourceNone,
+    EShSourceGlsl,  // GLSL,
includes ESSL (OpenGL ES GLSL) + EShSourceHlsl, // HLSL + LAST_ELEMENT_MARKER(EShSourceCount), +} EShSource; // if EShLanguage were EShStage, this could be EShLanguage instead + +typedef enum { + EShClientNone, // use when there is no client, e.g. for validation + EShClientVulkan, // as GLSL dialect, specifies KHR_vulkan_glsl extension + EShClientOpenGL, // as GLSL dialect, specifies ARB_gl_spirv extension + LAST_ELEMENT_MARKER(EShClientCount), +} EShClient; + +typedef enum { + EShTargetNone, + EShTargetSpv, // SPIR-V (preferred spelling) + EshTargetSpv = EShTargetSpv, // legacy spelling + LAST_ELEMENT_MARKER(EShTargetCount), +} EShTargetLanguage; + +typedef enum { + EShTargetVulkan_1_0 = (1 << 22), // Vulkan 1.0 + EShTargetVulkan_1_1 = (1 << 22) | (1 << 12), // Vulkan 1.1 + EShTargetVulkan_1_2 = (1 << 22) | (2 << 12), // Vulkan 1.2 + EShTargetVulkan_1_3 = (1 << 22) | (3 << 12), // Vulkan 1.3 + EShTargetOpenGL_450 = 450, // OpenGL + LAST_ELEMENT_MARKER(EShTargetClientVersionCount = 5), +} EShTargetClientVersion; + +typedef EShTargetClientVersion EshTargetClientVersion; + +typedef enum { + EShTargetSpv_1_0 = (1 << 16), // SPIR-V 1.0 + EShTargetSpv_1_1 = (1 << 16) | (1 << 8), // SPIR-V 1.1 + EShTargetSpv_1_2 = (1 << 16) | (2 << 8), // SPIR-V 1.2 + EShTargetSpv_1_3 = (1 << 16) | (3 << 8), // SPIR-V 1.3 + EShTargetSpv_1_4 = (1 << 16) | (4 << 8), // SPIR-V 1.4 + EShTargetSpv_1_5 = (1 << 16) | (5 << 8), // SPIR-V 1.5 + EShTargetSpv_1_6 = (1 << 16) | (6 << 8), // SPIR-V 1.6 + LAST_ELEMENT_MARKER(EShTargetLanguageVersionCount = 7), +} EShTargetLanguageVersion; + +struct TInputLanguage { + EShSource languageFamily; // redundant information with other input, this one overrides when not EShSourceNone + EShLanguage stage; // redundant information with other input, this one overrides when not EShSourceNone + EShClient dialect; + int dialectVersion; // version of client's language definition, not the client (when not EShClientNone) + bool vulkanRulesRelaxed; +}; + +struct TClient { + EShClient client; + EShTargetClientVersion version; // version of client itself (not the client's input dialect) +}; + +struct TTarget { + EShTargetLanguage language; + EShTargetLanguageVersion version; // version to target, if SPIR-V, defined by "word 1" of the SPIR-V header + bool hlslFunctionality1; // can target hlsl_functionality1 extension(s) +}; + +// All source/client/target versions and settings. +// Can override previous methods of setting, when items are set here. +// Expected to grow, as more are added, rather than growing parameter lists. +struct TEnvironment { + TInputLanguage input; // definition of the input language + TClient client; // what client is the overall compilation being done for? + TTarget target; // what to generate +}; + +GLSLANG_EXPORT const char* StageName(EShLanguage); + +} // end namespace glslang + +// +// Types of output the linker will create. +// +typedef enum { + EShExVertexFragment, + EShExFragment +} EShExecutable; + +// +// Optimization level for the compiler. +// +typedef enum { + EShOptNoGeneration, + EShOptNone, + EShOptSimple, // Optimizations that can be done quickly + EShOptFull, // Optimizations that will take more time + LAST_ELEMENT_MARKER(EshOptLevelCount), +} EShOptimizationLevel; + +// +// Texture and Sampler transformation mode. 
+//
+typedef enum {
+    EShTexSampTransKeep,                         // keep textures and samplers as is (default)
+    EShTexSampTransUpgradeTextureRemoveSampler,  // change texture w/o embedded sampler into sampled texture and throw away all samplers
+    LAST_ELEMENT_MARKER(EShTexSampTransCount),
+} EShTextureSamplerTransformMode;
+
+//
+// Message choices for what errors and warnings are given.
+//
+enum EShMessages : unsigned {
+    EShMsgDefault          = 0,          // default is to give all required errors and extra warnings
+    EShMsgRelaxedErrors    = (1 << 0),   // be liberal in accepting input
+    EShMsgSuppressWarnings = (1 << 1),   // suppress all warnings, except those required by the specification
+    EShMsgAST              = (1 << 2),   // print the AST intermediate representation
+    EShMsgSpvRules         = (1 << 3),   // issue messages for SPIR-V generation
+    EShMsgVulkanRules      = (1 << 4),   // issue messages for Vulkan-requirements of GLSL for SPIR-V
+    EShMsgOnlyPreprocessor = (1 << 5),   // only print out errors produced by the preprocessor
+    EShMsgReadHlsl         = (1 << 6),   // use HLSL parsing rules and semantics
+    EShMsgCascadingErrors  = (1 << 7),   // get cascading errors; risks error-recovery issues, instead of an early exit
+    EShMsgKeepUncalled     = (1 << 8),   // for testing, don't eliminate uncalled functions
+    EShMsgHlslOffsets      = (1 << 9),   // allow block offsets to follow HLSL rules instead of GLSL rules
+    EShMsgDebugInfo        = (1 << 10),  // save debug information
+    EShMsgHlslEnable16BitTypes = (1 << 11),  // enable use of 16-bit types in SPIR-V for HLSL
+    EShMsgHlslLegalization = (1 << 12),  // enable HLSL Legalization messages
+    EShMsgHlslDX9Compatible = (1 << 13), // enable HLSL DX9 compatible mode (for samplers and semantics)
+    EShMsgBuiltinSymbolTable = (1 << 14), // print the builtin symbol table
+    EShMsgEnhanced         = (1 << 15),  // enhanced message readability
+    LAST_ELEMENT_MARKER(EShMsgCount),
+};
+
+//
+// Options for building reflection
+//
+typedef enum {
+    EShReflectionDefault            = 0,         // default is original behaviour before options were added
+    EShReflectionStrictArraySuffix  = (1 << 0),  // reflection will follow stricter rules for array-of-structs suffixes
+    EShReflectionBasicArraySuffix   = (1 << 1),  // arrays of basic types will be appended with [0] as in GL reflection
+    EShReflectionIntermediateIO     = (1 << 2),  // reflect inputs and outputs to program, even with no vertex shader
+    EShReflectionSeparateBuffers    = (1 << 3),  // buffer variables and buffer blocks are reflected separately
+    EShReflectionAllBlockVariables  = (1 << 4),  // reflect all variables in blocks, even if they are inactive
+    EShReflectionUnwrapIOBlocks     = (1 << 5),  // unwrap input/output blocks the same as with uniform blocks
+    EShReflectionAllIOVariables     = (1 << 6),  // reflect all input/output variables, even if they are inactive
+    EShReflectionSharedStd140SSBO   = (1 << 7),  // apply std140/shared rules for ubo to ssbo
+    EShReflectionSharedStd140UBO    = (1 << 8),  // apply std140/shared rules for ubo to ubo
+    LAST_ELEMENT_MARKER(EShReflectionCount),
+} EShReflectionOptions;
+
+//
+// Build a table for bindings. This can be used for locating
+// attributes, uniforms, globals, etc., as needed.
+//
+typedef struct {
+    const char* name;
+    int binding;
+} ShBinding;
+
+typedef struct {
+    int numBindings;
+    ShBinding* bindings;  // array of bindings
+} ShBindingTable;
+
+//
+// ShHandle held by but opaque to the driver. It is allocated,
+// managed, and de-allocated by the compiler/linker. Its contents
+// are defined by and used by the compiler and linker. For example,
+// symbol table information and object code passed from the compiler
+// to the linker can be stored where ShHandle points.
+//
+// If handle creation fails, 0 will be returned.
+//
+typedef void* ShHandle;
+
+//
+// Driver calls these to create and destroy compiler/linker
+// objects.
+//
+GLSLANG_EXPORT ShHandle ShConstructCompiler(const EShLanguage, int debugOptions);  // one per shader
+GLSLANG_EXPORT ShHandle ShConstructLinker(const EShExecutable, int debugOptions);  // one per shader pair
+GLSLANG_EXPORT ShHandle ShConstructUniformMap();                 // one per uniform namespace (currently entire program object)
+GLSLANG_EXPORT void ShDestruct(ShHandle);
+
+//
+// The return value of ShCompile is boolean, non-zero indicating
+// success.
+//
+// The info-log should be written by ShCompile into
+// ShHandle, so it can answer future queries.
+//
+GLSLANG_EXPORT int ShCompile(
+    const ShHandle,
+    const char* const shaderStrings[],
+    const int numStrings,
+    const int* lengths,
+    const EShOptimizationLevel,
+    const TBuiltInResource *resources,
+    int debugOptions,
+    int defaultVersion = 110,             // use 100 for ES environment, overridden by #version in shader
+    bool forwardCompatible = false,       // give errors for use of deprecated features
+    EShMessages messages = EShMsgDefault  // warnings and errors
+    );
+
+GLSLANG_EXPORT int ShLinkExt(
+    const ShHandle,     // linker object
+    const ShHandle h[], // compiler objects to link together
+    const int numHandles);
+
+//
+// ShSetEncryptionMethod is a place-holder for specifying
+// how source code is encrypted.
+//
+GLSLANG_EXPORT void ShSetEncryptionMethod(ShHandle);
+
+//
+// All the following return 0 if the information is not
+// available in the object passed down, or the object is bad.
+//
+GLSLANG_EXPORT const char* ShGetInfoLog(const ShHandle);
+GLSLANG_EXPORT const void* ShGetExecutable(const ShHandle);
+GLSLANG_EXPORT int ShSetVirtualAttributeBindings(const ShHandle, const ShBindingTable*);  // to detect user aliasing
+GLSLANG_EXPORT int ShSetFixedAttributeBindings(const ShHandle, const ShBindingTable*);    // to force any physical mappings
+//
+// Tell the linker to never assign a vertex attribute to this list of physical attributes
+//
+GLSLANG_EXPORT int ShExcludeAttributes(const ShHandle, int *attributes, int count);
+
+//
+// Returns the location ID of the named uniform.
+// Returns -1 if error.
+//
+GLSLANG_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char* name);
+
+#ifdef __cplusplus
+    } // end extern "C"
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Deferred-Lowering C++ Interface
+// -----------------------------------
+//
+// Below is a new alternate C++ interface, which deprecates the above
+// opaque handle-based interface.
+//
+// The below is further designed to handle multiple compilation units per stage, where
+// the intermediate results, including the parse tree, are preserved until link time,
+// rather than the above interface which is designed to have each compilation unit
+// lowered at compile time. In the above model, linking occurs on the lowered results,
+// whereas in this model intra-stage linking can occur at the parse tree
+// (treeRoot in TIntermediate) level, and then a full stage can be lowered.
+//
+
+#include <list>
+#include <string>
+#include <utility>
+
+class TCompiler;
+class TInfoSink;
+
+namespace glslang {
+
+struct Version {
+    int major;
+    int minor;
+    int patch;
+    const char* flavor;
+};
+
+GLSLANG_EXPORT Version GetVersion();
+GLSLANG_EXPORT const char* GetEsslVersionString();
+GLSLANG_EXPORT const char* GetGlslVersionString();
+GLSLANG_EXPORT int GetKhronosToolId();
+
+class TIntermediate;
+class TProgram;
+class TPoolAllocator;
+
+// Call this exactly once per process before using anything else
+GLSLANG_EXPORT bool InitializeProcess();
+
+// Call once per process to tear down everything
+GLSLANG_EXPORT void FinalizeProcess();
+
+// Resource type for IO resolver
+enum TResourceType {
+    EResSampler,
+    EResTexture,
+    EResImage,
+    EResUbo,
+    EResSsbo,
+    EResUav,
+    EResCount
+};
+
+enum TBlockStorageClass
+{
+    EbsUniform = 0,
+    EbsStorageBuffer,
+    EbsPushConstant,
+    EbsNone,    // not a uniform or buffer variable
+    EbsCount,
+};
+
+// Make one TShader per shader that you will link into a program. Then
+//  - provide the shader through setStrings() or setStringsWithLengths()
+//  - optionally call setEnv*(), see below for more detail
+//  - optionally use setPreamble() to set a special shader string that will be
+//    processed before all others but won't affect the validity of #version
+//  - optionally call addProcesses() for each setting/transform,
+//    see comment for class TProcesses
+//  - call parse(): source language and target environment must be selected
+//    either by correct setting of EShMessages sent to parse(), or by
+//    explicitly calling setEnv*()
+//  - query the info logs
+//
+// N.B.: Does not yet support having the same TShader instance being linked into
+// multiple programs.
+//
+// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
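+//
+// For example (a sketch only; error handling and cleanup omitted, and 'src'
+// stands in for a null-terminated GLSL source string):
+//
+//     glslang::InitializeProcess();
+//     glslang::TShader shader(EShLangFragment);
+//     const char* strings[] = { src };
+//     shader.setStrings(strings, 1);
+//     shader.setEnvInput(glslang::EShSourceGlsl, EShLangFragment,
+//                        glslang::EShClientVulkan, 100);
+//     shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetVulkan_1_1);
+//     shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
+//     if (!shader.parse(GetDefaultResources(), 110, false, EShMsgDefault))
+//         puts(shader.getInfoLog());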
+// +class TShader { +public: + GLSLANG_EXPORT explicit TShader(EShLanguage); + GLSLANG_EXPORT virtual ~TShader(); + GLSLANG_EXPORT void setStrings(const char* const* s, int n); + GLSLANG_EXPORT void setStringsWithLengths( + const char* const* s, const int* l, int n); + GLSLANG_EXPORT void setStringsWithLengthsAndNames( + const char* const* s, const int* l, const char* const* names, int n); + void setPreamble(const char* s) { preamble = s; } + GLSLANG_EXPORT void setEntryPoint(const char* entryPoint); + GLSLANG_EXPORT void setSourceEntryPoint(const char* sourceEntryPointName); + GLSLANG_EXPORT void addProcesses(const std::vector&); + GLSLANG_EXPORT void setUniqueId(unsigned long long id); + GLSLANG_EXPORT void setOverrideVersion(int version); + GLSLANG_EXPORT void setDebugInfo(bool debugInfo); + + // IO resolver binding data: see comments in ShaderLang.cpp + GLSLANG_EXPORT void setShiftBinding(TResourceType res, unsigned int base); + GLSLANG_EXPORT void setShiftSamplerBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftTextureBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftImageBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftUboBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftUavBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftCbufferBinding(unsigned int base); // synonym for setShiftUboBinding + GLSLANG_EXPORT void setShiftSsboBinding(unsigned int base); // DEPRECATED: use setShiftBinding + GLSLANG_EXPORT void setShiftBindingForSet(TResourceType res, unsigned int base, unsigned int set); + GLSLANG_EXPORT void setResourceSetBinding(const std::vector& base); + GLSLANG_EXPORT void setAutoMapBindings(bool map); + GLSLANG_EXPORT void setAutoMapLocations(bool map); + GLSLANG_EXPORT void addUniformLocationOverride(const char* name, int loc); + GLSLANG_EXPORT void setUniformLocationBase(int base); + GLSLANG_EXPORT void setInvertY(bool invert); + GLSLANG_EXPORT void setDxPositionW(bool dxPosW); + GLSLANG_EXPORT void setEnhancedMsgs(); +#ifdef ENABLE_HLSL + GLSLANG_EXPORT void setHlslIoMapping(bool hlslIoMap); + GLSLANG_EXPORT void setFlattenUniformArrays(bool flatten); +#endif + GLSLANG_EXPORT void setNoStorageFormat(bool useUnknownFormat); + GLSLANG_EXPORT void setNanMinMaxClamp(bool nanMinMaxClamp); + GLSLANG_EXPORT void setTextureSamplerTransformMode(EShTextureSamplerTransformMode mode); + GLSLANG_EXPORT void addBlockStorageOverride(const char* nameStr, glslang::TBlockStorageClass backing); + + GLSLANG_EXPORT void setGlobalUniformBlockName(const char* name); + GLSLANG_EXPORT void setAtomicCounterBlockName(const char* name); + GLSLANG_EXPORT void setGlobalUniformSet(unsigned int set); + GLSLANG_EXPORT void setGlobalUniformBinding(unsigned int binding); + GLSLANG_EXPORT void setAtomicCounterBlockSet(unsigned int set); + GLSLANG_EXPORT void setAtomicCounterBlockBinding(unsigned int binding); + + // For setting up the environment (cleared to nothingness in the constructor). + // These must be called so that parsing is done for the right source language and + // target environment, either indirectly through TranslateEnvironment() based on + // EShMessages et. al., or directly by the user. + // + // setEnvInput: The input source language and stage. 
If generating code for a + // specific client, the input client semantics to use and the + // version of that client's input semantics to use, otherwise + // use EShClientNone and version of 0, e.g. for validation mode. + // Note 'version' does not describe the target environment, + // just the version of the source dialect to compile under. + // For example, to choose the Vulkan dialect of GLSL defined by + // version 100 of the KHR_vulkan_glsl extension: lang = EShSourceGlsl, + // dialect = EShClientVulkan, and version = 100. + // + // See the definitions of TEnvironment, EShSource, EShLanguage, + // and EShClient for choices and more detail. + // + // setEnvClient: The client that will be hosting the execution, and its version. + // Note 'version' is not the version of the languages involved, but + // the version of the client environment. + // Use EShClientNone and version of 0 if there is no client, e.g. + // for validation mode. + // + // See EShTargetClientVersion for choices. + // + // setEnvTarget: The language to translate to when generating code, and that + // language's version. + // Use EShTargetNone and version of 0 if there is no client, e.g. + // for validation mode. + // + void setEnvInput(EShSource lang, EShLanguage envStage, EShClient client, int version) + { + environment.input.languageFamily = lang; + environment.input.stage = envStage; + environment.input.dialect = client; + environment.input.dialectVersion = version; + } + void setEnvClient(EShClient client, EShTargetClientVersion version) + { + environment.client.client = client; + environment.client.version = version; + } + void setEnvTarget(EShTargetLanguage lang, EShTargetLanguageVersion version) + { + environment.target.language = lang; + environment.target.version = version; + } + + void getStrings(const char* const* &s, int& n) { s = strings; n = numStrings; } + +#ifdef ENABLE_HLSL + void setEnvTargetHlslFunctionality1() { environment.target.hlslFunctionality1 = true; } + bool getEnvTargetHlslFunctionality1() const { return environment.target.hlslFunctionality1; } +#else + bool getEnvTargetHlslFunctionality1() const { return false; } +#endif + + void setEnvInputVulkanRulesRelaxed() { environment.input.vulkanRulesRelaxed = true; } + bool getEnvInputVulkanRulesRelaxed() const { return environment.input.vulkanRulesRelaxed; } + + // Interface to #include handlers. + // + // To support #include, a client of Glslang does the following: + // 1. Call setStringsWithNames to set the source strings and associated + // names. For example, the names could be the names of the files + // containing the shader sources. + // 2. Call parse with an Includer. + // + // When the Glslang parser encounters an #include directive, it calls + // the Includer's include method with the requested include name + // together with the current string name. The returned IncludeResult + // contains the fully resolved name of the included source, together + // with the source text that should replace the #include directive + // in the source stream. After parsing that source, Glslang will + // release the IncludeResult object. + class Includer { + public: + // An IncludeResult contains the resolved name and content of a source + // inclusion. 
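+        // As an illustration (a sketch, not part of the upstream header), a
+        // minimal file-based includer could look like the following, where
+        // 'loadFile' is a hypothetical helper returning heap-allocated contents:
+        //
+        //     class FileIncluder : public glslang::TShader::Includer {
+        //     public:
+        //         IncludeResult* includeLocal(const char* headerName, const char*,
+        //                                     size_t) override {
+        //             char* data; size_t len;
+        //             if (!loadFile(headerName, &data, &len)) return nullptr;
+        //             return new IncludeResult(headerName, data, len, data);
+        //         }
+        //         void releaseInclude(IncludeResult* r) override {
+        //             if (r) { delete[] static_cast<char*>(r->userData); delete r; }
+        //         }
+        //     };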
+ struct IncludeResult { + IncludeResult(const std::string& headerName, const char* const headerData, const size_t headerLength, void* userData) : + headerName(headerName), headerData(headerData), headerLength(headerLength), userData(userData) { } + // For a successful inclusion, the fully resolved name of the requested + // include. For example, in a file system-based includer, full resolution + // should convert a relative path name into an absolute path name. + // For a failed inclusion, this is an empty string. + const std::string headerName; + // The content and byte length of the requested inclusion. The + // Includer producing this IncludeResult retains ownership of the + // storage. + // For a failed inclusion, the header + // field points to a string containing error details. + const char* const headerData; + const size_t headerLength; + // Include resolver's context. + void* userData; + protected: + IncludeResult& operator=(const IncludeResult&); + IncludeResult(); + }; + + // For both include methods below: + // + // Resolves an inclusion request by name, current source name, + // and include depth. + // On success, returns an IncludeResult containing the resolved name + // and content of the include. + // On failure, returns a nullptr, or an IncludeResult + // with an empty string for the headerName and error details in the + // header field. + // The Includer retains ownership of the contents + // of the returned IncludeResult value, and those contents must + // remain valid until the releaseInclude method is called on that + // IncludeResult object. + // + // Note "local" vs. "system" is not an "either/or": "local" is an + // extra thing to do over "system". Both might get called, as per + // the C++ specification. + + // For the "system" or <>-style includes; search the "system" paths. + virtual IncludeResult* includeSystem(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // For the "local"-only aspect of a "" include. Should not search in the + // "system" paths, because on returning a failure, the parser will + // call includeSystem() to look in the "system" locations. + virtual IncludeResult* includeLocal(const char* /*headerName*/, + const char* /*includerName*/, + size_t /*inclusionDepth*/) { return nullptr; } + + // Signals that the parser will no longer use the contents of the + // specified IncludeResult. + virtual void releaseInclude(IncludeResult*) = 0; + virtual ~Includer() {} + }; + + // Fail all Includer searches + class ForbidIncluder : public Includer { + public: + virtual void releaseInclude(IncludeResult*) override { } + }; + + GLSLANG_EXPORT bool parse( + const TBuiltInResource*, int defaultVersion, EProfile defaultProfile, + bool forceDefaultVersionAndProfile, bool forwardCompatible, + EShMessages, Includer&); + + bool parse(const TBuiltInResource* res, int defaultVersion, EProfile defaultProfile, bool forceDefaultVersionAndProfile, + bool forwardCompatible, EShMessages messages) + { + TShader::ForbidIncluder includer; + return parse(res, defaultVersion, defaultProfile, forceDefaultVersionAndProfile, forwardCompatible, messages, includer); + } + + // Equivalent to parse() without a default profile and without forcing defaults. 
+ bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages) + { + return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages); + } + + bool parse(const TBuiltInResource* builtInResources, int defaultVersion, bool forwardCompatible, EShMessages messages, + Includer& includer) + { + return parse(builtInResources, defaultVersion, ENoProfile, false, forwardCompatible, messages, includer); + } + + // NOTE: Doing just preprocessing to obtain a correct preprocessed shader string + // is not an officially supported or fully working path. + GLSLANG_EXPORT bool preprocess( + const TBuiltInResource* builtInResources, int defaultVersion, + EProfile defaultProfile, bool forceDefaultVersionAndProfile, + bool forwardCompatible, EShMessages message, std::string* outputString, + Includer& includer); + + GLSLANG_EXPORT const char* getInfoLog(); + GLSLANG_EXPORT const char* getInfoDebugLog(); + EShLanguage getStage() const { return stage; } + TIntermediate* getIntermediate() const { return intermediate; } + +protected: + TPoolAllocator* pool; + EShLanguage stage; + TCompiler* compiler; + TIntermediate* intermediate; + TInfoSink* infoSink; + // strings and lengths follow the standard for glShaderSource: + // strings is an array of numStrings pointers to string data. + // lengths can be null, but if not it is an array of numStrings + // integers containing the length of the associated strings. + // if lengths is null or lengths[n] < 0 the associated strings[n] is + // assumed to be null-terminated. + // stringNames is the optional names for all the strings. If stringNames + // is null, then none of the strings has name. If a certain element in + // stringNames is null, then the corresponding string does not have name. + const char* const* strings; // explicit code to compile, see previous comment + const int* lengths; + const char* const* stringNames; + int numStrings; // size of the above arrays + const char* preamble; // string of implicit code to compile before the explicitly provided code + + // a function in the source string can be renamed FROM this TO the name given in setEntryPoint. + std::string sourceEntryPointName; + + // overrides #version in shader source or default version if #version isn't present + int overrideVersion; + + TEnvironment environment; + + friend class TProgram; + +private: + TShader& operator=(TShader&); +}; + +#if !defined(GLSLANG_WEB) + +// +// A reflection database and its interface, consistent with the OpenGL API reflection queries. 
+//
+// A reflection database and its interface, consistent with the OpenGL API reflection queries.
+//
+
+// Data needed for just a single object at the granularity exchanged by the reflection API
+class TObjectReflection {
+public:
+    GLSLANG_EXPORT TObjectReflection(const std::string& pName, const TType& pType, int pOffset, int pGLDefineType, int pSize, int pIndex);
+
+    const TType* getType() const { return type; }
+    GLSLANG_EXPORT int getBinding() const;
+    GLSLANG_EXPORT void dump() const;
+    static TObjectReflection badReflection() { return TObjectReflection(); }
+
+    std::string name;
+    int offset;
+    int glDefineType;
+    int size;                // data size in bytes for a block, array size for a (non-block) object that's an array
+    int index;
+    int counterIndex;
+    int numMembers;
+    int arrayStride;         // stride of an array variable
+    int topLevelArraySize;   // size of the top-level variable in a storage buffer member
+    int topLevelArrayStride; // stride of the top-level variable in a storage buffer member
+    EShLanguageMask stages;
+
+protected:
+    TObjectReflection()
+        : offset(-1), glDefineType(-1), size(-1), index(-1), counterIndex(-1), numMembers(-1), arrayStride(0),
+          topLevelArrayStride(0), stages(EShLanguageMask(0)), type(nullptr)
+    {
+    }
+
+    const TType* type;
+};
+
+class TReflection;
+class TIoMapper;
+struct TVarEntryInfo;
+
+// Allows the binding layout to be customized after linking.
+// All used uniform variables will invoke at least validateBinding.
+// If validateBinding returns true, then resolveBinding, resolveSet,
+// and resolveUniformLocation are invoked to resolve the binding,
+// descriptor set index, and location respectively.
+//
+// Invocations happen in a particular order:
+// 1) all shader inputs
+// 2) all shader outputs
+// 3) all uniforms with binding and set already defined
+// 4) all uniforms with binding but no set defined
+// 5) all uniforms with set but no binding defined
+// 6) all uniforms with no binding and no set defined
+//
+// mapIO will use this resolver in two phases. The first
+// phase is a notification phase, calling the corresponding
+// notify callbacks; this phase ends with a call to endNotifications.
+// Phase two starts directly after the call to endNotifications
+// and calls all other callbacks to validate and to get the
+// bindings, sets, locations, component and color indices.
+//
+// NOTE: limit checks are still applied to bindings and sets
+// and may result in an error.
+class TIoMapResolver
+{
+public:
+    virtual ~TIoMapResolver() {}
+
+    // Should return true if the resulting/current binding would be okay.
+    // Basic idea is to do aliasing binding checks with this.
+    virtual bool validateBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current binding should be overridden.
+    // Return -1 if the current binding (including no binding) should be kept.
+    virtual int resolveBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current set should be overridden.
+    // Return -1 if the current set (including no set) should be kept.
+    virtual int resolveSet(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current location should be overridden.
+    // Return -1 if the current location (including no location) should be kept.
+    virtual int resolveUniformLocation(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return true if the resulting/current setup would be okay.
+    // Basic idea is to do aliasing checks and reject invalid semantic names.
+    virtual bool validateInOut(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current location should be overridden.
+    // Return -1 if the current location (including no location) should be kept.
+    virtual int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current component index should be overridden.
+    // Return -1 if the current component index (including no index) should be kept.
+    virtual int resolveInOutComponent(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Should return a value >= 0 if the current color index should be overridden.
+    // Return -1 if the current color index (including no index) should be kept.
+    virtual int resolveInOutIndex(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Notification of a uniform variable
+    virtual void notifyBinding(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Notification of an in or out variable
+    virtual void notifyInOut(EShLanguage stage, TVarEntryInfo& ent) = 0;
+    // Called by mapIO when it starts its notify pass for the given stage
+    virtual void beginNotifications(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the notify pass
+    virtual void endNotifications(EShLanguage stage) = 0;
+    // Called by mapIO when it starts its resolve pass for the given stage
+    virtual void beginResolve(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the resolve pass
+    virtual void endResolve(EShLanguage stage) = 0;
+    // Called by mapIO when it starts its symbol collect for the given stage
+    virtual void beginCollect(EShLanguage stage) = 0;
+    // Called by mapIO when it has finished the symbol collect
+    virtual void endCollect(EShLanguage stage) = 0;
+    // Called by TSlotCollector to resolve storage locations or bindings
+    virtual void reserverStorageSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0;
+    // Called by TSlotCollector to resolve resource locations or bindings
+    virtual void reserverResourceSlot(TVarEntryInfo& ent, TInfoSink& infoSink) = 0;
+    // Called by mapIO.addStage to set the shader stage mask, marking a stage as added to this pipeline
+    virtual void addStage(EShLanguage stage, TIntermediate& stageIntermediate) = 0;
+};
+
+#endif // !GLSLANG_WEB
+
+// Make one TProgram per set of shaders that will get linked together. Add all
+// the shaders that are to be linked together. After calling shader.parse()
+// for all shaders, call link().
+//
+// N.B.: Destruct a linked program *before* destructing the shaders linked into it.
+//
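Taken together with TShader above, a typical compile-and-link flow against the TProgram class defined just below looks like the following minimal sketch. It is illustrative only: `source` and `resources` (a populated TBuiltInResource, e.g. glslang's default resource limits) are assumed inputs, the version choice is arbitrary, and error handling is reduced to printing the info logs.

#include <cstdio>

// Minimal sketch: compile one fragment shader and link it into a program.
void compileAndReflect(const char* source, const TBuiltInResource* resources)
{
    glslang::TShader shader(EShLangFragment);
    const char* strings[] = { source };
    shader.setStrings(strings, 1);

    // Convenience overload from above: no forced version/profile, ForbidIncluder.
    if (!shader.parse(resources, /*defaultVersion*/ 100, /*forwardCompatible*/ false, EShMsgDefault))
        printf("%s\n", shader.getInfoLog());

    glslang::TProgram program;   // declared after `shader`, so destructed first (see N.B. above)
    program.addShader(&shader);
    if (!program.link(EShMsgDefault))
        printf("%s\n", program.getInfoLog());

    // Reflection interface (declared below): enumerate live uniforms.
    if (program.buildReflection())
        for (int i = 0; i < program.getNumUniformVariables(); ++i)
            printf("uniform %d: %s\n", i, program.getUniform(i).name.c_str());
}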
+class TProgram {
+public:
+    GLSLANG_EXPORT TProgram();
+    GLSLANG_EXPORT virtual ~TProgram();
+    void addShader(TShader* shader) { stages[shader->stage].push_back(shader); }
+    std::list<TShader*>& getShaders(EShLanguage stage) { return stages[stage]; }
+    // Link Validation interface
+    GLSLANG_EXPORT bool link(EShMessages);
+    GLSLANG_EXPORT const char* getInfoLog();
+    GLSLANG_EXPORT const char* getInfoDebugLog();
+
+    TIntermediate* getIntermediate(EShLanguage stage) const { return intermediate[stage]; }
+
+#if !defined(GLSLANG_WEB)
+
+    // Reflection Interface
+
+    // call first, to do liveness analysis, index mapping, etc.; returns false on failure
+    GLSLANG_EXPORT bool buildReflection(int opts = EShReflectionDefault);
+    GLSLANG_EXPORT unsigned getLocalSize(int dim) const;                  // return dim'th local size
+    GLSLANG_EXPORT int getReflectionIndex(const char* name) const;
+    GLSLANG_EXPORT int getReflectionPipeIOIndex(const char* name, const bool inOrOut) const;
+    GLSLANG_EXPORT int getNumUniformVariables() const;
+    GLSLANG_EXPORT const TObjectReflection& getUniform(int index) const;
+    GLSLANG_EXPORT int getNumUniformBlocks() const;
+    GLSLANG_EXPORT const TObjectReflection& getUniformBlock(int index) const;
+    GLSLANG_EXPORT int getNumPipeInputs() const;
+    GLSLANG_EXPORT const TObjectReflection& getPipeInput(int index) const;
+    GLSLANG_EXPORT int getNumPipeOutputs() const;
+    GLSLANG_EXPORT const TObjectReflection& getPipeOutput(int index) const;
+    GLSLANG_EXPORT int getNumBufferVariables() const;
+    GLSLANG_EXPORT const TObjectReflection& getBufferVariable(int index) const;
+    GLSLANG_EXPORT int getNumBufferBlocks() const;
+    GLSLANG_EXPORT const TObjectReflection& getBufferBlock(int index) const;
+    GLSLANG_EXPORT int getNumAtomicCounters() const;
+    GLSLANG_EXPORT const TObjectReflection& getAtomicCounter(int index) const;
+
+    // Legacy Reflection Interface - expressed in terms of above interface
+
+    // can be used for glGetProgramiv(GL_ACTIVE_UNIFORMS)
+    int getNumLiveUniformVariables() const { return getNumUniformVariables(); }
+
+    // can be used for glGetProgramiv(GL_ACTIVE_UNIFORM_BLOCKS)
+    int getNumLiveUniformBlocks() const { return getNumUniformBlocks(); }
+
+    // can be used for glGetProgramiv(GL_ACTIVE_ATTRIBUTES)
+    int getNumLiveAttributes() const { return getNumPipeInputs(); }
+
+    // can be used for glGetUniformIndices()
+    int getUniformIndex(const char* name) const { return getReflectionIndex(name); }
+
+    int getPipeIOIndex(const char* name, const bool inOrOut) const
+        { return getReflectionPipeIOIndex(name, inOrOut); }
+
+    // can be used for "name" part of glGetActiveUniform()
+    const char* getUniformName(int index) const { return getUniform(index).name.c_str(); }
+
+    // returns the binding number
+    int getUniformBinding(int index) const { return getUniform(index).getBinding(); }
+
+    // returns the shader stages where a uniform is present
+    EShLanguageMask getUniformStages(int index) const { return getUniform(index).stages; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_BLOCK_INDEX)
+    int getUniformBlockIndex(int index) const { return getUniform(index).index; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_TYPE)
+    int getUniformType(int index) const { return getUniform(index).glDefineType; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_OFFSET)
+    int getUniformBufferOffset(int index) const { return getUniform(index).offset; }
+
+    // can be used for glGetActiveUniformsiv(GL_UNIFORM_SIZE)
+    int getUniformArraySize(int index) const { return getUniform(index).size; }
+    // returns a TType*
+    const TType* getUniformTType(int index) const { return getUniform(index).getType(); }
+
+    // can be used for glGetActiveUniformBlockName()
+    const char* getUniformBlockName(int index) const { return getUniformBlock(index).name.c_str(); }
+
+    // can be used for glGetActiveUniformBlockiv(UNIFORM_BLOCK_DATA_SIZE)
+    int getUniformBlockSize(int index) const { return getUniformBlock(index).size; }
+
+    // returns the block binding number
+    int getUniformBlockBinding(int index) const { return getUniformBlock(index).getBinding(); }
+
+    // returns block index of associated counter.
+    int getUniformBlockCounterIndex(int index) const { return getUniformBlock(index).counterIndex; }
+
+    // returns a TType*
+    const TType* getUniformBlockTType(int index) const { return getUniformBlock(index).getType(); }
+
+    // can be used for glGetActiveAttrib()
+    const char* getAttributeName(int index) const { return getPipeInput(index).name.c_str(); }
+
+    // can be used for glGetActiveAttrib()
+    int getAttributeType(int index) const { return getPipeInput(index).glDefineType; }
+
+    // returns a TType*
+    const TType* getAttributeTType(int index) const { return getPipeInput(index).getType(); }
+
+    GLSLANG_EXPORT void dumpReflection();
+    // I/O mapping: apply base offsets and map live unbound variables
+    // If resolver is not provided it uses the previous approach
+    // and respects auto assignment and offsets.
+    GLSLANG_EXPORT bool mapIO(TIoMapResolver* pResolver = nullptr, TIoMapper* pIoMapper = nullptr);
+#endif // !GLSLANG_WEB
+
+protected:
+    GLSLANG_EXPORT bool linkStage(EShLanguage, EShMessages);
+    GLSLANG_EXPORT bool crossStageCheck(EShMessages);
+
+    TPoolAllocator* pool;
+    std::list<TShader*> stages[EShLangCount];
+    TIntermediate* intermediate[EShLangCount];
+    bool newedIntermediate[EShLangCount]; // track which intermediates were "new" versus reusing a singleton unit in a stage
+    TInfoSink* infoSink;
+#if !defined(GLSLANG_WEB)
+    TReflection* reflection;
+#endif
+    bool linked;
+
+private:
+    TProgram(TProgram&);
+    TProgram& operator=(TProgram&);
+};
+
+} // end namespace glslang
+
+#endif // _COMPILER_INTERFACE_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/resource_limits_c.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/resource_limits_c.h
new file mode 100644
index 0000000..05aa8eb
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/Public/resource_limits_c.h
@@ -0,0 +1,57 @@
+/**
+BSD 2-Clause License
+
+Copyright (c) 2020, Travis Fort
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**/
+
+#ifndef _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
+#define _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
+
+#include "../Include/glslang_c_interface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Returns a struct that can be used to create custom resource values.
+glslang_resource_t* glslang_resource(void);
+
+// These are the default resources for TBuiltInResources, used for both
+//  - parsing this string for the case where the user didn't supply one,
+//  - dumping out a template for user construction of a config file.
+const glslang_resource_t* glslang_default_resource(void);
+
+// Returns the DefaultTBuiltInResource as a human-readable string.
+// NOTE: User is responsible for freeing this string.
+const char* glslang_default_resource_string();
+
+// Decodes the resource limits from |config| to |resources|.
+void glslang_decode_resource_limits(glslang_resource_t* resources, char* config);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _STAND_ALONE_RESOURCE_LIMITS_C_INCLUDED_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.AMD.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.AMD.h
new file mode 100644
index 0000000..009d2f1
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.AMD.h
@@ -0,0 +1,108 @@
+/*
+** Copyright (c) 2014-2016 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and/or associated documentation files (the "Materials"),
+** to deal in the Materials without restriction, including without limitation
+** the rights to use, copy, modify, merge, publish, distribute, sublicense,
+** and/or sell copies of the Materials, and to permit persons to whom the
+** Materials are furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Materials.
+**
+** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+** FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+** IN THE MATERIALS.
+*/ + +#ifndef GLSLextAMD_H +#define GLSLextAMD_H + +static const int GLSLextAMDVersion = 100; +static const int GLSLextAMDRevision = 7; + +// SPV_AMD_shader_ballot +static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; + +enum ShaderBallotAMD { + ShaderBallotBadAMD = 0, // Don't use + + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4, + + ShaderBallotCountAMD +}; + +// SPV_AMD_shader_trinary_minmax +static const char* const E_SPV_AMD_shader_trinary_minmax = "SPV_AMD_shader_trinary_minmax"; + +enum ShaderTrinaryMinMaxAMD { + ShaderTrinaryMinMaxBadAMD = 0, // Don't use + + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9, + + ShaderTrinaryMinMaxCountAMD +}; + +// SPV_AMD_shader_explicit_vertex_parameter +static const char* const E_SPV_AMD_shader_explicit_vertex_parameter = "SPV_AMD_shader_explicit_vertex_parameter"; + +enum ShaderExplicitVertexParameterAMD { + ShaderExplicitVertexParameterBadAMD = 0, // Don't use + + InterpolateAtVertexAMD = 1, + + ShaderExplicitVertexParameterCountAMD +}; + +// SPV_AMD_gcn_shader +static const char* const E_SPV_AMD_gcn_shader = "SPV_AMD_gcn_shader"; + +enum GcnShaderAMD { + GcnShaderBadAMD = 0, // Don't use + + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3, + + GcnShaderCountAMD +}; + +// SPV_AMD_gpu_shader_half_float +static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; + +// SPV_AMD_texture_gather_bias_lod +static const char* const E_SPV_AMD_texture_gather_bias_lod = "SPV_AMD_texture_gather_bias_lod"; + +// SPV_AMD_gpu_shader_int16 +static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"; + +// SPV_AMD_shader_image_load_store_lod +static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod"; + +// SPV_AMD_shader_fragment_mask +static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask"; + +// SPV_AMD_gpu_shader_half_float_fetch +static const char* const E_SPV_AMD_gpu_shader_half_float_fetch = "SPV_AMD_gpu_shader_half_float_fetch"; + +#endif // #ifndef GLSLextAMD_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.EXT.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.EXT.h new file mode 100644 index 0000000..a247b4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.EXT.h @@ -0,0 +1,44 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextEXT_H +#define GLSLextEXT_H + +static const int GLSLextEXTVersion = 100; +static const int GLSLextEXTRevision = 2; + +static const char* const E_SPV_EXT_shader_stencil_export = "SPV_EXT_shader_stencil_export"; +static const char* const E_SPV_EXT_shader_viewport_index_layer = "SPV_EXT_shader_viewport_index_layer"; +static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered"; +static const char* const E_SPV_EXT_fragment_invocation_density = "SPV_EXT_fragment_invocation_density"; +static const char* const E_SPV_EXT_demote_to_helper_invocation = "SPV_EXT_demote_to_helper_invocation"; +static const char* const E_SPV_EXT_shader_atomic_float_add = "SPV_EXT_shader_atomic_float_add"; +static const char* const E_SPV_EXT_shader_atomic_float16_add = "SPV_EXT_shader_atomic_float16_add"; +static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader_atomic_float_min_max"; +static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64"; +static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader"; + +#endif // #ifndef GLSLextEXT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.KHR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.KHR.h new file mode 100644 index 0000000..d5c670f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.KHR.h @@ -0,0 +1,58 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. +** Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextKHR_H +#define GLSLextKHR_H + +static const int GLSLextKHRVersion = 100; +static const int GLSLextKHRRevision = 3; + +static const char* const E_SPV_KHR_shader_ballot = "SPV_KHR_shader_ballot"; +static const char* const E_SPV_KHR_subgroup_vote = "SPV_KHR_subgroup_vote"; +static const char* const E_SPV_KHR_device_group = "SPV_KHR_device_group"; +static const char* const E_SPV_KHR_multiview = "SPV_KHR_multiview"; +static const char* const E_SPV_KHR_shader_draw_parameters = "SPV_KHR_shader_draw_parameters"; +static const char* const E_SPV_KHR_16bit_storage = "SPV_KHR_16bit_storage"; +static const char* const E_SPV_KHR_8bit_storage = "SPV_KHR_8bit_storage"; +static const char* const E_SPV_KHR_storage_buffer_storage_class = "SPV_KHR_storage_buffer_storage_class"; +static const char* const E_SPV_KHR_post_depth_coverage = "SPV_KHR_post_depth_coverage"; +static const char* const E_SPV_KHR_vulkan_memory_model = "SPV_KHR_vulkan_memory_model"; +static const char* const E_SPV_EXT_physical_storage_buffer = "SPV_EXT_physical_storage_buffer"; +static const char* const E_SPV_KHR_physical_storage_buffer = "SPV_KHR_physical_storage_buffer"; +static const char* const E_SPV_EXT_fragment_shader_interlock = "SPV_EXT_fragment_shader_interlock"; +static const char* const E_SPV_KHR_shader_clock = "SPV_KHR_shader_clock"; +static const char* const E_SPV_KHR_non_semantic_info = "SPV_KHR_non_semantic_info"; +static const char* const E_SPV_KHR_ray_tracing = "SPV_KHR_ray_tracing"; +static const char* const E_SPV_KHR_ray_query = "SPV_KHR_ray_query"; +static const char* const E_SPV_KHR_fragment_shading_rate = "SPV_KHR_fragment_shading_rate"; +static const char* const E_SPV_KHR_terminate_invocation = "SPV_KHR_terminate_invocation"; +static const char* const E_SPV_KHR_workgroup_memory_explicit_layout = "SPV_KHR_workgroup_memory_explicit_layout"; +static const char* const E_SPV_KHR_subgroup_uniform_control_flow = "SPV_KHR_subgroup_uniform_control_flow"; +static const char* const E_SPV_KHR_fragment_shader_barycentric = "SPV_KHR_fragment_shader_barycentric"; +static const char* const E_SPV_AMD_shader_early_and_late_fragment_tests = "SPV_AMD_shader_early_and_late_fragment_tests"; + +#endif // #ifndef GLSLextKHR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.NV.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.NV.h new file mode 100644 index 0000000..93c98bf --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.ext.NV.h @@ -0,0 +1,84 @@ +/* +** Copyright (c) 2014-2017 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLextNV_H +#define GLSLextNV_H + +enum BuiltIn; +enum Decoration; +enum Op; +enum Capability; + +static const int GLSLextNVVersion = 100; +static const int GLSLextNVRevision = 11; + +//SPV_NV_sample_mask_override_coverage +const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage"; + +//SPV_NV_geometry_shader_passthrough +const char* const E_SPV_NV_geometry_shader_passthrough = "SPV_NV_geometry_shader_passthrough"; + +//SPV_NV_viewport_array2 +const char* const E_SPV_NV_viewport_array2 = "SPV_NV_viewport_array2"; +const char* const E_ARB_shader_viewport_layer_array = "SPV_ARB_shader_viewport_layer_array"; + +//SPV_NV_stereo_view_rendering +const char* const E_SPV_NV_stereo_view_rendering = "SPV_NV_stereo_view_rendering"; + +//SPV_NVX_multiview_per_view_attributes +const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes"; + +//SPV_NV_shader_subgroup_partitioned +const char* const E_SPV_NV_shader_subgroup_partitioned = "SPV_NV_shader_subgroup_partitioned"; + +//SPV_NV_fragment_shader_barycentric +const char* const E_SPV_NV_fragment_shader_barycentric = "SPV_NV_fragment_shader_barycentric"; + +//SPV_NV_compute_shader_derivatives +const char* const E_SPV_NV_compute_shader_derivatives = "SPV_NV_compute_shader_derivatives"; + +//SPV_NV_shader_image_footprint +const char* const E_SPV_NV_shader_image_footprint = "SPV_NV_shader_image_footprint"; + +//SPV_NV_mesh_shader +const char* const E_SPV_NV_mesh_shader = "SPV_NV_mesh_shader"; + +//SPV_NV_raytracing +const char* const E_SPV_NV_ray_tracing = "SPV_NV_ray_tracing"; + +//SPV_NV_ray_tracing_motion_blur +const char* const E_SPV_NV_ray_tracing_motion_blur = "SPV_NV_ray_tracing_motion_blur"; + +//SPV_NV_shading_rate +const char* const E_SPV_NV_shading_rate = "SPV_NV_shading_rate"; + +//SPV_NV_cooperative_matrix +const char* const E_SPV_NV_cooperative_matrix = "SPV_NV_cooperative_matrix"; + +//SPV_NV_shader_sm_builtins +const char* const E_SPV_NV_shader_sm_builtins = "SPV_NV_shader_sm_builtins"; + +#endif // #ifndef GLSLextNV_H diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.std.450.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.std.450.h new file mode 100644 index 0000000..df31092 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GlslangToSpv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GlslangToSpv.h new file mode 100644 index 0000000..3907be4 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/GlslangToSpv.h @@ -0,0 +1,61 @@ +// +// Copyright (C) 2014 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+    #pragma warning(disable : 4464) // relative include path contains '..'
+#endif
+
+#include "SpvTools.h"
+#include "glslang/Include/intermediate.h"
+
+#include <string>
+#include <vector>
+
+#include "Logger.h"
+
+namespace glslang {
+
+void GetSpirvVersion(std::string&);
+int GetSpirvGeneratorVersion();
+void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                  SpvOptions* options = nullptr);
+void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                  spv::SpvBuildLogger* logger, SpvOptions* options = nullptr);
+void OutputSpvBin(const std::vector<unsigned int>& spirv, const char* baseName);
+void OutputSpvHex(const std::vector<unsigned int>& spirv, const char* baseName, const char* varName);
+
+}
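With the TIntermediate from a linked TProgram in hand (see the earlier sketch), driving this entry point looks roughly like the following. SpvOptions and its generateDebugInfo field come from SpvTools.h, which this header includes; the stage choice and option values are assumptions for illustration.

#include <cstdio>
#include <vector>

// Sketch: lower one linked stage to SPIR-V words. Assumes `program` parsed
// and linked successfully, as in the TProgram example earlier in this diff.
void emitSpirv(glslang::TProgram& program)
{
    std::vector<unsigned int> spirv;
    spv::SpvBuildLogger logger;   // declared in Logger.h (next file below)
    glslang::SpvOptions options;  // defined in SpvTools.h
    options.generateDebugInfo = false;

    glslang::GlslangToSpv(*program.getIntermediate(EShLangFragment), spirv, &logger, &options);
    printf("%s", logger.getAllMessages().c_str());

    // OutputSpvBin(spirv, "shader.spv") would write the module to disk.
}

Passing a SpvBuildLogger is optional (the two-argument overload exists), but it is the only way to surface warnings and TBD-functionality notes alongside hard errors.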
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/Logger.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/Logger.h
new file mode 100644
index 0000000..411367c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/Logger.h
@@ -0,0 +1,83 @@
+//
+// Copyright (C) 2016 Google, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef GLSLANG_SPIRV_LOGGER_H
+#define GLSLANG_SPIRV_LOGGER_H
+
+#include <string>
+#include <vector>
+
+namespace spv {
+
+// A class for holding all SPIR-V build status messages, including
+// missing/TBD functionalities, warnings, and errors.
+class SpvBuildLogger {
+public:
+    SpvBuildLogger() {}
+
+#ifdef GLSLANG_WEB
+    void tbdFunctionality(const std::string& f) { }
+    void missingFunctionality(const std::string& f) { }
+    void warning(const std::string& w) { }
+    void error(const std::string& e) { errors.push_back(e); }
+    std::string getAllMessages() { return ""; }
+#else
+
+    // Registers a TBD functionality.
+    void tbdFunctionality(const std::string& f);
+    // Registers a missing functionality.
+    void missingFunctionality(const std::string& f);
+
+    // Logs a warning.
+    void warning(const std::string& w) { warnings.push_back(w); }
+    // Logs an error.
+    void error(const std::string& e) { errors.push_back(e); }
+
+    // Returns all messages accumulated in the order of:
+    // TBD functionalities, missing functionalities, warnings, errors.
+    std::string getAllMessages() const;
+#endif
+
+private:
+    SpvBuildLogger(const SpvBuildLogger&);
+
+    std::vector<std::string> tbdFeatures;
+    std::vector<std::string> missingFeatures;
+    std::vector<std::string> warnings;
+    std::vector<std::string> errors;
+};
+
+} // end spv namespace
+
+#endif // GLSLANG_SPIRV_LOGGER_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/NonSemanticDebugPrintf.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/NonSemanticDebugPrintf.h
new file mode 100644
index 0000000..83796d7
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/NonSemanticDebugPrintf.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2020 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and/or associated documentation files (the
+// "Materials"), to deal in the Materials without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Materials, and to
+// permit persons to whom the Materials are furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+// https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +// + +#ifndef SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ +#define SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticDebugPrintfRevision = 1, + NonSemanticDebugPrintfRevision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticDebugPrintfInstructions { + NonSemanticDebugPrintfDebugPrintf = 1, + NonSemanticDebugPrintfInstructionsMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticDebugPrintf_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h new file mode 100644 index 0000000..c52f32f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/NonSemanticShaderDebugInfo100.h @@ -0,0 +1,171 @@ +// Copyright (c) 2018 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. 
+ +#ifndef SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ +#define SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + NonSemanticShaderDebugInfo100Version = 100, + NonSemanticShaderDebugInfo100Version_BitWidthPadding = 0x7fffffff +}; +enum { + NonSemanticShaderDebugInfo100Revision = 6, + NonSemanticShaderDebugInfo100Revision_BitWidthPadding = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100Instructions { + NonSemanticShaderDebugInfo100DebugInfoNone = 0, + NonSemanticShaderDebugInfo100DebugCompilationUnit = 1, + NonSemanticShaderDebugInfo100DebugTypeBasic = 2, + NonSemanticShaderDebugInfo100DebugTypePointer = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifier = 4, + NonSemanticShaderDebugInfo100DebugTypeArray = 5, + NonSemanticShaderDebugInfo100DebugTypeVector = 6, + NonSemanticShaderDebugInfo100DebugTypedef = 7, + NonSemanticShaderDebugInfo100DebugTypeFunction = 8, + NonSemanticShaderDebugInfo100DebugTypeEnum = 9, + NonSemanticShaderDebugInfo100DebugTypeComposite = 10, + NonSemanticShaderDebugInfo100DebugTypeMember = 11, + NonSemanticShaderDebugInfo100DebugTypeInheritance = 12, + NonSemanticShaderDebugInfo100DebugTypePtrToMember = 13, + NonSemanticShaderDebugInfo100DebugTypeTemplate = 14, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameter = 15, + NonSemanticShaderDebugInfo100DebugTypeTemplateTemplateParameter = 16, + NonSemanticShaderDebugInfo100DebugTypeTemplateParameterPack = 17, + NonSemanticShaderDebugInfo100DebugGlobalVariable = 18, + NonSemanticShaderDebugInfo100DebugFunctionDeclaration = 19, + NonSemanticShaderDebugInfo100DebugFunction = 20, + NonSemanticShaderDebugInfo100DebugLexicalBlock = 21, + NonSemanticShaderDebugInfo100DebugLexicalBlockDiscriminator = 22, + NonSemanticShaderDebugInfo100DebugScope = 23, + NonSemanticShaderDebugInfo100DebugNoScope = 24, + NonSemanticShaderDebugInfo100DebugInlinedAt = 25, + NonSemanticShaderDebugInfo100DebugLocalVariable = 26, + NonSemanticShaderDebugInfo100DebugInlinedVariable = 27, + NonSemanticShaderDebugInfo100DebugDeclare = 28, + NonSemanticShaderDebugInfo100DebugValue = 29, + NonSemanticShaderDebugInfo100DebugOperation = 30, + NonSemanticShaderDebugInfo100DebugExpression = 31, + NonSemanticShaderDebugInfo100DebugMacroDef = 32, + NonSemanticShaderDebugInfo100DebugMacroUndef = 33, + NonSemanticShaderDebugInfo100DebugImportedEntity = 34, + NonSemanticShaderDebugInfo100DebugSource = 35, + NonSemanticShaderDebugInfo100DebugFunctionDefinition = 101, + NonSemanticShaderDebugInfo100DebugSourceContinued = 102, + NonSemanticShaderDebugInfo100DebugLine = 103, + NonSemanticShaderDebugInfo100DebugNoLine = 104, + NonSemanticShaderDebugInfo100DebugBuildIdentifier = 105, + NonSemanticShaderDebugInfo100DebugStoragePath = 106, + NonSemanticShaderDebugInfo100DebugEntryPoint = 107, + NonSemanticShaderDebugInfo100DebugTypeMatrix = 108, + NonSemanticShaderDebugInfo100InstructionsMax = 0x7fffffff +}; + + +enum NonSemanticShaderDebugInfo100DebugInfoFlags { + NonSemanticShaderDebugInfo100None = 0x0000, + NonSemanticShaderDebugInfo100FlagIsProtected = 0x01, + NonSemanticShaderDebugInfo100FlagIsPrivate = 0x02, + NonSemanticShaderDebugInfo100FlagIsPublic = 0x03, + NonSemanticShaderDebugInfo100FlagIsLocal = 0x04, + NonSemanticShaderDebugInfo100FlagIsDefinition = 0x08, + NonSemanticShaderDebugInfo100FlagFwdDecl = 0x10, + NonSemanticShaderDebugInfo100FlagArtificial = 0x20, + NonSemanticShaderDebugInfo100FlagExplicit = 0x40, + NonSemanticShaderDebugInfo100FlagPrototyped = 0x80, + 
NonSemanticShaderDebugInfo100FlagObjectPointer = 0x100, + NonSemanticShaderDebugInfo100FlagStaticMember = 0x200, + NonSemanticShaderDebugInfo100FlagIndirectVariable = 0x400, + NonSemanticShaderDebugInfo100FlagLValueReference = 0x800, + NonSemanticShaderDebugInfo100FlagRValueReference = 0x1000, + NonSemanticShaderDebugInfo100FlagIsOptimized = 0x2000, + NonSemanticShaderDebugInfo100FlagIsEnumClass = 0x4000, + NonSemanticShaderDebugInfo100FlagTypePassByValue = 0x8000, + NonSemanticShaderDebugInfo100FlagTypePassByReference = 0x10000, + NonSemanticShaderDebugInfo100FlagUnknownPhysicalLayout = 0x20000, + NonSemanticShaderDebugInfo100DebugInfoFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100BuildIdentifierFlags { + NonSemanticShaderDebugInfo100IdentifierPossibleDuplicates = 0x01, + NonSemanticShaderDebugInfo100BuildIdentifierFlagsMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncoding { + NonSemanticShaderDebugInfo100Unspecified = 0, + NonSemanticShaderDebugInfo100Address = 1, + NonSemanticShaderDebugInfo100Boolean = 2, + NonSemanticShaderDebugInfo100Float = 3, + NonSemanticShaderDebugInfo100Signed = 4, + NonSemanticShaderDebugInfo100SignedChar = 5, + NonSemanticShaderDebugInfo100Unsigned = 6, + NonSemanticShaderDebugInfo100UnsignedChar = 7, + NonSemanticShaderDebugInfo100DebugBaseTypeAttributeEncodingMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugCompositeType { + NonSemanticShaderDebugInfo100Class = 0, + NonSemanticShaderDebugInfo100Structure = 1, + NonSemanticShaderDebugInfo100Union = 2, + NonSemanticShaderDebugInfo100DebugCompositeTypeMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugTypeQualifier { + NonSemanticShaderDebugInfo100ConstType = 0, + NonSemanticShaderDebugInfo100VolatileType = 1, + NonSemanticShaderDebugInfo100RestrictType = 2, + NonSemanticShaderDebugInfo100AtomicType = 3, + NonSemanticShaderDebugInfo100DebugTypeQualifierMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugOperation { + NonSemanticShaderDebugInfo100Deref = 0, + NonSemanticShaderDebugInfo100Plus = 1, + NonSemanticShaderDebugInfo100Minus = 2, + NonSemanticShaderDebugInfo100PlusUconst = 3, + NonSemanticShaderDebugInfo100BitPiece = 4, + NonSemanticShaderDebugInfo100Swap = 5, + NonSemanticShaderDebugInfo100Xderef = 6, + NonSemanticShaderDebugInfo100StackValue = 7, + NonSemanticShaderDebugInfo100Constu = 8, + NonSemanticShaderDebugInfo100Fragment = 9, + NonSemanticShaderDebugInfo100DebugOperationMax = 0x7fffffff +}; + +enum NonSemanticShaderDebugInfo100DebugImportedEntity { + NonSemanticShaderDebugInfo100ImportedModule = 0, + NonSemanticShaderDebugInfo100ImportedDeclaration = 1, + NonSemanticShaderDebugInfo100DebugImportedEntityMax = 0x7fffffff +}; + + +#ifdef __cplusplus +} +#endif + +#endif // SPIRV_UNIFIED1_NonSemanticShaderDebugInfo100_H_ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SPVRemapper.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SPVRemapper.h new file mode 100644 index 0000000..d216946 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SPVRemapper.h @@ -0,0 +1,312 @@ +// +// Copyright (C) 2015 LunarG, Inc. +// +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#ifndef SPIRVREMAPPER_H
+#define SPIRVREMAPPER_H
+
+#include <string>
+#include <vector>
+#include <cstdlib>
+#include <exception>
+
+namespace spv {
+
+// MSVC defines __cplusplus as an older value, even when it supports almost all of 11.
+// We handle that here by making our own symbol.
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1700)
+#   define use_cpp11 1
+#endif
+
+class spirvbin_base_t
+{
+public:
+    enum Options {
+        NONE          = 0,
+        STRIP         = (1<<0),
+        MAP_TYPES     = (1<<1),
+        MAP_NAMES     = (1<<2),
+        MAP_FUNCS     = (1<<3),
+        DCE_FUNCS     = (1<<4),
+        DCE_VARS      = (1<<5),
+        DCE_TYPES     = (1<<6),
+        OPT_LOADSTORE = (1<<7),
+        OPT_FWD_LS    = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV
+        MAP_ALL       = (MAP_TYPES | MAP_NAMES | MAP_FUNCS),
+        DCE_ALL       = (DCE_FUNCS | DCE_VARS | DCE_TYPES),
+        OPT_ALL       = (OPT_LOADSTORE),
+
+        ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL),
+        DO_EVERYTHING = (STRIP | ALL_BUT_STRIP)
+    };
+};
+
+} // namespace spv
+
+#if !defined (use_cpp11)
+#include <cstdio>
+#include <cstdint>
+
+namespace spv {
+class spirvbin_t : public spirvbin_base_t
+{
+public:
+    spirvbin_t(int /*verbose = 0*/) { }
+
+    void remap(std::vector<std::uint32_t>& /*spv*/, unsigned int /*opts = 0*/)
+    {
+        printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n");
+        exit(5);
+    }
+};
+
+} // namespace spv
+
+#else // defined (use_cpp11)
+
+#include <functional>
+#include <cstdint>
+#include <unordered_map>
+#include <unordered_set>
+#include <map>
+#include <set>
+#include <cassert>
+
+#include "spirv.hpp"
+#include "spvIR.h"
+
+namespace spv {
+
+// class to hold SPIR-V binary data for remapping, DCE, and debug stripping
+class spirvbin_t : public spirvbin_base_t
+{
+public:
+    spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose), errorLatch(false)
+    { }
+
+    virtual ~spirvbin_t() { }
+
+    // remap on an existing binary in memory
+    void remap(std::vector<std::uint32_t>& spv, const std::vector<std::string>& whiteListStrings,
+               std::uint32_t opts = DO_EVERYTHING);
+
+    // remap on an existing binary in memory - legacy interface without white list
+    void remap(std::vector<std::uint32_t>& spv, std::uint32_t opts = DO_EVERYTHING);
+
+    // Type for error/log handler functions
+    typedef std::function<void(const std::string&)> errorfn_t;
+    typedef std::function<void(const std::string&)> logfn_t;
+
+    // Register error/log handling functions (can be lambda fn / functor / etc)
+    static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; }
+    static void registerLogHandler(logfn_t handler)     { logHandler = handler; }
+
+protected:
+    // This can be overridden to provide other message behavior if needed
+    virtual void msg(int minVerbosity, int indent, const std::string& txt) const;
+
+private:
+    // Local to global, or global to local ID map
+    typedef std::unordered_map<spv::Id, spv::Id> idmap_t;
+    typedef std::unordered_set<spv::Id>          idset_t;
+    typedef std::unordered_map<spv::Id, int>     blockmap_t;
+
+    void remap(std::uint32_t opts = DO_EVERYTHING);
+
+    // Map of names to IDs
+    typedef std::unordered_map<std::string, spv::Id> namemap_t;
+
+    typedef std::uint32_t spirword_t;
+
+    typedef std::pair<unsigned, unsigned> range_t;
+    typedef std::function<void(spv::Id&)>                idfn_t;
+    typedef std::function<bool(spv::Op, unsigned start)> instfn_t;
+
+    // Special Values for ID map:
+    static const spv::Id unmapped;    // unchanged from default value
+    static const spv::Id unused;      // unused ID
+    static const int     header_size; // SPIR-V header = 5 words
+
+    class id_iterator_t;
+
+    // For mapping type entries between different shaders
+    typedef std::vector<spirword_t>        typeentry_t;
+    typedef std::map<spv::Id, typeentry_t> globaltypes_t;
+
+    // A set that preserves position order, and a reverse map
+    typedef std::set<int>                    posmap_t;
+    typedef std::unordered_map<spv::Id, int> posmap_rev_t;
+
+    // Maps an ID to the size of its base type, if known.
+    typedef std::unordered_map<spv::Id, unsigned> typesize_map_t;
+
+    // handle error
+    void error(const std::string& txt) const { errorLatch = true; errorHandler(txt); }
+
+    bool     isConstOp(spv::Op opCode)     const;
+    bool     isTypeOp(spv::Op opCode)      const;
+    bool     isStripOp(spv::Op opCode)     const;
+    bool     isFlowCtrl(spv::Op opCode)    const;
+    range_t  literalRange(spv::Op opCode)  const;
+    range_t  typeRange(spv::Op opCode)     const;
+    range_t  constRange(spv::Op opCode)    const;
+    unsigned typeSizeInWords(spv::Id id)   const;
+    unsigned idTypeSizeInWords(spv::Id id) const;
+
+    bool isStripOp(spv::Op opCode, unsigned start) const;
+
+    spv::Id&        asId(unsigned word)                { return spv[word]; }
+    const spv::Id&  asId(unsigned word)          const { return spv[word]; }
+    spv::Op         asOpCode(unsigned word)      const { return opOpCode(spv[word]); }
+    std::uint32_t   asOpCodeHash(unsigned word);
+    spv::Decoration asDecoration(unsigned word)  const { return spv::Decoration(spv[word]); }
+    unsigned        asWordCount(unsigned word)   const { return opWordCount(spv[word]); }
+    spv::Id         asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 1 : 2)); }
+    unsigned        idPos(spv::Id id)            const;
+
+    static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; }
+    static spv::Op  opOpCode(spirword_t data)    { return spv::Op(data & spv::OpCodeMask); }
+
+    // Header access & set methods
+    spirword_t magic()    const        { return spv[0]; } // return magic number
+    spirword_t bound()    const        { return spv[3]; } // return Id bound from header
+    spirword_t bound(spirword_t b)     { return spv[3] = b; }
+    spirword_t genmagic() const        { return spv[2]; } // generator magic
+    spirword_t genmagic(spirword_t m)  { return spv[2] = m; }
+    spirword_t schemaNum() const       { return spv[4]; } // schema number from header
+
+    // Mapping fns: get
+    spv::Id localId(spv::Id id) const { return idMapL[id]; }
+
+    // Mapping fns: set
+    inline spv::Id localId(spv::Id id, spv::Id newId);
+    void           countIds(spv::Id id);
+
+    // Return next unused new local ID.
+    // NOTE: boost::dynamic_bitset would be more efficient due to find_next(),
+    // which std::vector<bool> doesn't have.
+   // Return next unused new local ID.
+   // NOTE: boost::dynamic_bitset would be more efficient due to find_next(),
+   // which std::vector doesn't have.
+   inline spv::Id nextUnusedId(spv::Id id);
+
+   void buildLocalMaps();
+   std::string literalString(unsigned word) const; // Return literal as a std::string
+   int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; }
+
+   bool isNewIdMapped(spv::Id newId)   const { return isMapped(newId); }
+   bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; }
+   bool isOldIdUnused(spv::Id oldId)   const { return localId(oldId) == unused; }
+   bool isOldIdMapped(spv::Id oldId)   const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); }
+   bool isFunction(spv::Id oldId)      const { return fnPos.find(oldId) != fnPos.end(); }
+
+   // bool    matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const;
+   // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const;
+   std::uint32_t hashType(unsigned typeStart) const;
+
+   spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0);
+   int         processInstruction(unsigned word, instfn_t, idfn_t);
+
+   void validate() const;
+   void mapTypeConst();
+   void mapFnBodies();
+   void optLoadStore();
+   void dceFuncs();
+   void dceVars();
+   void dceTypes();
+   void mapNames();
+   void foldIds();           // fold IDs to smallest space
+   void forwardLoadStores(); // load store forwarding (EXPERIMENTAL)
+   void offsetIds();         // create relative offset IDs
+
+   void applyMap();          // remap per local name map
+   void mapRemainder();      // map any IDs we haven't touched yet
+   void stripDebug();        // strip all debug info
+   void stripDeadRefs();     // strips debug info for now-dead references after DCE
+   void strip();             // remove debug symbols
+
+   std::vector<spirword_t> spv; // SPIR words
+
+   std::vector<std::string> stripWhiteList;
+
+   namemap_t nameMap; // ID names from OpName
+
+   // Since we want to also do binary ops, we can't use std::vector<bool>. we could use
+   // boost::dynamic_bitset, but we're trying to avoid a boost dependency.
+   typedef std::uint64_t bits_t;
+   std::vector<bits_t> mapped; // which new IDs have been mapped
+   static const int mBits = sizeof(bits_t) * 4;
+
+   bool isMapped(spv::Id id) const { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); }
+   void setMapped(spv::Id id) { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); }
+   void resizeMapped(spv::Id id) { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); }
+   size_t maxMappedId() const { return mapped.size() * mBits; }
+
+   // Add a strip range for a given instruction starting at 'start'
+   // Note: avoiding brace initializers to please older versions of MSVC.
+   void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); }
+
+   // Function start and end. use unordered_map because we'll have
+   // many fewer functions than IDs.
+   std::unordered_map<spv::Id, range_t> fnPos;
+
+   // Which functions are called, anywhere in the module, with a call count
+   std::unordered_map<spv::Id, int> fnCalls;
+
+   posmap_t       typeConstPos;  // word positions that define types & consts (ordered)
+   posmap_rev_t   idPosR;        // reverse map from IDs to positions
+   typesize_map_t idTypeSizeMap; // maps each ID to its type size, if known.
+
+   std::vector<spv::Id> idMapL; // ID {M}ap from {L}ocal to {G}lobal IDs
+
+   spv::Id entryPoint;   // module entry point
+   spv::Id largestNewId; // biggest new ID we have mapped anything to
+
+   // Sections of the binary to strip, given as [begin,end)
+   std::vector<range_t> stripRange;
+
+   // processing options:
+   std::uint32_t options;
+   int           verbose; // verbosity level
+
+   // Error latch: this is set if the error handler is ever executed. 
It would be better to + // use a try/catch block and throw, but that's not desired for certain environments, so + // this is the alternative. + mutable bool errorLatch; + + static errorfn_t errorHandler; + static logfn_t logHandler; +}; + +} // namespace SPV + +#endif // defined (use_cpp11) +#endif // SPIRVREMAPPER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SpvBuilder.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SpvBuilder.h new file mode 100644 index 0000000..f7fdc6a --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SpvBuilder.h @@ -0,0 +1,959 @@ +// +// Copyright (C) 2014-2015 LunarG, Inc. +// Copyright (C) 2015-2020 Google, Inc. +// Copyright (C) 2017 ARM Limited. +// Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// +// "Builder" is an interface to fully build SPIR-V IR. Allocate one of +// these to build (a thread safe) internal SPIR-V representation (IR), +// and then dump it as a binary stream according to the SPIR-V specification. +// +// A Builder has a 1:1 relationship with a SPIR-V module. 
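Because this header only declares the interface, a minimal lifecycle sketch may help orient readers. It is illustrative only (a real module also needs capabilities, execution modes, and so on), uses declarations that appear further down in this header, and assumes the include path:

    #include <vector>
    #include "SpvBuilder.h"  // path assumed

    std::vector<unsigned int> buildSkeletonModule()
    {
        spv::SpvBuildLogger logger;
        spv::Builder builder(spv::Spv_1_0, /*user number*/ 0, &logger);
        builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450);

        spv::Function* entry = builder.makeEntryPoint("main");  // void main() + entry block
        builder.addEntryPoint(spv::ExecutionModelGLCompute, entry, "main");

        builder.makeReturn(/*implicit*/ true);
        builder.leaveFunction();

        std::vector<unsigned int> words;
        builder.dump(words);  // serialize to a SPIR-V binary stream
        return words;
    }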
+// + +#pragma once +#ifndef SpvBuilder_H +#define SpvBuilder_H + +#include "Logger.h" +#include "spirv.hpp" +#include "spvIR.h" +namespace spv { + #include "GLSL.ext.KHR.h" + #include "NonSemanticShaderDebugInfo100.h" +} + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace spv { + +typedef enum { + Spv_1_0 = (1 << 16), + Spv_1_1 = (1 << 16) | (1 << 8), + Spv_1_2 = (1 << 16) | (2 << 8), + Spv_1_3 = (1 << 16) | (3 << 8), + Spv_1_4 = (1 << 16) | (4 << 8), + Spv_1_5 = (1 << 16) | (5 << 8), +} SpvVersion; + +class Builder { +public: + Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger); + virtual ~Builder(); + + static const int maxMatrixSize = 4; + + unsigned int getSpvVersion() const { return spvVersion; } + + void setSource(spv::SourceLanguage lang, int version) + { + sourceLang = lang; + sourceVersion = version; + } + spv::Id getStringId(const std::string& str) + { + auto sItr = stringIds.find(str); + if (sItr != stringIds.end()) + return sItr->second; + spv::Id strId = getUniqueId(); + Instruction* fileString = new Instruction(strId, NoType, OpString); + const char* file_c_str = str.c_str(); + fileString->addStringOperand(file_c_str); + strings.push_back(std::unique_ptr(fileString)); + module.mapInstruction(fileString); + stringIds[file_c_str] = strId; + return strId; + } + spv::Id getSourceFile() const + { + return sourceFileStringId; + } + void setSourceFile(const std::string& file) + { + sourceFileStringId = getStringId(file); + currentFileId = sourceFileStringId; + } + void setSourceText(const std::string& text) { sourceText = text; } + void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); } + void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); } + void setEmitOpLines() { emitOpLines = true; } + void setEmitNonSemanticShaderDebugInfo(bool const emit) + { + emitNonSemanticShaderDebugInfo = emit; + + if(emit) + { + importNonSemanticShaderDebugInfoInstructions(); + } + } + void setEmitNonSemanticShaderDebugSource(bool const src) + { + emitNonSemanticShaderDebugSource = src; + } + void addExtension(const char* ext) { extensions.insert(ext); } + void removeExtension(const char* ext) + { + extensions.erase(ext); + } + void addIncorporatedExtension(const char* ext, SpvVersion incorporatedVersion) + { + if (getSpvVersion() < static_cast(incorporatedVersion)) + addExtension(ext); + } + void promoteIncorporatedExtension(const char* baseExt, const char* promoExt, SpvVersion incorporatedVersion) + { + removeExtension(baseExt); + addIncorporatedExtension(promoExt, incorporatedVersion); + } + void addInclude(const std::string& name, const std::string& text) + { + spv::Id incId = getStringId(name); + includeFiles[incId] = &text; + } + Id import(const char*); + void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem) + { + addressModel = addr; + memoryModel = mem; + } + + void addCapability(spv::Capability cap) { capabilities.insert(cap); } + + // To get a new for anything needing a new one. + Id getUniqueId() { return ++uniqueId; } + + // To get a set of new s, e.g., for a set of function parameters + Id getUniqueIds(int numIds) + { + Id id = uniqueId + 1; + uniqueId += numIds; + return id; + } + + // Generate OpLine for non-filename-based #line directives (ie no filename + // seen yet): Log the current line, and if different than the last one, + // issue a new OpLine using the new line and current source file name. 
+ void setLine(int line); + + // If filename null, generate OpLine for non-filename-based line directives, + // else do filename-based: Log the current line and file, and if different + // than the last one, issue a new OpLine using the new line and file + // name. + void setLine(int line, const char* filename); + // Low-level OpLine. See setLine() for a layered helper. + void addLine(Id fileName, int line, int column); + void addDebugScopeAndLine(Id fileName, int line, int column); + + // For creating new types (will return old type if the requested one was already made). + Id makeVoidType(); + Id makeBoolType(bool const compilerGenerated = true); + Id makePointer(StorageClass, Id pointee); + Id makeForwardPointer(StorageClass); + Id makePointerFromForwardPointer(StorageClass, Id forwardPointerType, Id pointee); + Id makeIntegerType(int width, bool hasSign); // generic + Id makeIntType(int width) { return makeIntegerType(width, true); } + Id makeUintType(int width) { return makeIntegerType(width, false); } + Id makeFloatType(int width); + Id makeStructType(const std::vector& members, const char* name, bool const compilerGenerated = true); + Id makeStructResultType(Id type0, Id type1); + Id makeVectorType(Id component, int size); + Id makeMatrixType(Id component, int cols, int rows); + Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration + Id makeRuntimeArray(Id element); + Id makeFunctionType(Id returnType, const std::vector& paramTypes); + Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format); + Id makeSamplerType(); + Id makeSampledImageType(Id imageType); + Id makeCooperativeMatrixType(Id component, Id scope, Id rows, Id cols); + Id makeGenericType(spv::Op opcode, std::vector& operands); + + // SPIR-V NonSemantic Shader DebugInfo Instructions + struct DebugTypeLoc { + std::string name {}; + int line {0}; + int column {0}; + }; + std::unordered_map debugTypeLocs; + Id makeDebugInfoNone(); + Id makeBoolDebugType(int const size); + Id makeIntegerDebugType(int const width, bool const hasSign); + Id makeFloatDebugType(int const width); + Id makeSequentialDebugType(Id const baseType, Id const componentCount, NonSemanticShaderDebugInfo100Instructions const sequenceType); + Id makeArrayDebugType(Id const baseType, Id const componentCount); + Id makeVectorDebugType(Id const baseType, int const componentCount); + Id makeMatrixDebugType(Id const vectorType, int const vectorCount, bool columnMajor = true); + Id makeMemberDebugType(Id const memberType, DebugTypeLoc const& debugTypeLoc); + Id makeCompositeDebugType(std::vector const& memberTypes, char const*const name, + NonSemanticShaderDebugInfo100DebugCompositeType const tag, bool const isOpaqueType = false); + Id makeDebugSource(const Id fileName); + Id makeDebugCompilationUnit(); + Id createDebugGlobalVariable(Id const type, char const*const name, Id const variable); + Id createDebugLocalVariable(Id type, char const*const name, size_t const argNumber = 0); + Id makeDebugExpression(); + Id makeDebugDeclare(Id const debugLocalVariable, Id const localVariable); + Id makeDebugValue(Id const debugLocalVariable, Id const value); + Id makeDebugFunctionType(Id returnType, const std::vector& paramTypes); + Id makeDebugFunction(Function* function, Id nameId, Id funcTypeId); + Id makeDebugLexicalBlock(uint32_t line); + std::string unmangleFunctionName(std::string const& name) const; + + // accelerationStructureNV type + Id makeAccelerationStructureType(); + // 
rayQueryEXT type + Id makeRayQueryType(); + + // For querying about types. + Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } + Id getDerefTypeId(Id resultId) const; + Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } + Op getTypeClass(Id typeId) const { return getOpCode(typeId); } + Op getMostBasicTypeClass(Id typeId) const; + int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } + int getNumTypeConstituents(Id typeId) const; + int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } + Id getScalarTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId, int) const; + StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } + ImageFormat getImageTypeFormat(Id typeId) const + { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } + Id getResultingAccessChainType() const; + + bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } + bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } + bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } + bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } + bool isCooperativeMatrix(Id resultId)const { return isCooperativeMatrixType(getTypeId(resultId)); } + bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } + bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } + + bool isBoolType(Id typeId) + { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } + bool isIntType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) != 0; } + bool isUintType(Id typeId) const + { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) == 0; } + bool isFloatType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat; } + bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } + bool isScalarType(Id typeId) const + { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || + getTypeClass(typeId) == OpTypeBool; } + bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } + bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } + bool isStructType(Id typeId) const { return getTypeClass(typeId) == OpTypeStruct; } + bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } +#ifdef GLSLANG_WEB + bool isCooperativeMatrixType(Id typeId)const { return false; } +#else + bool isCooperativeMatrixType(Id typeId)const { return getTypeClass(typeId) == OpTypeCooperativeMatrixNV; } +#endif + bool isAggregateType(Id typeId) const + { return isArrayType(typeId) || isStructType(typeId) || isCooperativeMatrixType(typeId); } + bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } + bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } + bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } + bool containsType(Id typeId, Op typeOp, unsigned int width) const; + bool containsPhysicalStorageBufferOrArray(Id typeId) const; + + bool isConstantOpCode(Op opcode) const; + bool isSpecConstantOpCode(Op opcode) const; 
+ bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); } + bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; } + bool isSpecConstant(Id resultId) const { return isSpecConstantOpCode(getOpCode(resultId)); } + unsigned int getConstantScalar(Id resultId) const + { return module.getInstruction(resultId)->getImmediateOperand(0); } + StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); } + + bool isVariableOpCode(Op opcode) const { return opcode == OpVariable; } + bool isVariable(Id resultId) const { return isVariableOpCode(getOpCode(resultId)); } + bool isGlobalStorage(Id resultId) const { return getStorageClass(resultId) != StorageClassFunction; } + bool isGlobalVariable(Id resultId) const { return isVariable(resultId) && isGlobalStorage(resultId); } + // See if a resultId is valid for use as an initializer. + bool isValidInitializer(Id resultId) const { return isConstant(resultId) || isGlobalVariable(resultId); } + + bool isRayTracingOpCode(Op opcode) const; + + int getScalarTypeWidth(Id typeId) const + { + Id scalarTypeId = getScalarTypeId(typeId); + assert(getTypeClass(scalarTypeId) == OpTypeInt || getTypeClass(scalarTypeId) == OpTypeFloat); + return module.getInstruction(scalarTypeId)->getImmediateOperand(0); + } + + int getTypeNumColumns(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeConstituents(typeId); + } + int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); } + int getTypeNumRows(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeComponents(getContainedTypeId(typeId)); + } + int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); } + + Dim getTypeDimensionality(Id typeId) const + { + assert(isImageType(typeId)); + return (Dim)module.getInstruction(typeId)->getImmediateOperand(1); + } + Id getImageType(Id resultId) const + { + Id typeId = getTypeId(resultId); + assert(isImageType(typeId) || isSampledImageType(typeId)); + return isSampledImageType(typeId) ? module.getInstruction(typeId)->getIdOperand(0) : typeId; + } + bool isArrayedImageType(Id typeId) const + { + assert(isImageType(typeId)); + return module.getInstruction(typeId)->getImmediateOperand(3) != 0; + } + + // For making new constants (will return old constant if the requested one was already made). 
+ Id makeNullConstant(Id typeId); + Id makeBoolConstant(bool b, bool specConstant = false); + Id makeInt8Constant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(8), (unsigned)i, specConstant); } + Id makeUint8Constant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(8), u, specConstant); } + Id makeInt16Constant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(16), (unsigned)i, specConstant); } + Id makeUint16Constant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(16), u, specConstant); } + Id makeIntConstant(int i, bool specConstant = false) + { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); } + Id makeUintConstant(unsigned u, bool specConstant = false) + { return makeIntConstant(makeUintType(32), u, specConstant); } + Id makeInt64Constant(long long i, bool specConstant = false) + { return makeInt64Constant(makeIntType(64), (unsigned long long)i, specConstant); } + Id makeUint64Constant(unsigned long long u, bool specConstant = false) + { return makeInt64Constant(makeUintType(64), u, specConstant); } + Id makeFloatConstant(float f, bool specConstant = false); + Id makeDoubleConstant(double d, bool specConstant = false); + Id makeFloat16Constant(float f16, bool specConstant = false); + Id makeFpConstant(Id type, double d, bool specConstant = false); + + Id importNonSemanticShaderDebugInfoInstructions(); + + // Turn the array of constants into a proper spv constant of the requested type. + Id makeCompositeConstant(Id type, const std::vector& comps, bool specConst = false); + + // Methods for adding information outside the CFG. + Instruction* addEntryPoint(ExecutionModel, Function*, const char* name); + void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1); + void addExecutionMode(Function*, ExecutionMode mode, const std::vector& literals); + void addExecutionModeId(Function*, ExecutionMode mode, const std::vector& operandIds); + void addName(Id, const char* name); + void addMemberName(Id, int member, const char* name); + void addDecoration(Id, Decoration, int num = -1); + void addDecoration(Id, Decoration, const char*); + void addDecoration(Id, Decoration, const std::vector& literals); + void addDecoration(Id, Decoration, const std::vector& strings); + void addDecorationId(Id id, Decoration, Id idDecoration); + void addDecorationId(Id id, Decoration, const std::vector& operandIds); + void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1); + void addMemberDecoration(Id, unsigned int member, Decoration, const char*); + void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector& literals); + void addMemberDecoration(Id, unsigned int member, Decoration, const std::vector& strings); + + // At the end of what block do the next create*() instructions go? + // Also reset current last DebugScope and current source line to unknown + void setBuildPoint(Block* bp) { + buildPoint = bp; + lastDebugScopeId = NoResult; + currentLine = 0; + } + Block* getBuildPoint() const { return buildPoint; } + + // Make the entry-point function. The returned pointer is only valid + // for the lifetime of this builder. + Function* makeEntryPoint(const char*); + + // Make a shader-style function, and create its entry block if entry is non-zero. + // Return the function, pass back the entry. + // The returned pointer is only valid for the lifetime of this builder. 
+ Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name, + const std::vector& paramTypes, const std::vector& paramNames, + const std::vector>& precisions, Block **entry = 0); + + // Create a return. An 'implicit' return is one not appearing in the source + // code. In the case of an implicit return, no post-return block is inserted. + void makeReturn(bool implicit, Id retVal = 0); + + // Initialize state and generate instructions for new lexical scope + void enterScope(uint32_t line); + + // Set state and generate instructions to exit current lexical scope + void leaveScope(); + + // Prepare builder for generation of instructions for a function. + void enterFunction(Function const* function); + + // Generate all the code needed to finish up a function. + void leaveFunction(); + + // Create block terminator instruction for certain statements like + // discard, terminate-invocation, terminateRayEXT, or ignoreIntersectionEXT + void makeStatementTerminator(spv::Op opcode, const char *name); + + // Create block terminator instruction for statements that have input operands + // such as OpEmitMeshTasksEXT + void makeStatementTerminator(spv::Op opcode, const std::vector& operands, const char* name); + + // Create a global or function local or IO variable. + Id createVariable(Decoration precision, StorageClass storageClass, Id type, const char* name = nullptr, + Id initializer = NoResult, bool const compilerGenerated = true); + + // Create an intermediate with an undefined value. + Id createUndefined(Id type); + + // Store into an Id and return the l-value + void createStore(Id rValue, Id lValue, spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // Load from an Id and return it + Id createLoad(Id lValue, spv::Decoration precision, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // Create an OpAccessChain instruction + Id createAccessChain(StorageClass, Id base, const std::vector& offsets); + + // Create an OpArrayLength instruction + Id createArrayLength(Id base, unsigned int member); + + // Create an OpCooperativeMatrixLengthNV instruction + Id createCooperativeMatrixLength(Id type); + + // Create an OpCompositeExtract instruction + Id createCompositeExtract(Id composite, Id typeId, unsigned index); + Id createCompositeExtract(Id composite, Id typeId, const std::vector& indexes); + Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index); + Id createCompositeInsert(Id object, Id composite, Id typeId, const std::vector& indexes); + + Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex); + Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex); + + void createNoResultOp(Op); + void createNoResultOp(Op, Id operand); + void createNoResultOp(Op, const std::vector& operands); + void createNoResultOp(Op, const std::vector& operands); + void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask); + void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics); + Id createUnaryOp(Op, Id typeId, Id operand); + Id createBinOp(Op, Id typeId, Id operand1, Id operand2); + Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3); + Id createOp(Op, Id typeId, const std::vector& operands); + Id createOp(Op, Id typeId, const std::vector& operands); + Id createFunctionCall(spv::Function*, const std::vector&); + 
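All of the create* entry points above share one calling convention: opcode first, then the result type Id, then operand Ids, returning the Id of the new instruction. A hedged two-line sketch, assuming `builder` is positioned at a valid build point and `a`, `b` are Ids of 32-bit float type:

    spv::Id f32 = builder.makeFloatType(32);                    // OpTypeFloat 32, deduplicated
    spv::Id sum = builder.createBinOp(spv::OpFAdd, f32, a, b);  // %sum = OpFAdd %f32 %a %b
    sum = builder.setPrecision(sum, spv::DecorationRelaxedPrecision);  // declared just below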
Id createSpecConstantOp(Op, Id typeId, const std::vector& operands, const std::vector& literals); + + // Take an rvalue (source) and a set of channels to extract from it to + // make a new rvalue, which is returned. + Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector& channels); + + // Take a copy of an lvalue (target) and a source of components, and set the + // source components into the lvalue where the 'channels' say to put them. + // An updated version of the target is returned. + // (No true lvalue or stores are used.) + Id createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector& channels); + + // If both the id and precision are valid, the id + // gets tagged with the requested precision. + // The passed in id is always the returned id, to simplify use patterns. + Id setPrecision(Id id, Decoration precision) + { + if (precision != NoPrecision && id != NoResult) + addDecoration(id, precision); + + return id; + } + + // Can smear a scalar to a vector for the following forms: + // - promoteScalar(scalar, vector) // smear scalar to width of vector + // - promoteScalar(vector, scalar) // smear scalar to width of vector + // - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to + // - promoteScalar(scalar, scalar) // do nothing + // Other forms are not allowed. + // + // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'. + // The type of the created vector is a vector of components of the same type as the scalar. + // + // Note: One of the arguments will change, with the result coming back that way rather than + // through the return value. + void promoteScalar(Decoration precision, Id& left, Id& right); + + // Make a value by smearing the scalar to fill the type. + // vectorType should be the correct type for making a vector of scalarVal. + // (No conversions are done.) + Id smearScalar(Decoration precision, Id scalarVal, Id vectorType); + + // Create a call to a built-in function. + Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector& args); + + // List of parameters used to create a texture operation + struct TextureParameters { + Id sampler; + Id coords; + Id bias; + Id lod; + Id Dref; + Id offset; + Id offsets; + Id gradX; + Id gradY; + Id sample; + Id component; + Id texelOut; + Id lodClamp; + Id granularity; + Id coarse; + bool nonprivate; + bool volatil; + }; + + // Select the correct texture operation based on all inputs, and emit the correct instruction + Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, + bool noImplicit, const TextureParameters&, ImageOperandsMask); + + // Emit the OpTextureQuery* instruction that was passed in. + // Figure out the right return value and type, and return it. + Id createTextureQueryCall(Op, const TextureParameters&, bool isUnsignedResult); + + Id createSamplePositionCall(Decoration precision, Id, Id); + + Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned); + Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id); + + // Reduction comparison for composites: For equal and not-equal resulting in a scalar. 
+ Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */); + + // OpCompositeConstruct + Id createCompositeConstruct(Id typeId, const std::vector& constituents); + + // vector or scalar constructor + Id createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId); + + // matrix constructor + Id createMatrixConstructor(Decoration precision, const std::vector& sources, Id constructee); + + // Helper to use for building nested control flow with if-then-else. + class If { + public: + If(Id condition, unsigned int ctrl, Builder& builder); + ~If() {} + + void makeBeginElse(); + void makeEndIf(); + + private: + If(const If&); + If& operator=(If&); + + Builder& builder; + Id condition; + unsigned int control; + Function* function; + Block* headerBlock; + Block* thenBlock; + Block* elseBlock; + Block* mergeBlock; + }; + + // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing + // any case/default labels, all separated by one or more case/default labels. Each possible + // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this + // number space. How to compute the value is given by 'condition', as in switch(condition). + // + // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches. + // + // Use a defaultSegment < 0 if there is no default segment (to branch to post switch). + // + // Returns the right set of basic blocks to start each code segment with, so that the caller's + // recursion stack can hold the memory for it. + // + void makeSwitch(Id condition, unsigned int control, int numSegments, const std::vector& caseValues, + const std::vector& valueToSegment, int defaultSegment, std::vector& segmentBB); + + // Add a branch to the innermost switch's merge block. + void addSwitchBreak(); + + // Move to the next code segment, passing in the return argument in makeSwitch() + void nextSwitchSegment(std::vector& segmentBB, int segment); + + // Finish off the innermost switch. + void endSwitch(std::vector& segmentBB); + + struct LoopBlocks { + LoopBlocks(Block& head, Block& body, Block& merge, Block& continue_target) : + head(head), body(body), merge(merge), continue_target(continue_target) { } + Block &head, &body, &merge, &continue_target; + private: + LoopBlocks(); + LoopBlocks& operator=(const LoopBlocks&) = delete; + }; + + // Start a new loop and prepare the builder to generate code for it. Until + // closeLoop() is called for this loop, createLoopContinue() and + // createLoopExit() will target its corresponding blocks. + LoopBlocks& makeNewLoop(); + + // Create a new block in the function containing the build point. Memory is + // owned by the function object. + Block& makeNewBlock(); + + // Add a branch to the continue_target of the current (innermost) loop. + void createLoopContinue(); + + // Add an exit (e.g. "break") from the innermost loop that we're currently + // in. + void createLoopExit(); + + // Close the innermost loop that you're in + void closeLoop(); + + // + // Access chain design for an R-Value vs. L-Value: + // + // There is a single access chain the builder is building at + // any particular time. Such a chain can be used to either to a load or + // a store, when desired. + // + // Expressions can be r-values, l-values, or both, or only r-values: + // a[b.c].d = .... // l-value + // ... 
= a[b.c].d;  // r-value, that also looks like an l-value
+    //     ++a[b.c].d;       // r-value and l-value
+    //     (x + y)[2];       // r-value only, can't possibly be l-value
+    //
+    // Computing an r-value means generating code. Hence,
+    // r-values should only be computed when they are needed, not speculatively.
+    //
+    // Computing an l-value means saving away information for later use in the compiler,
+    // no code is generated until the l-value is later dereferenced. It is okay
+    // to speculatively generate an l-value, just not okay to speculatively dereference it.
+    //
+    // The base of the access chain (the left-most variable or expression
+    // from which everything is based) can be set either as an l-value
+    // or as an r-value. Most efficient would be to set an l-value if one
+    // is available. If an expression was evaluated, the resulting r-value
+    // can be set as the chain base.
+    //
+    // The users of this single access chain can save and restore if they
+    // want to nest or manage multiple chains.
+    //
+
+    struct AccessChain {
+        Id base;                       // for l-values, pointer to the base object, for r-values, the base object
+        std::vector<Id> indexChain;
+        Id instr;                      // cache the instruction that generates this access chain
+        std::vector<unsigned> swizzle; // each std::vector element selects the next GLSL component number
+        Id component;                  // a dynamic component index, can coexist with a swizzle,
+                                       // done after the swizzle, NoResult if not present
+        Id preSwizzleBaseType;         // dereferenced type, before swizzle or component is applied;
+                                       // NoType unless a swizzle or component is present
+        bool isRValue;                 // true if 'base' is an r-value, otherwise, base is an l-value
+        unsigned int alignment;        // bitwise OR of alignment values passed in. Accumulates worst alignment.
+                                       // Only tracks base and (optional) component selection alignment.
+
+        // Accumulate whether anything in the chain of structures has coherent decorations. 
+ struct CoherentFlags { + CoherentFlags() { clear(); } +#ifdef GLSLANG_WEB + void clear() { } + bool isVolatile() const { return false; } + CoherentFlags operator |=(const CoherentFlags &other) { return *this; } +#else + bool isVolatile() const { return volatil; } + bool isNonUniform() const { return nonUniform; } + bool anyCoherent() const { + return coherent || devicecoherent || queuefamilycoherent || workgroupcoherent || + subgroupcoherent || shadercallcoherent; + } + + unsigned coherent : 1; + unsigned devicecoherent : 1; + unsigned queuefamilycoherent : 1; + unsigned workgroupcoherent : 1; + unsigned subgroupcoherent : 1; + unsigned shadercallcoherent : 1; + unsigned nonprivate : 1; + unsigned volatil : 1; + unsigned isImage : 1; + unsigned nonUniform : 1; + + void clear() { + coherent = 0; + devicecoherent = 0; + queuefamilycoherent = 0; + workgroupcoherent = 0; + subgroupcoherent = 0; + shadercallcoherent = 0; + nonprivate = 0; + volatil = 0; + isImage = 0; + nonUniform = 0; + } + + CoherentFlags operator |=(const CoherentFlags &other) { + coherent |= other.coherent; + devicecoherent |= other.devicecoherent; + queuefamilycoherent |= other.queuefamilycoherent; + workgroupcoherent |= other.workgroupcoherent; + subgroupcoherent |= other.subgroupcoherent; + shadercallcoherent |= other.shadercallcoherent; + nonprivate |= other.nonprivate; + volatil |= other.volatil; + isImage |= other.isImage; + nonUniform |= other.nonUniform; + return *this; + } +#endif + }; + CoherentFlags coherentFlags; + }; + + // + // the SPIR-V builder maintains a single active chain that + // the following methods operate on + // + + // for external save and restore + AccessChain getAccessChain() { return accessChain; } + void setAccessChain(AccessChain newChain) { accessChain = newChain; } + + // clear accessChain + void clearAccessChain(); + + // set new base as an l-value base + void setAccessChainLValue(Id lValue) + { + assert(isPointer(lValue)); + accessChain.base = lValue; + } + + // set new base value as an r-value + void setAccessChainRValue(Id rValue) + { + accessChain.isRValue = true; + accessChain.base = rValue; + } + + // push offset onto the end of the chain + void accessChainPush(Id offset, AccessChain::CoherentFlags coherentFlags, unsigned int alignment) + { + accessChain.indexChain.push_back(offset); + accessChain.coherentFlags |= coherentFlags; + accessChain.alignment |= alignment; + } + + // push new swizzle onto the end of any existing swizzle, merging into a single swizzle + void accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType, + AccessChain::CoherentFlags coherentFlags, unsigned int alignment); + + // push a dynamic component selection onto the access chain, only applicable with a + // non-trivial swizzle or no swizzle + void accessChainPushComponent(Id component, Id preSwizzleBaseType, AccessChain::CoherentFlags coherentFlags, + unsigned int alignment) + { + if (accessChain.swizzle.size() != 1) { + accessChain.component = component; + if (accessChain.preSwizzleBaseType == NoType) + accessChain.preSwizzleBaseType = preSwizzleBaseType; + } + accessChain.coherentFlags |= coherentFlags; + accessChain.alignment |= alignment; + } + + // use accessChain and swizzle to store value + void accessChainStore(Id rvalue, Decoration nonUniform, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, + spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0); + + // use accessChain and swizzle to load an r-value + Id accessChainLoad(Decoration precision, Decoration 
l_nonUniform, Decoration r_nonUniform, Id ResultType, + spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, spv::Scope scope = spv::ScopeMax, + unsigned int alignment = 0); + + // Return whether or not the access chain can be represented in SPIR-V + // as an l-value. + // E.g., a[3].yx cannot be, while a[3].y and a[3].y[x] can be. + bool isSpvLvalue() const { return accessChain.swizzle.size() <= 1; } + + // get the direct pointer for an l-value + Id accessChainGetLValue(); + + // Get the inferred SPIR-V type of the result of the current access chain, + // based on the type of the base and the chain of dereferences. + Id accessChainGetInferredType(); + + // Add capabilities, extensions, remove unneeded decorations, etc., + // based on the resulting SPIR-V. + void postProcess(); + + // Prune unreachable blocks in the CFG and remove unneeded decorations. + void postProcessCFG(); + +#ifndef GLSLANG_WEB + // Add capabilities, extensions based on instructions in the module. + void postProcessFeatures(); + // Hook to visit each instruction in a block in a function + void postProcess(Instruction&); + // Hook to visit each non-32-bit sized float/int operation in a block. + void postProcessType(const Instruction&, spv::Id typeId); +#endif + + void dump(std::vector&) const; + + void createBranch(Block* block); + void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); + void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, + const std::vector& operands); + + // Sets to generate opcode for specialization constants. + void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; } + // Sets to generate opcode for non-specialization constants (normal mode). + void setToNormalCodeGenMode() { generatingOpCodeForSpecConst = false; } + // Check if the builder is generating code for spec constants. + bool isInSpecConstCodeGenMode() { return generatingOpCodeForSpecConst; } + + protected: + Id makeIntConstant(Id typeId, unsigned value, bool specConstant); + Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2); + Id findCompositeConstant(Op typeClass, Id typeId, const std::vector& comps); + Id findStructConstant(Id typeId, const std::vector& comps); + Id collapseAccessChain(); + void remapDynamicSwizzle(); + void transferAccessChainSwizzle(bool dynamic); + void simplifyAccessChainSwizzle(); + void createAndSetNoPredecessorBlock(const char*); + void createSelectionMerge(Block* mergeBlock, unsigned int control); + void dumpSourceInstructions(std::vector&) const; + void dumpSourceInstructions(const spv::Id fileId, const std::string& text, std::vector&) const; + void dumpInstructions(std::vector&, const std::vector >&) const; + void dumpModuleProcesses(std::vector&) const; + spv::MemoryAccessMask sanitizeMemoryAccessForStorageClass(spv::MemoryAccessMask memoryAccess, StorageClass sc) + const; + + unsigned int spvVersion; // the version of SPIR-V to emit in the header + SourceLanguage sourceLang; + int sourceVersion; + spv::Id sourceFileStringId; + spv::Id nonSemanticShaderCompilationUnitId {0}; + spv::Id nonSemanticShaderDebugInfo {0}; + spv::Id debugInfoNone {0}; + spv::Id debugExpression {0}; // Debug expression with zero operations. 
+ std::string sourceText; + int currentLine; + const char* currentFile; + spv::Id currentFileId; + std::stack currentDebugScopeId; + spv::Id lastDebugScopeId; + bool emitOpLines; + bool emitNonSemanticShaderDebugInfo; + bool restoreNonSemanticShaderDebugInfo; + bool emitNonSemanticShaderDebugSource; + std::set extensions; + std::vector sourceExtensions; + std::vector moduleProcesses; + AddressingModel addressModel; + MemoryModel memoryModel; + std::set capabilities; + int builderNumber; + Module module; + Block* buildPoint; + Id uniqueId; + Function* entryPointFunction; + bool generatingOpCodeForSpecConst; + AccessChain accessChain; + + // special blocks of instructions for output + std::vector > strings; + std::vector > imports; + std::vector > entryPoints; + std::vector > executionModes; + std::vector > names; + std::vector > decorations; + std::vector > constantsTypesGlobals; + std::vector > externals; + std::vector > functions; + + // not output, internally used for quick & dirty canonical (unique) creation + + // map type opcodes to constant inst. + std::unordered_map> groupedConstants; + // map struct-id to constant instructions + std::unordered_map> groupedStructConstants; + // map type opcodes to type instructions + std::unordered_map> groupedTypes; + // map type opcodes to debug type instructions + std::unordered_map> groupedDebugTypes; + // list of OpConstantNull instructions + std::vector nullConstants; + + // stack of switches + std::stack switchMerges; + + // Our loop stack. + std::stack loops; + + // map from strings to their string ids + std::unordered_map stringIds; + + // map from include file name ids to their contents + std::map includeFiles; + + // map from core id to debug id + std::map debugId; + + // map from file name string id to DebugSource id + std::unordered_map debugSourceId; + + // The stream for outputting warnings and errors. + SpvBuildLogger* logger; +}; // end Builder class + +}; // end spv namespace + +#endif // SpvBuilder_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SpvTools.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SpvTools.h new file mode 100644 index 0000000..5386048 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/SpvTools.h @@ -0,0 +1,93 @@ +// +// Copyright (C) 2014-2016 LunarG, Inc. +// Copyright (C) 2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Call into SPIRV-Tools to disassemble, validate, and optimize.
+//
+
+#pragma once
+#ifndef GLSLANG_SPV_TOOLS_H
+#define GLSLANG_SPV_TOOLS_H
+
+#if ENABLE_OPT
+#include <vector>
+#include <ostream>
+#include "spirv-tools/libspirv.h"
+#endif
+
+#include "glslang/MachineIndependent/localintermediate.h"
+#include "Logger.h"
+
+namespace glslang {
+
+struct SpvOptions {
+    bool generateDebugInfo {false};
+    bool stripDebugInfo {false};
+    bool disableOptimizer {true};
+    bool optimizeSize {false};
+    bool disassemble {false};
+    bool validate {false};
+    bool emitNonSemanticShaderDebugInfo {false};
+    bool emitNonSemanticShaderDebugSource{ false };
+};
+
+#if ENABLE_OPT
+
+// Use the SPIRV-Tools disassembler to print SPIR-V using a SPV_ENV_UNIVERSAL_1_3 environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv);
+
+// Use the SPIRV-Tools disassembler to print SPIR-V with a provided SPIR-V environment.
+void SpirvToolsDisassemble(std::ostream& out, const std::vector<unsigned int>& spirv,
+                           spv_target_env requested_context);
+
+// Apply the SPIRV-Tools validator to generated SPIR-V.
+void SpirvToolsValidate(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                        spv::SpvBuildLogger*, bool prelegalization);
+
+// Apply the SPIRV-Tools optimizer to generated SPIR-V. HLSL SPIR-V is legalized in the process.
+void SpirvToolsTransform(const glslang::TIntermediate& intermediate, std::vector<unsigned int>& spirv,
+                         spv::SpvBuildLogger*, const SpvOptions*);
+
+// Apply the SPIRV-Tools optimizer to strip debug info from SPIR-V. This is implicitly done by
+// SpirvToolsTransform if spvOptions->stripDebugInfo is set, but can be called separately if
+// optimization is disabled.
+void SpirvToolsStripDebugInfo(const glslang::TIntermediate& intermediate,
+                              std::vector<unsigned int>& spirv, spv::SpvBuildLogger*);
+
+#endif
+
+} // end namespace glslang
+
+#endif // GLSLANG_SPV_TOOLS_H
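A hedged sketch of how these entry points are commonly chained, assuming `intermediate`, `spirv`, and `logger` come from a normal glslang compile and that the build defines ENABLE_OPT:

    #include <iostream>

    // post-process a freshly generated module (sketch only; error handling omitted)
    glslang::SpvOptions options;
    options.disableOptimizer = false;  // actually run the SPIRV-Tools optimizer
    options.validate         = true;

    glslang::SpirvToolsTransform(intermediate, spirv, &logger, &options);
    glslang::SpirvToolsValidate(intermediate, spirv, &logger, /*prelegalization*/ false);
    glslang::SpirvToolsDisassemble(std::cout, spirv);  // SPV_ENV_UNIVERSAL_1_3 overload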
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/bitutils.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/bitutils.h
new file mode 100644
index 0000000..22e44ce
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/bitutils.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_UTIL_BITUTILS_H_
+#define LIBSPIRV_UTIL_BITUTILS_H_
+
+#include <cstdint>
+#include <cstring>
+
+namespace spvutils {
+
+// Performs a bitwise copy of source to the destination type Dest.
+template <typename Dest, typename Src>
+Dest BitwiseCast(Src source) {
+  Dest dest;
+  static_assert(sizeof(source) == sizeof(dest),
+                "BitwiseCast: Source and destination must have the same size");
+  std::memcpy(static_cast<void*>(&dest), &source, sizeof(dest));
+  return dest;
+}
+
+// SetBits<T, First, Num> returns an integer of type T with bits set
+// for position First through First + Num - 1, counting from the least
+// significant bit. In particular when Num == 0, no positions are set to 1.
+// A static assert will be triggered if First + Num > sizeof(T) * 8, that is,
+// a bit that will not fit in the underlying type is set.
+template <typename T, size_t First = 0, size_t Num = 0>
+struct SetBits {
+  static_assert(First < sizeof(T) * 8,
+                "Tried to set a bit that is shifted too far.");
+  const static T get = (T(1) << First) | SetBits<T, First + 1, Num - 1>::get;
+};
+
+template <typename T, size_t Last>
+struct SetBits<T, Last, 0> {
+  const static T get = T(0);
+};
+
+// This is all compile-time so we can put our tests right here.
+static_assert(SetBits<uint32_t, 0, 0>::get == uint32_t(0x00000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 1>::get == uint32_t(0x00000001),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 31, 1>::get == uint32_t(0x80000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 1, 2>::get == uint32_t(0x00000006),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 30, 2>::get == uint32_t(0xc0000000),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 31>::get == uint32_t(0x7FFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 0, 32>::get == uint32_t(0xFFFFFFFF),
+              "SetBits failed");
+static_assert(SetBits<uint32_t, 16, 16>::get == uint32_t(0xFFFF0000),
+              "SetBits failed");
+
+static_assert(SetBits<uint64_t, 0, 1>::get == uint64_t(0x0000000000000001LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 63, 1>::get == uint64_t(0x8000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 62, 2>::get == uint64_t(0xc000000000000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 31, 1>::get == uint64_t(0x0000000080000000LL),
+              "SetBits failed");
+static_assert(SetBits<uint64_t, 16, 16>::get == uint64_t(0x00000000FFFF0000LL),
+              "SetBits failed");
+
+} // namespace spvutils
+
+#endif // LIBSPIRV_UTIL_BITUTILS_H_
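A short usage sketch of the two utilities above (the values are hypothetical but match the compile-time tests): SetBits builds masks at compile time, and BitwiseCast reinterprets bit patterns via memcpy, avoiding strict-aliasing violations:

    #include <cstdint>
    #include "bitutils.h"

    // 5 exponent bits of an IEEE half float, starting at bit 10 -> 0x7C00
    constexpr uint16_t kHalfExpMask = spvutils::SetBits<uint16_t, 10, 5>::get;
    static_assert(kHalfExpMask == 0x7C00, "mask layout");

    // Inspect a float's bit pattern without undefined behavior.
    const uint32_t oneBits = spvutils::BitwiseCast<uint32_t>(1.0f);  // 0x3F800000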
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/disassemble.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/disassemble.h
new file mode 100644
index 0000000..b6a4635
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/disassemble.h
@@ -0,0 +1,53 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Disassembler for SPIR-V.
+//
+
+#pragma once
+#ifndef disassembler_H
+#define disassembler_H
+
+#include <iostream>
+#include <vector>
+
+namespace spv {
+
+    // disassemble with glslang custom disassembler
+    void Disassemble(std::ostream& out, const std::vector<unsigned int>&);
+
+} // end namespace spv
+
+#endif // disassembler_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/doc.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/doc.h
new file mode 100644
index 0000000..2a0b28c
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/doc.h
@@ -0,0 +1,259 @@
+//
+// Copyright (C) 2014-2015 LunarG, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//    Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//    Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Parameterize the SPIR-V enumerants.
+//
+
+#pragma once
+
+#include "spirv.hpp"
+
+#include <vector>
+
+namespace spv {
+
+// Fill in all the parameters
+void Parameterize();
+
+// Return the English names of all the enums. 
+const char* SourceString(int); +const char* AddressingString(int); +const char* MemoryString(int); +const char* ExecutionModelString(int); +const char* ExecutionModeString(int); +const char* StorageClassString(int); +const char* DecorationString(int); +const char* BuiltInString(int); +const char* DimensionString(int); +const char* SelectControlString(int); +const char* LoopControlString(int); +const char* FunctionControlString(int); +const char* SamplerAddressingModeString(int); +const char* SamplerFilterModeString(int); +const char* ImageFormatString(int); +const char* ImageChannelOrderString(int); +const char* ImageChannelTypeString(int); +const char* ImageChannelDataTypeString(int type); +const char* ImageOperandsString(int format); +const char* ImageOperands(int); +const char* FPFastMathString(int); +const char* FPRoundingModeString(int); +const char* LinkageTypeString(int); +const char* FuncParamAttrString(int); +const char* AccessQualifierString(int); +const char* MemorySemanticsString(int); +const char* MemoryAccessString(int); +const char* ExecutionScopeString(int); +const char* GroupOperationString(int); +const char* KernelEnqueueFlagsString(int); +const char* KernelProfilingInfoString(int); +const char* CapabilityString(int); +const char* OpcodeString(int); +const char* ScopeString(int mem); + +// For grouping opcodes into subsections +enum OpcodeClass { + OpClassMisc, + OpClassDebug, + OpClassAnnotate, + OpClassExtension, + OpClassMode, + OpClassType, + OpClassConstant, + OpClassMemory, + OpClassFunction, + OpClassImage, + OpClassConvert, + OpClassComposite, + OpClassArithmetic, + OpClassBit, + OpClassRelationalLogical, + OpClassDerivative, + OpClassFlowControl, + OpClassAtomic, + OpClassPrimitive, + OpClassBarrier, + OpClassGroup, + OpClassDeviceSideEnqueue, + OpClassPipe, + + OpClassCount, + OpClassMissing // all instructions start out as missing +}; + +// For parameterizing operands. +enum OperandClass { + OperandNone, + OperandId, + OperandVariableIds, + OperandOptionalLiteral, + OperandOptionalLiteralString, + OperandVariableLiterals, + OperandVariableIdLiteral, + OperandVariableLiteralId, + OperandLiteralNumber, + OperandLiteralString, + OperandVariableLiteralStrings, + OperandSource, + OperandExecutionModel, + OperandAddressing, + OperandMemory, + OperandExecutionMode, + OperandStorage, + OperandDimensionality, + OperandSamplerAddressingMode, + OperandSamplerFilterMode, + OperandSamplerImageFormat, + OperandImageChannelOrder, + OperandImageChannelDataType, + OperandImageOperands, + OperandFPFastMath, + OperandFPRoundingMode, + OperandLinkageType, + OperandAccessQualifier, + OperandFuncParamAttr, + OperandDecoration, + OperandBuiltIn, + OperandSelect, + OperandLoop, + OperandFunction, + OperandMemorySemantics, + OperandMemoryAccess, + OperandScope, + OperandGroupOperation, + OperandKernelEnqueueFlags, + OperandKernelProfilingInfo, + OperandCapability, + + OperandOpcode, + + OperandCount +}; + +// Any specific enum can have a set of capabilities that allow it: +typedef std::vector EnumCaps; + +// Parameterize a set of operands with their OperandClass(es) and descriptions. 
+class OperandParameters {
+public:
+    OperandParameters() { }
+    void push(OperandClass oc, const char* d, bool opt = false)
+    {
+        opClass.push_back(oc);
+        desc.push_back(d);
+        optional.push_back(opt);
+    }
+    void setOptional();
+    OperandClass getClass(int op) const { return opClass[op]; }
+    const char* getDesc(int op) const { return desc[op]; }
+    bool isOptional(int op) const { return optional[op]; }
+    int getNum() const { return (int)opClass.size(); }
+
+protected:
+    std::vector<OperandClass> opClass;
+    std::vector<const char*> desc;
+    std::vector<bool> optional;
+};
+
+// Parameterize an enumerant
+class EnumParameters {
+public:
+    EnumParameters() : desc(0) { }
+    const char* desc;
+};
+
+// Parameterize a set of enumerants that form an enum
+class EnumDefinition : public EnumParameters {
+public:
+    EnumDefinition() :
+        ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { }
+    void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false)
+    {
+        ceiling = ceil;
+        getName = name;
+        bitmask = mask;
+        enumParams = ep;
+    }
+    void setOperands(OperandParameters* op) { operandParams = op; }
+    int ceiling;   // ceiling of enumerants
+    bool bitmask;  // true if these enumerants combine into a bitmask
+    const char* (*getName)(int);      // a function that returns the name for each enumerant value (or shift)
+    EnumParameters* enumParams;       // parameters for each individual enumerant
+    OperandParameters* operandParams; // sets of operands
+};
+
+// Parameterize an instruction's logical format, including its known set of operands,
+// per OperandParameters above.
+class InstructionParameters {
+public:
+    InstructionParameters() :
+        opDesc("TBD"),
+        opClass(OpClassMissing),
+        typePresent(true),   // most normal, only exceptions have to be spelled out
+        resultPresent(true)  // most normal, only exceptions have to be spelled out
+    { }
+
+    void setResultAndType(bool r, bool t)
+    {
+        resultPresent = r;
+        typePresent = t;
+    }
+
+    bool hasResult() const { return resultPresent != 0; }
+    bool hasType()   const { return typePresent != 0; }
+
+    const char* opDesc;
+    OpcodeClass opClass;
+    OperandParameters operands;
+
+protected:
+    int typePresent   : 1;
+    int resultPresent : 1;
+};
+
+// The set of objects that hold all the instruction/operand
+// parameterization information.
+extern InstructionParameters InstructionDesc[];
+
+// These hold definitions of the enumerants used for operands
+extern EnumDefinition OperandClassParams[];
+
+const char* GetOperandDesc(OperandClass operand);
+void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false);
+const char* AccessQualifierString(int attr);
+
+void PrintOperands(const OperandParameters& operands, int reservedOperands);
+
+} // end namespace spv
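+
+// [Editor's note] Illustrative sketch only (not from upstream) of how the
+// OperandParameters container above is driven; the operand classes and
+// description strings here are made up for illustration.
+#if 0
+spv::OperandParameters operands;
+operands.push(spv::OperandId, "'Pointer'");
+operands.push(spv::OperandMemoryAccess, "", true);  // trailing optional operand
+for (int op = 0; op < operands.getNum(); ++op) {
+    spv::OperandClass oc = operands.getClass(op);   // drives decoding of operand 'op'
+    const char* desc = operands.getDesc(op);
+    bool optional = operands.isOptional(op);
+}
+#endif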
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/hex_float.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/hex_float.h
new file mode 100644
index 0000000..8be8e9f
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/hex_float.h
@@ -0,0 +1,1078 @@
+// Copyright (c) 2015-2016 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_
+#define LIBSPIRV_UTIL_HEX_FLOAT_H_
+
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+namespace std {
+bool isnan(double f)
+{
+  return ::_isnan(f) != 0;
+}
+bool isinf(double f)
+{
+  return ::_finite(f) == 0;
+}
+}
+#endif
+
+#include "bitutils.h"
+
+namespace spvutils {
+
+class Float16 {
+ public:
+  Float16(uint16_t v) : val(v) {}
+  Float16() {}
+  static bool isNan(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0);
+  }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(const Float16& val) {
+    return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0);
+  }
+  Float16(const Float16& other) { val = other.val; }
+  uint16_t get_value() const { return val; }
+
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16(0x7bff); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16(0xfbff); }
+
+ private:
+  uint16_t val;
+};
+
+// To specialize this type, you must override uint_type to define
+// an unsigned integer that can fit your floating point type.
+// You must also add an isNan function that returns true if
+// a value is Nan.
+template <typename T>
+struct FloatProxyTraits {
+  typedef void uint_type;
+};
+
+template <>
+struct FloatProxyTraits<float> {
+  typedef uint32_t uint_type;
+  static bool isNan(float f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(float f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static float max() { return std::numeric_limits<float>::max(); }
+  // Returns the lowest normal value.
+  static float lowest() { return std::numeric_limits<float>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<double> {
+  typedef uint64_t uint_type;
+  static bool isNan(double f) { return std::isnan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(double f) { return std::isinf(f); }
+  // Returns the maximum normal value.
+  static double max() { return std::numeric_limits<double>::max(); }
+  // Returns the lowest normal value.
+  static double lowest() { return std::numeric_limits<double>::lowest(); }
+};
+
+template <>
+struct FloatProxyTraits<Float16> {
+  typedef uint16_t uint_type;
+  static bool isNan(Float16 f) { return Float16::isNan(f); }
+  // Returns true if the given value is any kind of infinity.
+  static bool isInfinity(Float16 f) { return Float16::isInfinity(f); }
+  // Returns the maximum normal value.
+  static Float16 max() { return Float16::max(); }
+  // Returns the lowest normal value.
+  static Float16 lowest() { return Float16::lowest(); }
+};
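+
+// [Editor's note] Illustrative sketch only (not upstream code): binary16 NaN
+// and infinity share an all-ones exponent; NaN carries a non-zero payload.
+#if 0
+spvutils::Float16 h(0x7C01);                // exponent all ones, payload 1
+bool is_nan = spvutils::Float16::isNan(h);  // true
+bool is_inf = spvutils::Float16::isInfinity(spvutils::Float16(0x7C00));  // true
+#endif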
+
+// Since copying a floating point number (especially if it is NaN)
+// does not guarantee that bits are preserved, this class lets us
+// store the type and use it as a float when necessary.
+template <typename T>
+class FloatProxy {
+ public:
+  typedef typename FloatProxyTraits<T>::uint_type uint_type;
+
+  // Since this is to act similar to the normal floats,
+  // do not initialize the data by default.
+  FloatProxy() {}
+
+  // Intentionally non-explicit. This is a proxy type so
+  // implicit conversions allow us to use it more transparently.
+  FloatProxy(T val) { data_ = BitwiseCast<uint_type>(val); }
+
+  // Intentionally non-explicit. This is a proxy type so
+  // implicit conversions allow us to use it more transparently.
+  FloatProxy(uint_type val) { data_ = val; }
+
+  // This is helpful to have and is guaranteed not to stomp bits.
+  FloatProxy operator-() const {
+    return static_cast<uint_type>(data_ ^
+                                  (uint_type(0x1) << (sizeof(T) * 8 - 1)));
+  }
+
+  // Returns the data as a floating point value.
+  T getAsFloat() const { return BitwiseCast<T>(data_); }
+
+  // Returns the raw data.
+  uint_type data() const { return data_; }
+
+  // Returns true if the value represents any type of NaN.
+  bool isNan() { return FloatProxyTraits<T>::isNan(getAsFloat()); }
+  // Returns true if the value represents any type of infinity.
+  bool isInfinity() { return FloatProxyTraits<T>::isInfinity(getAsFloat()); }
+
+  // Returns the maximum normal value.
+  static FloatProxy max() {
+    return FloatProxy(FloatProxyTraits<T>::max());
+  }
+  // Returns the lowest normal value.
+  static FloatProxy lowest() {
+    return FloatProxy(FloatProxyTraits<T>::lowest());
+  }
+
+ private:
+  uint_type data_;
+};
+
+template <typename T>
+bool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) {
+  return first.data() == second.data();
+}
+
+// Reads a FloatProxy value as a normal float from a stream.
+template <typename T>
+std::istream& operator>>(std::istream& is, FloatProxy<T>& value) {
+  T float_val;
+  is >> float_val;
+  value = FloatProxy<T>(float_val);
+  return is;
+}
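+
+// [Editor's note] Illustrative sketch only (not upstream code): FloatProxy
+// round-trips the exact bit pattern, which matters for NaN payloads.
+#if 0
+spvutils::FloatProxy<float> p(1.5f);  // 1.5f is encoded as 0x3FC00000
+uint32_t raw = p.data();              // raw == 0x3FC00000u
+float back = p.getAsFloat();          // bits preserved exactly, back == 1.5f
+#endif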
+
+// This is an example traits class. It is not meant to be used in practice,
+// but will be the default for any non-specialized type.
+template <typename T>
+struct HexFloatTraits {
+  // Integer type that can store this hex-float.
+  typedef void uint_type;
+  // Signed integer type that can store this hex-float.
+  typedef void int_type;
+  // The numerical type that this HexFloat represents.
+  typedef void underlying_type;
+  // The type needed to construct the underlying type.
+  typedef void native_type;
+  // The number of bits that are actually relevant in the uint_type.
+  // This allows us to deal with, for example, 24-bit values in a 32-bit
+  // integer.
+  static const uint32_t num_used_bits = 0;
+  // Number of bits that represent the exponent.
+  static const uint32_t num_exponent_bits = 0;
+  // Number of bits that represent the fractional part.
+  static const uint32_t num_fraction_bits = 0;
+  // The bias of the exponent. (How much we need to subtract from the stored
+  // value to get the correct value.)
+  static const uint32_t exponent_bias = 0;
+};
+
+// Traits for IEEE float.
+// 1 sign bit, 8 exponent bits, 23 fractional bits.
+template <>
+struct HexFloatTraits<FloatProxy<float>> {
+  typedef uint32_t uint_type;
+  typedef int32_t int_type;
+  typedef FloatProxy<float> underlying_type;
+  typedef float native_type;
+  static const uint_type num_used_bits = 32;
+  static const uint_type num_exponent_bits = 8;
+  static const uint_type num_fraction_bits = 23;
+  static const uint_type exponent_bias = 127;
+};
+
+// Traits for IEEE double.
+// 1 sign bit, 11 exponent bits, 52 fractional bits.
+template <>
+struct HexFloatTraits<FloatProxy<double>> {
+  typedef uint64_t uint_type;
+  typedef int64_t int_type;
+  typedef FloatProxy<double> underlying_type;
+  typedef double native_type;
+  static const uint_type num_used_bits = 64;
+  static const uint_type num_exponent_bits = 11;
+  static const uint_type num_fraction_bits = 52;
+  static const uint_type exponent_bias = 1023;
+};
+
+// Traits for IEEE half.
+// 1 sign bit, 5 exponent bits, 10 fractional bits.
+template <>
+struct HexFloatTraits<FloatProxy<Float16>> {
+  typedef uint16_t uint_type;
+  typedef int16_t int_type;
+  typedef uint16_t underlying_type;
+  typedef uint16_t native_type;
+  static const uint_type num_used_bits = 16;
+  static const uint_type num_exponent_bits = 5;
+  static const uint_type num_fraction_bits = 10;
+  static const uint_type exponent_bias = 15;
+};
+
+enum round_direction {
+  kRoundToZero,
+  kRoundToNearestEven,
+  kRoundToPositiveInfinity,
+  kRoundToNegativeInfinity
+};
+
+// Template class that houses a floating point number.
+// It exposes a number of constants based on the provided traits to
+// assist in interpreting the bits of the value.
+template <typename T, typename Traits = HexFloatTraits<T>>
+class HexFloat {
+ public:
+  typedef typename Traits::uint_type uint_type;
+  typedef typename Traits::int_type int_type;
+  typedef typename Traits::underlying_type underlying_type;
+  typedef typename Traits::native_type native_type;
+
+  explicit HexFloat(T f) : value_(f) {}
+
+  T value() const { return value_; }
+  void set_value(T f) { value_ = f; }
+
+  // These are all written like this because it is convenient to have
+  // compile-time constants for all of these values.
+
+  // Pass-through values to save typing.
+  static const uint32_t num_used_bits = Traits::num_used_bits;
+  static const uint32_t exponent_bias = Traits::exponent_bias;
+  static const uint32_t num_exponent_bits = Traits::num_exponent_bits;
+  static const uint32_t num_fraction_bits = Traits::num_fraction_bits;
+
+  // Number of bits to shift left to set the highest relevant bit.
+  static const uint32_t top_bit_left_shift = num_used_bits - 1;
+  // How many nibbles (hex characters) the fractional part takes up.
+  static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4;
+  // If the fractional part does not fit evenly into a hex character (4-bits)
+  // then we have to left-shift to get rid of leading 0s. This is the amount
+  // we have to shift (might be 0).
+  static const uint32_t num_overflow_bits =
+      fraction_nibbles * 4 - num_fraction_bits;
+
+  // The representation of the fraction, not the actual bits. This
+  // includes the leading bit that is usually implicit.
+  static const uint_type fraction_represent_mask =
+      spvutils::SetBits<uint_type, 0,
+                        num_fraction_bits + num_overflow_bits>::get;
+
+  // The topmost bit in the nibble-aligned fraction.
+  static const uint_type fraction_top_bit =
+      uint_type(1) << (num_fraction_bits + num_overflow_bits - 1);
+
+  // The least significant bit in the exponent, which is also the bit
+  // immediately to the left of the significand.
+  static const uint_type first_exponent_bit = uint_type(1)
+                                              << (num_fraction_bits);
+
+  // The mask for the encoded fraction. It does not include the
+  // implicit bit.
+  static const uint_type fraction_encode_mask =
+      spvutils::SetBits<uint_type, 0, num_fraction_bits>::get;
+
+  // The bit that is used as a sign.
+  static const uint_type sign_mask = uint_type(1) << top_bit_left_shift;
+
+  // The bits that represent the exponent.
+  static const uint_type exponent_mask =
+      spvutils::SetBits<uint_type, num_fraction_bits, num_exponent_bits>::get;
+
+  // How far left the exponent is shifted.
+  static const uint32_t exponent_left_shift = num_fraction_bits;
+
+  // How far from the right edge the fraction is shifted.
+  static const uint32_t fraction_right_shift =
+      static_cast<uint32_t>(sizeof(uint_type) * 8) - num_fraction_bits;
+
+  // The maximum representable unbiased exponent.
+  static const int_type max_exponent =
+      (exponent_mask >> num_fraction_bits) - exponent_bias;
+  // The minimum representable exponent for normalized numbers.
+  static const int_type min_exponent = -static_cast<int_type>(exponent_bias);
+
+  // Returns the bits associated with the value.
+  uint_type getBits() const { return spvutils::BitwiseCast<uint_type>(value_); }
+
+  // Returns the bits associated with the value, without the leading sign bit.
+  uint_type getUnsignedBits() const {
+    return static_cast<uint_type>(spvutils::BitwiseCast<uint_type>(value_) &
+                                  ~sign_mask);
+  }
+
+  // Returns the bits associated with the exponent, shifted to start at the
+  // lsb of the type.
+  const uint_type getExponentBits() const {
+    return static_cast<uint_type>((getBits() & exponent_mask) >>
+                                  num_fraction_bits);
+  }
+
+  // Returns the exponent in unbiased form. This is the exponent in the
+  // human-friendly form.
+  const int_type getUnbiasedExponent() const {
+    return static_cast<int_type>(getExponentBits() - exponent_bias);
+  }
+
+  // Returns just the significand bits from the value.
+  const uint_type getSignificandBits() const {
+    return getBits() & fraction_encode_mask;
+  }
+
+  // If the number was normalized, returns the unbiased exponent.
+  // If the number was denormal, normalize the exponent first.
+  const int_type getUnbiasedNormalizedExponent() const {
+    if ((getBits() & ~sign_mask) == 0) {  // special case if everything is 0
+      return 0;
+    }
+    int_type exp = getUnbiasedExponent();
+    if (exp == min_exponent) {  // We are in denorm land.
+      uint_type significand_bits = getSignificandBits();
+      while ((significand_bits & (first_exponent_bit >> 1)) == 0) {
+        significand_bits = static_cast<uint_type>(significand_bits << 1);
+        exp = static_cast<int_type>(exp - 1);
+      }
+      significand_bits &= fraction_encode_mask;
+    }
+    return exp;
+  }
+
+  // Returns the significand after it has been normalized.
+  const uint_type getNormalizedSignificand() const {
+    int_type unbiased_exponent = getUnbiasedNormalizedExponent();
+    uint_type significand = getSignificandBits();
+    for (int_type i = unbiased_exponent; i <= min_exponent; ++i) {
+      significand = static_cast<uint_type>(significand << 1);
+    }
+    significand &= fraction_encode_mask;
+    return significand;
+  }
+
+  // Returns true if this number represents a negative value.
+  bool isNegative() const { return (getBits() & sign_mask) != 0; }
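+
+  // [Editor's note] Illustrative sketch only (not upstream code), showing the
+  // accessors above on a concrete value.
+#if 0
+  spvutils::HexFloat<spvutils::FloatProxy<float>> hf(-0.75f);
+  bool neg = hf.isNegative();              // true
+  int32_t exp = hf.getUnbiasedExponent();  // -1, since 0.75 = 1.1b * 2^-1
+#endif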
+
+  // Sets this HexFloat from the individual components.
+  // Note this assumes EVERY significand is normalized, and has an implicit
+  // leading one. This means that the only way that this method will set 0,
+  // is if you set a number so denormalized that it underflows.
+  // Do not use this method with raw bits extracted from a subnormal number,
+  // since subnormals do not have an implicit leading 1 in the significand.
+  // The significand is also expected to be in the
+  // lowest-most num_fraction_bits of the uint_type.
+  // The exponent is expected to be unbiased, meaning an exponent of
+  // 0 actually means 0.
+  // If underflow_round_up is set, then on underflow, if a number is non-0
+  // and would underflow, we round up to the smallest denorm.
+  void setFromSignUnbiasedExponentAndNormalizedSignificand(
+      bool negative, int_type exponent, uint_type significand,
+      bool round_denorm_up) {
+    bool significand_is_zero = significand == 0;
+
+    if (exponent <= min_exponent) {
+      // If this was denormalized, then we have to shift the bit on, meaning
+      // the significand is not zero.
+      significand_is_zero = false;
+      significand |= first_exponent_bit;
+      significand = static_cast<uint_type>(significand >> 1);
+    }
+
+    while (exponent < min_exponent) {
+      significand = static_cast<uint_type>(significand >> 1);
+      ++exponent;
+    }
+
+    if (exponent == min_exponent) {
+      if (significand == 0 && !significand_is_zero && round_denorm_up) {
+        significand = static_cast<uint_type>(0x1);
+      }
+    }
+
+    uint_type new_value = 0;
+    if (negative) {
+      new_value = static_cast<uint_type>(new_value | sign_mask);
+    }
+    exponent = static_cast<int_type>(exponent + exponent_bias);
+    assert(exponent >= 0);
+
+    // put it all together
+    exponent = static_cast<uint_type>((exponent << exponent_left_shift) &
+                                      exponent_mask);
+    significand = static_cast<uint_type>(significand & fraction_encode_mask);
+    new_value = static_cast<uint_type>(new_value | (exponent | significand));
+    value_ = BitwiseCast<T>(new_value);
+  }
+
+  // Increments the significand of this number by the given amount.
+  // If this would spill the significand into the implicit bit,
+  // carry is set to true and the significand is shifted to fit into
+  // the correct location, otherwise carry is set to false.
+  // All significands and to_increment are assumed to be within the bounds
+  // for a valid significand.
+  static uint_type incrementSignificand(uint_type significand,
+                                        uint_type to_increment, bool* carry) {
+    significand = static_cast<uint_type>(significand + to_increment);
+    *carry = false;
+    if (significand & first_exponent_bit) {
+      *carry = true;
+      // The implicit 1-bit will have carried, so we should zero-out the
+      // top bit and shift back.
+      significand = static_cast<uint_type>(significand & ~first_exponent_bit);
+      significand = static_cast<uint_type>(significand >> 1);
+    }
+    return significand;
+  }
+
+  // These exist because MSVC throws warnings on negative right-shifts
+  // even if they are not going to be executed. Eg:
+  // constant_number < 0? 0: constant_number
+  // These convert the negative left-shifts into right shifts.
+
+  template <typename int_type>
+  uint_type negatable_left_shift(int_type N, uint_type val)
+  {
+    if(N >= 0)
+      return val << N;
+
+    return val >> -N;
+  }
+
+  template <typename int_type>
+  uint_type negatable_right_shift(int_type N, uint_type val)
+  {
+    if(N >= 0)
+      return val >> N;
+
+    return val << -N;
+  }
+
+  // Returns the significand, rounded to fit in a significand in
+  // other_T. This is shifted so that the most significant
+  // bit of the rounded number lines up with the most significant bit
+  // of the returned significand.
+  template <typename other_T>
+  typename other_T::uint_type getRoundedNormalizedSignificand(
+      round_direction dir, bool* carry_bit) {
+    typedef typename other_T::uint_type other_uint_type;
+    static const int_type num_throwaway_bits =
+        static_cast<int_type>(num_fraction_bits) -
+        static_cast<int_type>(other_T::num_fraction_bits);
+
+    static const uint_type last_significant_bit =
+        (num_throwaway_bits < 0)
+            ? 0
+            : negatable_left_shift(num_throwaway_bits, 1u);
+    static const uint_type first_rounded_bit =
+        (num_throwaway_bits < 1)
+            ? 0
+            : negatable_left_shift(num_throwaway_bits - 1, 1u);
+
+    static const uint_type throwaway_mask_bits =
+        num_throwaway_bits > 0 ? num_throwaway_bits : 0;
+    static const uint_type throwaway_mask =
+        spvutils::SetBits<uint_type, 0, throwaway_mask_bits>::get;
+
+    *carry_bit = false;
+    other_uint_type out_val = 0;
+    uint_type significand = getNormalizedSignificand();
+    // If we are up-casting, then we just have to shift to the right location.
+    if (num_throwaway_bits <= 0) {
+      out_val = static_cast<other_uint_type>(significand);
+      uint_type shift_amount = static_cast<uint_type>(-num_throwaway_bits);
+      out_val = static_cast<other_uint_type>(out_val << shift_amount);
+      return out_val;
+    }
+
+    // If every non-representable bit is 0, then we don't have any casting to
+    // do.
+    if ((significand & throwaway_mask) == 0) {
+      return static_cast<other_uint_type>(
+          negatable_right_shift(num_throwaway_bits, significand));
+    }
+
+    bool round_away_from_zero = false;
+    // We actually have to narrow the significand here, so we have to follow
+    // the rounding rules.
+    switch (dir) {
+      case kRoundToZero:
+        break;
+      case kRoundToPositiveInfinity:
+        round_away_from_zero = !isNegative();
+        break;
+      case kRoundToNegativeInfinity:
+        round_away_from_zero = isNegative();
+        break;
+      case kRoundToNearestEven:
+        // Have to round down, round bit is 0
+        if ((first_rounded_bit & significand) == 0) {
+          break;
+        }
+        if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) {
+          // If any subsequent bit of the rounded portion is non-0 then we
+          // round up.
+          round_away_from_zero = true;
+          break;
+        }
+        // We are exactly half-way between 2 numbers, pick even.
+        if ((significand & last_significant_bit) != 0) {
+          // 1 for our last bit, round up.
+          round_away_from_zero = true;
+          break;
+        }
+        break;
+    }
+
+    if (round_away_from_zero) {
+      return static_cast<other_uint_type>(negatable_right_shift(
+          num_throwaway_bits,
+          incrementSignificand(significand, last_significant_bit, carry_bit)));
+    } else {
+      return static_cast<other_uint_type>(
+          negatable_right_shift(num_throwaway_bits, significand));
+    }
+  }
+
+  // Casts this value to another HexFloat. If the cast is widening,
+  // then round_dir is ignored. If the cast is narrowing, then
+  // the result is rounded in the direction specified.
+  // This number will retain Nan and Inf values.
+  // It will also saturate to Inf if the number overflows, and
+  // underflow to (0 or min depending on rounding) if the number underflows.
+  template <typename other_T>
+  void castTo(other_T& other, round_direction round_dir) {
+    other = other_T(static_cast<typename other_T::native_type>(0));
+    bool negate = isNegative();
+    if (getUnsignedBits() == 0) {
+      if (negate) {
+        other.set_value(-other.value());
+      }
+      return;
+    }
+    uint_type significand = getSignificandBits();
+    bool carried = false;
+    typename other_T::uint_type rounded_significand =
+        getRoundedNormalizedSignificand<other_T>(round_dir, &carried);
+
+    int_type exponent = getUnbiasedExponent();
+    if (exponent == min_exponent) {
+      // If we are denormal, normalize the exponent, so that we can encode
+      // easily.
+      exponent = static_cast<int_type>(exponent + 1);
+      for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0;
+           check_bit = static_cast<uint_type>(check_bit >> 1)) {
+        exponent = static_cast<int_type>(exponent - 1);
+        if (check_bit & significand) break;
+      }
+    }
+
+    bool is_nan =
+        (getBits() & exponent_mask) == exponent_mask && significand != 0;
+    bool is_inf =
+        !is_nan &&
+        ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) ||
+         (significand == 0 && (getBits() & exponent_mask) == exponent_mask));
+
+    // If we are Nan or Inf we should pass that through.
+    if (is_inf) {
+      other.set_value(BitwiseCast<typename other_T::underlying_type>(
+          static_cast<typename other_T::uint_type>(
+              (negate ? other_T::sign_mask : 0) | other_T::exponent_mask)));
+      return;
+    }
+    if (is_nan) {
+      typename other_T::uint_type shifted_significand;
+      shifted_significand = static_cast<typename other_T::uint_type>(
+          negatable_left_shift(
+              static_cast<int_type>(other_T::num_fraction_bits) -
+                  static_cast<int_type>(num_fraction_bits),
+              significand));
+
+      // We are some sort of Nan. We try to keep the bit-pattern of the Nan
+      // as close as possible. If we had to shift off bits so we are 0, then we
+      // just set the last bit.
+      other.set_value(BitwiseCast<typename other_T::underlying_type>(
+          static_cast<typename other_T::uint_type>(
+              (negate ? other_T::sign_mask : 0) | other_T::exponent_mask |
+              (shifted_significand == 0 ? 0x1 : shifted_significand))));
+      return;
+    }
+
+    bool round_underflow_up =
+        isNegative() ? round_dir == kRoundToNegativeInfinity
+                     : round_dir == kRoundToPositiveInfinity;
+    typedef typename other_T::int_type other_int_type;
+    // setFromSignUnbiasedExponentAndNormalizedSignificand will
+    // zero out any underflowing value (but retain the sign).
+    other.setFromSignUnbiasedExponentAndNormalizedSignificand(
+        negate, static_cast<other_int_type>(exponent), rounded_significand,
+        round_underflow_up);
+    return;
+  }
+
+ private:
+  T value_;
+
+  static_assert(num_used_bits ==
+                    Traits::num_exponent_bits + Traits::num_fraction_bits + 1,
+                "The number of bits do not fit");
+  static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match");
+};
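+
+// [Editor's note] Illustrative sketch only (not upstream code): narrowing a
+// 32-bit float to a 16-bit half with castTo, rounding toward zero.
+#if 0
+spvutils::HexFloat<spvutils::FloatProxy<float>> f32(1.0f);
+spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>> f16(
+    spvutils::FloatProxy<spvutils::Float16>(uint16_t(0)));
+f32.castTo(f16, spvutils::kRoundToZero);  // f16 bits are now 0x3C00 (half 1.0)
+#endif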
+
+// Returns 4 bits represented by the hex character.
+inline uint8_t get_nibble_from_character(int character) {
+  const char* dec = "0123456789";
+  const char* lower = "abcdef";
+  const char* upper = "ABCDEF";
+  const char* p = nullptr;
+  if ((p = strchr(dec, character))) {
+    return static_cast<uint8_t>(p - dec);
+  } else if ((p = strchr(lower, character))) {
+    return static_cast<uint8_t>(p - lower + 0xa);
+  } else if ((p = strchr(upper, character))) {
+    return static_cast<uint8_t>(p - upper + 0xa);
+  }
+
+  assert(false && "This was called with a non-hex character");
+  return 0;
+}
+
+// Outputs the given HexFloat to the stream.
+template <typename T, typename Traits>
+std::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) {
+  typedef HexFloat<T, Traits> HF;
+  typedef typename HF::uint_type uint_type;
+  typedef typename HF::int_type int_type;
+
+  static_assert(HF::num_used_bits != 0,
+                "num_used_bits must be non-zero for a valid float");
+  static_assert(HF::num_exponent_bits != 0,
+                "num_exponent_bits must be non-zero for a valid float");
+  static_assert(HF::num_fraction_bits != 0,
+                "num_fraction_bits must be non-zero for a valid float");
+
+  const uint_type bits = spvutils::BitwiseCast<uint_type>(value.value());
+  const char* const sign = (bits & HF::sign_mask) ? "-" : "";
+  const uint_type exponent = static_cast<uint_type>(
+      (bits & HF::exponent_mask) >> HF::num_fraction_bits);
+
+  uint_type fraction = static_cast<uint_type>((bits & HF::fraction_encode_mask)
+                                              << HF::num_overflow_bits);
+
+  const bool is_zero = exponent == 0 && fraction == 0;
+  const bool is_denorm = exponent == 0 && !is_zero;
+
+  // exponent contains the biased exponent; we have to convert it back into
+  // the normal range.
+  int_type int_exponent = static_cast<int_type>(exponent - HF::exponent_bias);
+  // If the number is all zeros, then we actually have to NOT shift the
+  // exponent.
+  int_exponent = is_zero ? 0 : int_exponent;
+
+  // If we are denorm, then start shifting, and decreasing the exponent until
+  // our leading bit is 1.
+
+  if (is_denorm) {
+    while ((fraction & HF::fraction_top_bit) == 0) {
+      fraction = static_cast<uint_type>(fraction << 1);
+      int_exponent = static_cast<int_type>(int_exponent - 1);
+    }
+    // Since this is denormalized, we have to consume the leading 1 since it
+    // will end up being implicit.
+    fraction = static_cast<uint_type>(fraction << 1);  // eat the leading 1
+    fraction &= HF::fraction_represent_mask;
+  }
+
+  uint_type fraction_nibbles = HF::fraction_nibbles;
+  // We do not have to display any trailing 0s, since this represents the
+  // fractional part.
+  while (fraction_nibbles > 0 && (fraction & 0xF) == 0) {
+    // Shift off any trailing values.
+    fraction = static_cast<uint_type>(fraction >> 4);
+    --fraction_nibbles;
+  }
+
+  const auto saved_flags = os.flags();
+  const auto saved_fill = os.fill();
+
+  os << sign << "0x" << (is_zero ? '0' : '1');
+  if (fraction_nibbles) {
+    // Make sure to keep the leading 0s in place, since this is the fractional
+    // part.
+    os << "." << std::setw(static_cast<int>(fraction_nibbles))
+       << std::setfill('0') << std::hex << fraction;
+  }
+  os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent;
+
+  os.flags(saved_flags);
+  os.fill(saved_fill);
+
+  return os;
+}
+
+// Returns true if negate_value is true and the next character on the
+// input stream is a plus or minus sign. In that case we also set the fail bit
+// on the stream and set the value to the zero value for its type.
+template <typename T, typename Traits>
+inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value,
+                                        HexFloat<T, Traits>& value) {
+  if (negate_value) {
+    auto next_char = is.peek();
+    if (next_char == '-' || next_char == '+') {
+      // Fail the parse. Emulate standard behaviour by setting the value to
+      // the zero value, and set the fail bit on the stream.
+      value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0));
+      is.setstate(std::ios_base::failbit);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Parses a floating point number from the given stream and stores it into the
+// value parameter.
+// If negate_value is true then the number may not have a leading minus or
+// plus, and if it successfully parses, then the number is negated before
+// being stored into the value parameter.
+// If the value cannot be correctly parsed or overflows the target floating
+// point type, then set the fail bit on the stream.
+// TODO(dneto): Promise C++11 standard behavior in how the value is set in
+// the error case, but only after all target platforms implement it correctly.
+// In particular, the Microsoft C++ runtime appears to be out of spec.
+template <typename T, typename Traits>
+inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value,
+                                      HexFloat<T, Traits>& value) {
+  if (RejectParseDueToLeadingSign(is, negate_value, value)) {
+    return is;
+  }
+  T val;
+  is >> val;
+  if (negate_value) {
+    val = -val;
+  }
+  value.set_value(val);
+  // In the failure case, map -0.0 to 0.0.
+  if (is.fail() && value.getUnsignedBits() == 0u) {
+    value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0));
+  }
+  if (val.isInfinity()) {
+    // Fail the parse. Emulate standard behaviour by setting the value to
+    // the closest normal value, and set the fail bit on the stream.
+    value.set_value((value.isNegative() || negate_value) ? T::lowest()
+                                                         : T::max());
+    is.setstate(std::ios_base::failbit);
+  }
+  return is;
+}
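+
+// [Editor's note] Illustrative sketch only (not upstream code): parsing a
+// hex-float literal via the extraction operator defined further below.
+#if 0
+std::istringstream in("0x1.8p+1");
+spvutils::HexFloat<spvutils::FloatProxy<float>> hf(0.0f);
+in >> hf;  // hf.value().getAsFloat() == 3.0f, since 0x1.8p+1 == 1.5 * 2
+#endif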
+
+// Specialization of ParseNormalFloat for FloatProxy<Float16> values.
+// This will parse the float as if it were a 32-bit floating point number,
+// and then round it down to fit into a Float16 value.
+// The number is rounded towards zero.
+// If negate_value is true then the number may not have a leading minus or
+// plus, and if it successfully parses, then the number is negated before
+// being stored into the value parameter.
+// If the value cannot be correctly parsed or overflows the target floating
+// point type, then set the fail bit on the stream.
+// TODO(dneto): Promise C++11 standard behavior in how the value is set in
+// the error case, but only after all target platforms implement it correctly.
+// In particular, the Microsoft C++ runtime appears to be out of spec.
+template <>
+inline std::istream&
+ParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>(
+    std::istream& is, bool negate_value,
+    HexFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>& value) {
+  // First parse as a 32-bit float.
+  HexFloat<FloatProxy<float>> float_val(0.0f);
+  ParseNormalFloat(is, negate_value, float_val);
+
+  // Then convert to 16-bit float, saturating at infinities, and
+  // rounding toward zero.
+  float_val.castTo(value, kRoundToZero);
+
+  // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the
+  // fail bit and set the lowest or highest value.
+  if (Float16::isInfinity(value.value().getAsFloat())) {
+    value.set_value(value.isNegative() ? Float16::lowest() : Float16::max());
+    is.setstate(std::ios_base::failbit);
+  }
+  return is;
+}
+
+// Reads a HexFloat from the given stream.
+// If the float is not encoded as a hex-float then it will be parsed
+// as a regular float.
+// This may fail if your stream does not support at least one unget.
+// Nan values can be encoded with "0x1.<not zero>p+exponent_bias".
+// This would normally overflow a float and round to
+// infinity but this special pattern is the exact representation for a NaN,
+// and therefore is actually encoded as the correct NaN. To encode inf,
+// either 0x0p+exponent_bias can be specified or any exponent greater than
+// exponent_bias.
+// Examples using IEEE 32-bit float encoding.
+//    0x1.0p+128 (+inf)
+//    -0x1.0p-128 (-inf)
+//
+//    0x1.1p+128 (+Nan)
+//    -0x1.1p+128 (-Nan)
+//
+//    0x1p+129 (+inf)
+//    -0x1p+129 (-inf)
+template <typename T, typename Traits>
+std::istream& operator>>(std::istream& is, HexFloat<T, Traits>& value) {
+  using HF = HexFloat<T, Traits>;
+  using uint_type = typename HF::uint_type;
+  using int_type = typename HF::int_type;
+
+  value.set_value(static_cast<T>(0.f));
+
+  if (is.flags() & std::ios::skipws) {
+    // If the user wants to skip whitespace, then we should obey that.
+    while (std::isspace(is.peek())) {
+      is.get();
+    }
+  }
+
+  auto next_char = is.peek();
+  bool negate_value = false;
+
+  if (next_char != '-' && next_char != '0') {
+    return ParseNormalFloat(is, negate_value, value);
+  }
+
+  if (next_char == '-') {
+    negate_value = true;
+    is.get();
+    next_char = is.peek();
+  }
+
+  if (next_char == '0') {
+    is.get();  // We may have to unget this.
+    auto maybe_hex_start = is.peek();
+    if (maybe_hex_start != 'x' && maybe_hex_start != 'X') {
+      is.unget();
+      return ParseNormalFloat(is, negate_value, value);
+    } else {
+      is.get();  // Throw away the 'x'.
+    }
+  } else {
+    return ParseNormalFloat(is, negate_value, value);
+  }
+
+  // This "looks" like a hex-float so treat it as one.
+  bool seen_p = false;
+  bool seen_dot = false;
+  uint_type fraction_index = 0;
+
+  uint_type fraction = 0;
+  int_type exponent = HF::exponent_bias;
+
+  // Strip off leading zeros so we don't have to special-case them later.
+  while ((next_char = is.peek()) == '0') {
+    is.get();
+  }
+
+  bool is_denorm =
+      true;  // Assume denorm "representation" until we hear otherwise.
+             // NB: This does not mean the value is actually denorm,
+             // it just means that it was written 0.
+  bool bits_written = false;  // Stays false until we write a bit.
+  while (!seen_p && !seen_dot) {
+    // Handle characters that are left of the fractional part.
+    if (next_char == '.') {
+      seen_dot = true;
+    } else if (next_char == 'p') {
+      seen_p = true;
+    } else if (::isxdigit(next_char)) {
+      // We know this is not denormalized since we have stripped all leading
+      // zeroes and we are not a ".".
+      is_denorm = false;
+      int number = get_nibble_from_character(next_char);
+      for (int i = 0; i < 4; ++i, number <<= 1) {
+        uint_type write_bit = (number & 0x8) ? 0x1 : 0x0;
+        if (bits_written) {
+          // If we are here the bits represented belong in the fractional
+          // part of the float, and we have to adjust the exponent accordingly.
+          fraction = static_cast<uint_type>(
+              fraction |
+              static_cast<uint_type>(
+                  write_bit << (HF::top_bit_left_shift - fraction_index++)));
+          exponent = static_cast<int_type>(exponent + 1);
+        }
+        bits_written |= write_bit != 0;
+      }
+    } else {
+      // We have not found our exponent yet, so we have to fail.
+      is.setstate(std::ios::failbit);
+      return is;
+    }
+    is.get();
+    next_char = is.peek();
+  }
+  bits_written = false;
+  while (seen_dot && !seen_p) {
+    // Handle only fractional parts now.
+    if (next_char == 'p') {
+      seen_p = true;
+    } else if (::isxdigit(next_char)) {
+      int number = get_nibble_from_character(next_char);
+      for (int i = 0; i < 4; ++i, number <<= 1) {
+        uint_type write_bit = (number & 0x8) ? 0x01 : 0x00;
+        bits_written |= write_bit != 0;
+        if (is_denorm && !bits_written) {
+          // Handle modifying the exponent here; this way we can handle
+          // an arbitrary number of hex values without overflowing our
+          // integer.
+          exponent = static_cast<int_type>(exponent - 1);
+        } else {
+          fraction = static_cast<uint_type>(
+              fraction |
+              static_cast<uint_type>(
+                  write_bit << (HF::top_bit_left_shift - fraction_index++)));
+        }
+      }
+    } else {
+      // We still have not found our 'p' exponent yet, so this is not a valid
+      // hex-float.
+      is.setstate(std::ios::failbit);
+      return is;
+    }
+    is.get();
+    next_char = is.peek();
+  }
+
+  bool seen_sign = false;
+  int8_t exponent_sign = 1;
+  int_type written_exponent = 0;
+  while (true) {
+    if ((next_char == '-' || next_char == '+')) {
+      if (seen_sign) {
+        is.setstate(std::ios::failbit);
+        return is;
+      }
+      seen_sign = true;
+      exponent_sign = (next_char == '-') ? -1 : 1;
+    } else if (::isdigit(next_char)) {
+      // Hex-floats express their exponent as decimal.
+      written_exponent = static_cast<int_type>(written_exponent * 10);
+      written_exponent =
+          static_cast<int_type>(written_exponent + (next_char - '0'));
+    } else {
+      break;
+    }
+    is.get();
+    next_char = is.peek();
+  }
+
+  written_exponent = static_cast<int_type>(written_exponent * exponent_sign);
+  exponent = static_cast<int_type>(exponent + written_exponent);
+
+  bool is_zero = is_denorm && (fraction == 0);
+  if (is_denorm && !is_zero) {
+    fraction = static_cast<uint_type>(fraction << 1);
+    exponent = static_cast<int_type>(exponent - 1);
+  } else if (is_zero) {
+    exponent = 0;
+  }
+
+  if (exponent <= 0 && !is_zero) {
+    fraction = static_cast<uint_type>(fraction >> 1);
+    fraction |= static_cast<uint_type>(1) << HF::top_bit_left_shift;
+  }
+
+  fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask;
+
+  const int_type max_exponent =
+      SetBits<int_type, 0, HF::num_exponent_bits>::get;
+
+  // Handle actual denorm numbers
+  while (exponent < 0 && !is_zero) {
+    fraction = static_cast<uint_type>(fraction >> 1);
+    exponent = static_cast<int_type>(exponent + 1);
+
+    fraction &= HF::fraction_encode_mask;
+    if (fraction == 0) {
+      // We have underflowed our fraction. We should clamp to zero.
+      is_zero = true;
+      exponent = 0;
+    }
+  }
+
+  // We have overflowed so we should be inf/-inf.
+  if (exponent > max_exponent) {
+    exponent = max_exponent;
+    fraction = 0;
+  }
+
+  uint_type output_bits = static_cast<uint_type>(
+      static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift);
+  output_bits |= fraction;
+
+  uint_type shifted_exponent = static_cast<uint_type>(
+      static_cast<uint_type>(exponent << HF::exponent_left_shift) &
+      HF::exponent_mask);
+  output_bits |= shifted_exponent;
+
+  T output_float = spvutils::BitwiseCast<T>(output_bits);
+  value.set_value(output_float);
+
+  return is;
+}
+
+// Writes a FloatProxy value to a stream.
+// Zero and normal numbers are printed in the usual notation, but with
+// enough digits to fully reproduce the value. Other values (subnormal,
+// NaN, and infinity) are printed as a hex float.
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) {
+  auto float_val = value.getAsFloat();
+  switch (std::fpclassify(float_val)) {
+    case FP_ZERO:
+    case FP_NORMAL: {
+      auto saved_precision = os.precision();
+      os.precision(std::numeric_limits<T>::digits10);
+      os << float_val;
+      os.precision(saved_precision);
+    } break;
+    default:
+      os << HexFloat<FloatProxy<T>>(value);
+      break;
+  }
+  return os;
+}
+
+template <>
+inline std::ostream& operator<<(std::ostream& os,
+                                const FloatProxy<Float16>& value) {
+  os << HexFloat<FloatProxy<Float16>>(value);
+  return os;
+}
+}
+
+#endif  // LIBSPIRV_UTIL_HEX_FLOAT_H_
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/spirv.hpp b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/spirv.hpp
new file mode 100644
index 0000000..f85469d
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/spirv.hpp
@@ -0,0 +1,2533 @@
+// Copyright (c) 2014-2020 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and/or associated documentation files (the "Materials"),
+// to deal in the Materials without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Materials, and to permit persons to whom the
+// Materials are furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+// IN THE MATERIALS.
+
+// This header is automatically generated by the same tool that creates
+// the Binary Section of the SPIR-V specification.
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageMax = 0x7fffffff, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, + ExecutionModelTaskNV = 5267, + ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, + ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 5314, + ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, + ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, + ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, + ExecutionModelMissNV = 5317, + ExecutionModelCallableKHR = 5318, + ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, + ExecutionModelMax = 0x7fffffff, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, + AddressingModelPhysicalStorageBuffer64EXT = 5348, + AddressingModelMax = 0x7fffffff, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, + MemoryModelVulkanKHR = 3, + MemoryModelMax = 0x7fffffff, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + 
ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, + ExecutionModeInitializer = 33, + ExecutionModeFinalizer = 34, + ExecutionModeSubgroupSize = 35, + ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeSubgroupsPerWorkgroupId = 37, + ExecutionModeLocalSizeId = 38, + ExecutionModeLocalSizeHintId = 39, + ExecutionModeSubgroupUniformControlFlowKHR = 4421, + ExecutionModePostDepthCoverage = 4446, + ExecutionModeDenormPreserve = 4459, + ExecutionModeDenormFlushToZero = 4460, + ExecutionModeSignedZeroInfNanPreserve = 4461, + ExecutionModeRoundingModeRTE = 4462, + ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, + ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, + ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, + ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, + ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeSharedLocalMemorySizeINTEL = 5618, + ExecutionModeRoundingModeRTPINTEL = 5620, + ExecutionModeRoundingModeRTNINTEL = 5621, + ExecutionModeFloatingPointModeALTINTEL = 5622, + ExecutionModeFloatingPointModeIEEEINTEL = 5623, + ExecutionModeMaxWorkgroupSizeINTEL = 5893, + ExecutionModeMaxWorkDimINTEL = 5894, + ExecutionModeNoGlobalOffsetINTEL = 5895, + ExecutionModeNumSIMDWorkitemsINTEL = 5896, + ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeMax = 0x7fffffff, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, + StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, + StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, + StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, + StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, + StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, + StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, + StorageClassPhysicalStorageBufferEXT = 5349, + 
StorageClassTaskPayloadWorkgroupEXT = 5402, + StorageClassCodeSectionINTEL = 5605, + StorageClassDeviceOnlyINTEL = 5936, + StorageClassHostOnlyINTEL = 5937, + StorageClassMax = 0x7fffffff, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, + DimMax = 0x7fffffff, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, + ImageFormatR64ui = 40, + ImageFormatR64i = 41, + ImageFormatMax = 0x7fffffff, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + 
ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, + ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, + ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, + ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, + ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsNontemporalShift = 14, + ImageOperandsOffsetsShift = 16, + ImageOperandsMax = 0x7fffffff, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, + ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, + ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, + ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, + ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, + ImageOperandsNontemporalMask = 0x00004000, + ImageOperandsOffsetsMask = 0x00010000, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocINTELShift = 17, + FPFastMathModeMax = 0x7fffffff, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocINTELMask = 0x00020000, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, + LinkageTypeMax = 0x7fffffff, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 
16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationUniformId = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, + DecorationMaxByteOffset = 45, + DecorationAlignmentId = 46, + DecorationMaxByteOffsetId = 47, + DecorationNoSignedWrap = 4469, + DecorationNoUnsignedWrap = 4470, + DecorationExplicitInterpAMD = 4999, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, + DecorationPerPrimitiveNV = 5271, + DecorationPerViewNV = 5272, + DecorationPerTaskNV = 5273, + DecorationPerVertexKHR = 5285, + DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, + DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, + DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, + DecorationAliasedPointerEXT = 5356, + DecorationBindlessSamplerNV = 5398, + DecorationBindlessImageNV = 5399, + DecorationBoundSamplerNV = 5400, + DecorationBoundImageNV = 5401, + DecorationSIMTCallINTEL = 5599, + DecorationReferencedIndirectlyINTEL = 5602, + DecorationClobberINTEL = 5607, + DecorationSideEffectsINTEL = 5608, + DecorationVectorComputeVariableINTEL = 5624, + DecorationFuncParamIOKindINTEL = 5625, + DecorationVectorComputeFunctionINTEL = 5626, + DecorationStackCallINTEL = 5627, + DecorationGlobalVariableOffsetINTEL = 5628, + DecorationCounterBuffer = 5634, + DecorationHlslCounterBufferGOOGLE = 5634, + DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationFunctionRoundingModeINTEL = 5822, + DecorationFunctionDenormModeINTEL = 5823, + DecorationRegisterINTEL = 5825, + DecorationMemoryINTEL = 5826, + DecorationNumbanksINTEL = 5827, + DecorationBankwidthINTEL = 5828, + DecorationMaxPrivateCopiesINTEL = 5829, + DecorationSinglepumpINTEL = 5830, + DecorationDoublepumpINTEL = 5831, + DecorationMaxReplicatesINTEL = 5832, + DecorationSimpleDualPortINTEL = 5833, + DecorationMergeINTEL = 5834, + DecorationBankBitsINTEL = 5835, + DecorationForcePow2DepthINTEL = 5836, + DecorationBurstCoalesceINTEL = 5899, + DecorationCacheSizeINTEL = 5900, + DecorationDontStaticallyCoalesceINTEL = 5901, + DecorationPrefetchINTEL = 5902, + DecorationStallEnableINTEL = 5905, + DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationBufferLocationINTEL = 5921, + DecorationIOPipeStorageINTEL = 5944, + DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationSingleElementVectorINTEL = 6085, + DecorationVectorComputeCallableFunctionINTEL = 6087, + DecorationMediaBlockIOINTEL = 6140, + DecorationMax = 0x7fffffff, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + 
BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, + BuiltInSubgroupEqMask = 4416, + BuiltInSubgroupEqMaskKHR = 4416, + BuiltInSubgroupGeMask = 4417, + BuiltInSubgroupGeMaskKHR = 4417, + BuiltInSubgroupGtMask = 4418, + BuiltInSubgroupGtMaskKHR = 4418, + BuiltInSubgroupLeMask = 4419, + BuiltInSubgroupLeMaskKHR = 4419, + BuiltInSubgroupLtMask = 4420, + BuiltInSubgroupLtMaskKHR = 4420, + BuiltInBaseVertex = 4424, + BuiltInBaseInstance = 4425, + BuiltInDrawIndex = 4426, + BuiltInPrimitiveShadingRateKHR = 4432, + BuiltInDeviceIndex = 4438, + BuiltInViewIndex = 4440, + BuiltInShadingRateKHR = 4444, + BuiltInBaryCoordNoPerspAMD = 4992, + BuiltInBaryCoordNoPerspCentroidAMD = 4993, + BuiltInBaryCoordNoPerspSampleAMD = 4994, + BuiltInBaryCoordSmoothAMD = 4995, + BuiltInBaryCoordSmoothCentroidAMD = 4996, + BuiltInBaryCoordSmoothSampleAMD = 4997, + BuiltInBaryCoordPullModelAMD = 4998, + BuiltInFragStencilRefEXT = 5014, + BuiltInViewportMaskNV = 5253, + BuiltInSecondaryPositionNV = 5257, + BuiltInSecondaryViewportMaskNV = 5258, + BuiltInPositionPerViewNV = 5261, + BuiltInViewportMaskPerViewNV = 5262, + BuiltInFullyCoveredEXT = 5264, + BuiltInTaskCountNV = 5274, + BuiltInPrimitiveCountNV = 5275, + BuiltInPrimitiveIndicesNV = 5276, + BuiltInClipDistancePerViewNV = 5277, + BuiltInCullDistancePerViewNV = 5278, + BuiltInLayerPerViewNV = 5279, + BuiltInMeshViewCountNV = 5280, + BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordKHR = 5286, + BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspKHR = 5287, + BuiltInBaryCoordNoPerspNV = 5287, + BuiltInFragSizeEXT = 5292, + BuiltInFragmentSizeNV = 5292, + BuiltInFragInvocationCountEXT = 5293, + BuiltInInvocationsPerPixelNV = 5293, + BuiltInPrimitivePointIndicesEXT = 5294, + BuiltInPrimitiveLineIndicesEXT = 5295, + BuiltInPrimitiveTriangleIndicesEXT = 5296, + BuiltInCullPrimitiveEXT = 5299, + BuiltInLaunchIdKHR = 5319, + BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeKHR = 5320, + BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginKHR = 5321, + BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionKHR = 5322, + BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginKHR = 5323, + BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionKHR = 5324, + BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminKHR = 5325, + BuiltInRayTminNV = 5325, + BuiltInRayTmaxKHR = 5326, + BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexKHR = 5327, + BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldKHR = 5330, + BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectKHR = 5331, + BuiltInWorldToObjectNV = 5331, + BuiltInHitTNV = 5332, + 
BuiltInHitKindKHR = 5333, + BuiltInHitKindNV = 5333, + BuiltInCurrentRayTimeNV = 5334, + BuiltInIncomingRayFlagsKHR = 5351, + BuiltInIncomingRayFlagsNV = 5351, + BuiltInRayGeometryIndexKHR = 5352, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, + BuiltInMax = 0x7fffffff, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, + SelectionControlMax = 0x7fffffff, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, + LoopControlDependencyInfiniteShift = 2, + LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, + LoopControlInitiationIntervalINTELShift = 16, + LoopControlMaxConcurrencyINTELShift = 17, + LoopControlDependencyArrayINTELShift = 18, + LoopControlPipelineEnableINTELShift = 19, + LoopControlLoopCoalesceINTELShift = 20, + LoopControlMaxInterleavingINTELShift = 21, + LoopControlSpeculatedIterationsINTELShift = 22, + LoopControlNoFusionINTELShift = 23, + LoopControlMax = 0x7fffffff, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, + LoopControlDependencyInfiniteMask = 0x00000004, + LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, + LoopControlInitiationIntervalINTELMask = 0x00010000, + LoopControlMaxConcurrencyINTELMask = 0x00020000, + LoopControlDependencyArrayINTELMask = 0x00040000, + LoopControlPipelineEnableINTELMask = 0x00080000, + LoopControlLoopCoalesceINTELMask = 0x00100000, + LoopControlMaxInterleavingINTELMask = 0x00200000, + LoopControlSpeculatedIterationsINTELMask = 0x00400000, + LoopControlNoFusionINTELMask = 0x00800000, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, + FunctionControlOptNoneINTELShift = 16, + FunctionControlMax = 0x7fffffff, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, + FunctionControlOptNoneINTELMask = 0x00010000, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, + MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, + MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, + MemorySemanticsMakeVisibleKHRShift = 14, + 
MemorySemanticsVolatileShift = 15, + MemorySemanticsMax = 0x7fffffff, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, + MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, + MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, + MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, + MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, + MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, + MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessMax = 0x7fffffff, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, + MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, + MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, + MemoryAccessNonPrivatePointerKHRMask = 0x00000020, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, + ScopeQueueFamily = 5, + ScopeQueueFamilyKHR = 5, + ScopeShaderCallKHR = 6, + ScopeMax = 0x7fffffff, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, + GroupOperationClusteredReduce = 3, + GroupOperationPartitionedReduceNV = 6, + GroupOperationPartitionedInclusiveScanNV = 7, + GroupOperationPartitionedExclusiveScanNV = 8, + GroupOperationMax = 0x7fffffff, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, + KernelEnqueueFlagsMax = 0x7fffffff, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, + KernelProfilingInfoMax = 0x7fffffff, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + 
CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, + CapabilitySubgroupDispatch = 58, + CapabilityNamedBarrier = 59, + CapabilityPipeStorage = 60, + CapabilityGroupNonUniform = 61, + CapabilityGroupNonUniformVote = 62, + CapabilityGroupNonUniformArithmetic = 63, + CapabilityGroupNonUniformBallot = 64, + CapabilityGroupNonUniformShuffle = 65, + CapabilityGroupNonUniformShuffleRelative = 66, + CapabilityGroupNonUniformClustered = 67, + CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, + CapabilityUniformDecoration = 71, + CapabilityFragmentShadingRateKHR = 4422, + CapabilitySubgroupBallotKHR = 4423, + CapabilityDrawParameters = 4427, + CapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, + CapabilitySubgroupVoteKHR = 4431, + CapabilityStorageBuffer16BitAccess = 4433, + CapabilityStorageUniformBufferBlock16 = 4433, + CapabilityStorageUniform16 = 4434, + CapabilityUniformAndStorageBuffer16BitAccess = 4434, + CapabilityStoragePushConstant16 = 4435, + CapabilityStorageInputOutput16 = 4436, + CapabilityDeviceGroup = 4437, + CapabilityMultiView = 4439, + CapabilityVariablePointersStorageBuffer = 4441, + CapabilityVariablePointers = 4442, + CapabilityAtomicStorageOps = 4445, + CapabilitySampleMaskPostDepthCoverage = 4447, + CapabilityStorageBuffer8BitAccess = 4448, + CapabilityUniformAndStorageBuffer8BitAccess = 4449, + CapabilityStoragePushConstant8 = 4450, + CapabilityDenormPreserve = 4464, + CapabilityDenormFlushToZero = 4465, + CapabilitySignedZeroInfNanPreserve = 4466, + CapabilityRoundingModeRTE = 4467, + CapabilityRoundingModeRTZ = 4468, + CapabilityRayQueryProvisionalKHR = 4471, + CapabilityRayQueryKHR = 4472, + CapabilityRayTraversalPrimitiveCullingKHR = 4478, + CapabilityRayTracingKHR = 4479, + CapabilityFloat16ImageAMD = 5008, + CapabilityImageGatherBiasLodAMD = 5009, + CapabilityFragmentMaskAMD = 5010, + CapabilityStencilExportEXT = 5013, + CapabilityImageReadWriteLodAMD = 5015, + CapabilityInt64ImageEXT = 5016, + CapabilityShaderClockKHR = 5055, + CapabilitySampleMaskOverrideCoverageNV = 5249, + CapabilityGeometryShaderPassthroughNV = 5251, + CapabilityShaderViewportIndexLayerEXT = 5254, + CapabilityShaderViewportIndexLayerNV = 5254, + 
CapabilityShaderViewportMaskNV = 5255, + CapabilityShaderStereoViewNV = 5259, + CapabilityPerViewAttributesNV = 5260, + CapabilityFragmentFullyCoveredEXT = 5265, + CapabilityMeshShadingNV = 5266, + CapabilityImageFootprintNV = 5282, + CapabilityMeshShadingEXT = 5283, + CapabilityFragmentBarycentricKHR = 5284, + CapabilityFragmentBarycentricNV = 5284, + CapabilityComputeDerivativeGroupQuadsNV = 5288, + CapabilityFragmentDensityEXT = 5291, + CapabilityShadingRateNV = 5291, + CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, + CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, + CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, + CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, + CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, + CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, + CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, + CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, + CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + CapabilityRayTracingNV = 5340, + CapabilityRayTracingMotionBlurNV = 5341, + CapabilityVulkanMemoryModel = 5345, + CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, + CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, + CapabilityPhysicalStorageBufferAddressesEXT = 5347, + CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityRayTracingProvisionalKHR = 5353, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocation = 5379, + CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilityBindlessTextureNV = 5390, + CapabilitySubgroupShuffleINTEL = 5568, + CapabilitySubgroupBufferBlockIOINTEL = 5569, + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityRoundToInfinityINTEL = 5582, + CapabilityFloatingPointModeINTEL = 5583, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilityFunctionPointersINTEL = 5603, + CapabilityIndirectReferencesINTEL = 5604, + CapabilityAsmINTEL = 5606, + CapabilityAtomicFloat32MinMaxEXT = 5612, + CapabilityAtomicFloat64MinMaxEXT = 5613, + CapabilityAtomicFloat16MinMaxEXT = 5616, + CapabilityVectorComputeINTEL = 5617, + CapabilityVectorAnyINTEL = 5619, + CapabilityExpectAssumeKHR = 5629, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + 
CapabilityVariableLengthArrayINTEL = 5817, + CapabilityFunctionFloatControlINTEL = 5821, + CapabilityFPGAMemoryAttributesINTEL = 5824, + CapabilityFPFastMathModeINTEL = 5837, + CapabilityArbitraryPrecisionIntegersINTEL = 5844, + CapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + CapabilityUnstructuredLoopControlsINTEL = 5886, + CapabilityFPGALoopControlsINTEL = 5888, + CapabilityKernelAttributesINTEL = 5892, + CapabilityFPGAKernelAttributesINTEL = 5897, + CapabilityFPGAMemoryAccessesINTEL = 5898, + CapabilityFPGAClusterAttributesINTEL = 5904, + CapabilityLoopFuseINTEL = 5906, + CapabilityFPGABufferLocationINTEL = 5920, + CapabilityArbitraryPrecisionFixedPointINTEL = 5922, + CapabilityUSMStorageClassesINTEL = 5935, + CapabilityIOPipesINTEL = 5943, + CapabilityBlockingPipesINTEL = 5945, + CapabilityFPGARegINTEL = 5948, + CapabilityDotProductInputAll = 6016, + CapabilityDotProductInputAllKHR = 6016, + CapabilityDotProductInput4x8Bit = 6017, + CapabilityDotProductInput4x8BitKHR = 6017, + CapabilityDotProductInput4x8BitPacked = 6018, + CapabilityDotProductInput4x8BitPackedKHR = 6018, + CapabilityDotProduct = 6019, + CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, + CapabilityBitInstructions = 6025, + CapabilityAtomicFloat32AddEXT = 6033, + CapabilityAtomicFloat64AddEXT = 6034, + CapabilityLongConstantCompositeINTEL = 6089, + CapabilityOptNoneINTEL = 6094, + CapabilityAtomicFloat16AddEXT = 6095, + CapabilityDebugInfoModuleINTEL = 6114, + CapabilityMax = 0x7fffffff, +}; + +enum RayFlagsShift { + RayFlagsOpaqueKHRShift = 0, + RayFlagsNoOpaqueKHRShift = 1, + RayFlagsTerminateOnFirstHitKHRShift = 2, + RayFlagsSkipClosestHitShaderKHRShift = 3, + RayFlagsCullBackFacingTrianglesKHRShift = 4, + RayFlagsCullFrontFacingTrianglesKHRShift = 5, + RayFlagsCullOpaqueKHRShift = 6, + RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipTrianglesKHRShift = 8, + RayFlagsSkipAABBsKHRShift = 9, + RayFlagsMax = 0x7fffffff, +}; + +enum RayFlagsMask { + RayFlagsMaskNone = 0, + RayFlagsOpaqueKHRMask = 0x00000001, + RayFlagsNoOpaqueKHRMask = 0x00000002, + RayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + RayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + RayFlagsCullOpaqueKHRMask = 0x00000040, + RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipTrianglesKHRMask = 0x00000100, + RayFlagsSkipAABBsKHRMask = 0x00000200, +}; + +enum RayQueryIntersection { + RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + RayQueryIntersectionMax = 0x7fffffff, +}; + +enum RayQueryCommittedIntersectionType { + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + RayQueryCommittedIntersectionTypeMax = 0x7fffffff, +}; + +enum RayQueryCandidateIntersectionType { + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + RayQueryCandidateIntersectionTypeMax = 0x7fffffff, +}; + +enum FragmentShadingRateShift { + FragmentShadingRateVertical2PixelsShift = 0, + FragmentShadingRateVertical4PixelsShift = 1, + FragmentShadingRateHorizontal2PixelsShift = 2, + FragmentShadingRateHorizontal4PixelsShift = 3, + FragmentShadingRateMax = 0x7fffffff, +}; + +enum 
FragmentShadingRateMask { + FragmentShadingRateMaskNone = 0, + FragmentShadingRateVertical2PixelsMask = 0x00000001, + FragmentShadingRateVertical4PixelsMask = 0x00000002, + FragmentShadingRateHorizontal2PixelsMask = 0x00000004, + FragmentShadingRateHorizontal4PixelsMask = 0x00000008, +}; + +enum FPDenormMode { + FPDenormModePreserve = 0, + FPDenormModeFlushToZero = 1, + FPDenormModeMax = 0x7fffffff, +}; + +enum FPOperationMode { + FPOperationModeIEEE = 0, + FPOperationModeALT = 1, + FPOperationModeMax = 0x7fffffff, +}; + +enum QuantizationModes { + QuantizationModesTRN = 0, + QuantizationModesTRN_ZERO = 1, + QuantizationModesRND = 2, + QuantizationModesRND_ZERO = 3, + QuantizationModesRND_INF = 4, + QuantizationModesRND_MIN_INF = 5, + QuantizationModesRND_CONV = 6, + QuantizationModesRND_CONV_ODD = 7, + QuantizationModesMax = 0x7fffffff, +}; + +enum OverflowModes { + OverflowModesWRAP = 0, + OverflowModesSAT = 1, + OverflowModesSAT_ZERO = 2, + OverflowModesSAT_SYM = 3, + OverflowModesMax = 0x7fffffff, +}; + +enum PackedVectorFormat { + PackedVectorFormatPackedVectorFormat4x8Bit = 0, + PackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + PackedVectorFormatMax = 0x7fffffff, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + 
OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + 
OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpTerminateInvocation = 4416, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpSDot = 4450, + OpSDotKHR = 4450, + OpUDot = 4451, + OpUDotKHR = 4451, + OpSUDot = 4452, + OpSUDotKHR = 4452, + OpSDotAccSat = 4453, + OpSDotAccSatKHR = 4453, + 
OpUDotAccSat = 4454, + OpUDotAccSatKHR = 4454, + OpSUDotAccSat = 4455, + OpSUDotAccSatKHR = 4455, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, + OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayNV = 5336, + OpTraceNV = 5337, + OpTraceMotionNV = 5338, + OpTraceRayMotionNV = 5339, + OpTypeAccelerationStructureKHR = 5341, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocation = 5380, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpConvertUToImageNV = 5391, + OpConvertUToSamplerNV = 5392, + OpConvertImageToUNV = 5393, + OpConvertSamplerToUNV = 5394, + OpConvertUToSampledImageNV = 5395, + OpConvertSampledImageToUNV = 5396, + OpSamplerImageAddressingModeNV = 5397, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpConstantFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 5601, + OpAsmTargetINTEL = 5609, + OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, + OpDecorateString = 5632, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, + OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + 
OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + 
OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpVariableLengthArrayINTEL = 5818, + OpSaveMemoryINTEL = 5819, + OpRestoreMemoryINTEL = 5820, + OpArbitraryFloatSinCosPiINTEL = 5840, + OpArbitraryFloatCastINTEL = 5841, + OpArbitraryFloatCastFromIntINTEL = 5842, + OpArbitraryFloatCastToIntINTEL = 5843, + OpArbitraryFloatAddINTEL = 5846, + OpArbitraryFloatSubINTEL = 5847, + OpArbitraryFloatMulINTEL = 5848, + OpArbitraryFloatDivINTEL = 5849, + OpArbitraryFloatGTINTEL = 5850, + OpArbitraryFloatGEINTEL = 5851, + OpArbitraryFloatLTINTEL = 5852, + OpArbitraryFloatLEINTEL = 5853, + OpArbitraryFloatEQINTEL = 5854, + OpArbitraryFloatRecipINTEL = 5855, + OpArbitraryFloatRSqrtINTEL = 5856, + OpArbitraryFloatCbrtINTEL = 5857, + OpArbitraryFloatHypotINTEL = 5858, + OpArbitraryFloatSqrtINTEL = 5859, + OpArbitraryFloatLogINTEL = 5860, + OpArbitraryFloatLog2INTEL = 5861, + OpArbitraryFloatLog10INTEL = 5862, + OpArbitraryFloatLog1pINTEL = 5863, + OpArbitraryFloatExpINTEL = 5864, + OpArbitraryFloatExp2INTEL = 5865, + OpArbitraryFloatExp10INTEL = 5866, + OpArbitraryFloatExpm1INTEL = 5867, + 
OpArbitraryFloatSinINTEL = 5868, + OpArbitraryFloatCosINTEL = 5869, + OpArbitraryFloatSinCosINTEL = 5870, + OpArbitraryFloatSinPiINTEL = 5871, + OpArbitraryFloatCosPiINTEL = 5872, + OpArbitraryFloatASinINTEL = 5873, + OpArbitraryFloatASinPiINTEL = 5874, + OpArbitraryFloatACosINTEL = 5875, + OpArbitraryFloatACosPiINTEL = 5876, + OpArbitraryFloatATanINTEL = 5877, + OpArbitraryFloatATanPiINTEL = 5878, + OpArbitraryFloatATan2INTEL = 5879, + OpArbitraryFloatPowINTEL = 5880, + OpArbitraryFloatPowRINTEL = 5881, + OpArbitraryFloatPowNINTEL = 5882, + OpLoopControlINTEL = 5887, + OpFixedSqrtINTEL = 5923, + OpFixedRecipINTEL = 5924, + OpFixedRsqrtINTEL = 5925, + OpFixedSinINTEL = 5926, + OpFixedCosINTEL = 5927, + OpFixedSinCosINTEL = 5928, + OpFixedSinPiINTEL = 5929, + OpFixedCosPiINTEL = 5930, + OpFixedSinCosPiINTEL = 5931, + OpFixedLogINTEL = 5932, + OpFixedExpINTEL = 5933, + OpPtrCastToCrossWorkgroupINTEL = 5934, + OpCrossWorkgroupCastToPtrINTEL = 5938, + OpReadPipeBlockingINTEL = 5946, + OpWritePipeBlockingINTEL = 5947, + OpFPGARegINTEL = 5949, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + OpAtomicFAddEXT = 6035, + OpTypeBufferSurfaceINTEL = 6086, + OpTypeStructContinuedINTEL = 6090, + OpConstantCompositeContinuedINTEL = 6091, + OpSpecConstantCompositeContinuedINTEL = 6092, + OpMax = 0x7fffffff, +}; + +#ifdef SPV_ENABLE_UTILITY_CODE +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case OpNop: *hasResult = false; *hasResultType = false; break; + case OpUndef: *hasResult = true; *hasResultType = true; break; + case OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case OpSource: *hasResult = false; *hasResultType = false; break; + case OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case OpName: *hasResult = false; *hasResultType = false; break; + case OpMemberName: *hasResult = false; *hasResultType = false; break; + case OpString: *hasResult = true; *hasResultType = false; break; + case OpLine: *hasResult = false; *hasResultType = false; break; + case OpExtension: *hasResult = false; *hasResultType = false; break; + case OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case OpExtInst: *hasResult = true; *hasResultType = true; break; + case OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case OpCapability: *hasResult = false; *hasResultType = false; break; + case OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case OpTypeBool: 
*hasResult = true; *hasResultType = false; break; + case OpTypeInt: *hasResult = true; *hasResultType = false; break; + case OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case OpTypeVector: *hasResult = true; *hasResultType = false; break; + case OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case OpTypeImage: *hasResult = true; *hasResultType = false; break; + case OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case OpTypeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case OpTypePointer: *hasResult = true; *hasResultType = false; break; + case OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case OpTypePipe: *hasResult = true; *hasResultType = false; break; + case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpConstant: *hasResult = true; *hasResultType = true; break; + case OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case OpConstantNull: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case OpFunction: *hasResult = true; *hasResultType = true; break; + case OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case OpVariable: *hasResult = true; *hasResultType = true; break; + case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case OpLoad: *hasResult = true; *hasResultType = true; break; + case OpStore: *hasResult = false; *hasResultType = false; break; + case OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case OpAccessChain: *hasResult = true; *hasResultType = true; break; + case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpArrayLength: *hasResult = true; *hasResultType = true; break; + case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpDecorate: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorate: *hasResult = false; *hasResultType = false; 
break; + case OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case OpCopyObject: *hasResult = true; *hasResultType = true; break; + case OpTranspose: *hasResult = true; *hasResultType = true; break; + case OpSampledImage: *hasResult = true; *hasResultType = true; break; + case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageFetch: *hasResult = true; *hasResultType = true; break; + case OpImageGather: *hasResult = true; *hasResultType = true; break; + case OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageRead: *hasResult = true; *hasResultType = true; break; + case OpImageWrite: *hasResult = false; *hasResultType = false; break; + case OpImage: *hasResult = true; *hasResultType = true; break; + case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case OpUConvert: *hasResult = true; *hasResultType = true; break; + case OpSConvert: *hasResult = true; *hasResultType = true; break; + case OpFConvert: *hasResult = true; *hasResultType = true; break; + case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; 
break; + case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case OpBitcast: *hasResult = true; *hasResultType = true; break; + case OpSNegate: *hasResult = true; *hasResultType = true; break; + case OpFNegate: *hasResult = true; *hasResultType = true; break; + case OpIAdd: *hasResult = true; *hasResultType = true; break; + case OpFAdd: *hasResult = true; *hasResultType = true; break; + case OpISub: *hasResult = true; *hasResultType = true; break; + case OpFSub: *hasResult = true; *hasResultType = true; break; + case OpIMul: *hasResult = true; *hasResultType = true; break; + case OpFMul: *hasResult = true; *hasResultType = true; break; + case OpUDiv: *hasResult = true; *hasResultType = true; break; + case OpSDiv: *hasResult = true; *hasResultType = true; break; + case OpFDiv: *hasResult = true; *hasResultType = true; break; + case OpUMod: *hasResult = true; *hasResultType = true; break; + case OpSRem: *hasResult = true; *hasResultType = true; break; + case OpSMod: *hasResult = true; *hasResultType = true; break; + case OpFRem: *hasResult = true; *hasResultType = true; break; + case OpFMod: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case OpDot: *hasResult = true; *hasResultType = true; break; + case OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case OpAny: *hasResult = true; *hasResultType = true; break; + case OpAll: *hasResult = true; *hasResultType = true; break; + case OpIsNan: *hasResult = true; *hasResultType = true; break; + case OpIsInf: *hasResult = true; *hasResultType = true; break; + case OpIsFinite: *hasResult = true; *hasResultType = true; break; + case OpIsNormal: *hasResult = true; *hasResultType = true; break; + case OpSignBitSet: *hasResult = true; *hasResultType = true; break; + case OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case OpOrdered: *hasResult = true; *hasResultType = true; break; + case OpUnordered: *hasResult = true; *hasResultType = true; break; + case OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case OpSelect: *hasResult = true; *hasResultType = true; break; + case OpIEqual: *hasResult = true; *hasResultType = true; break; + case OpINotEqual: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpULessThan: *hasResult = 
true; *hasResultType = true; break; + case OpSLessThan: *hasResult = true; *hasResultType = true; break; + case OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpNot: *hasResult = true; *hasResultType = true; break; + case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case OpBitReverse: *hasResult = true; *hasResultType = true; break; + case OpBitCount: *hasResult = true; *hasResultType = true; break; + case OpDPdx: *hasResult = true; *hasResultType = true; break; + case OpDPdy: *hasResult = true; *hasResultType = true; break; + case OpFwidth: *hasResult = true; *hasResultType = true; break; + case OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + 
case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: *hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case 
OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; 
*hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: 
*hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case OpSDot: *hasResult = true; *hasResultType = true; break; + case OpUDot: *hasResult = true; *hasResultType = true; break; + case OpSUDot: *hasResult = true; *hasResultType = true; break; + case OpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + 
case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case OpExpectKHR: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: 
*hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = 
true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + 
case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/spvIR.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/spvIR.h new file mode 100644 index 0000000..0969127 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/SPIRV/spvIR.h @@ -0,0 +1,520 @@ +// +// Copyright (C) 2014 LunarG, Inc. +// Copyright (C) 2015-2018 Google, Inc. +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +// SPIRV-IR +// +// Simple in-memory representation (IR) of SPIRV. Just for holding +// each function's CFG of blocks. Has this hierarchy: +// - Module, which is a list of +// - Function, which is a list of +// - Block, which is a list of +// - Instruction +// + +#pragma once +#ifndef spvIR_H +#define spvIR_H + +#include "spirv.hpp" + +#include <algorithm> +#include <cassert> +#include <functional> +#include <iostream> +#include <memory> +#include <set> +#include <vector> + +namespace spv { + +class Block; +class Function; +class Module; + +const Id NoResult = 0; +const Id NoType = 0; + +const Decoration NoPrecision = DecorationMax; + +#ifdef __GNUC__ +# define POTENTIALLY_UNUSED __attribute__((unused)) +#else +# define POTENTIALLY_UNUSED +#endif + +POTENTIALLY_UNUSED +const MemorySemanticsMask MemorySemanticsAllMemory = + (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask | + MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask | + MemorySemanticsImageMemoryMask); + +struct IdImmediate { + bool isId; // true if word is an Id, false if word is an immediate + unsigned word; + IdImmediate(bool i, unsigned w) : isId(i), word(w) {} +}; + +// +// SPIR-V IR instruction. 
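+// +// [Editor's illustrative aside, not part of the upstream header.] Instruction::dump() below emits the standard SPIR-V binary layout: the first word of each instruction packs the word count into the high 16 bits and the opcode into the low 16 bits. A minimal sketch of that packing, assuming only WordCountShift (== 16) from spirv.hpp; the helper name is hypothetical: +inline unsigned int firstInstructionWord(unsigned int wordCount, Op opCode) +{ + // e.g. OpStore (opcode 62) with two id operands occupies 3 words, + // so its first word is (3 << 16) | 62 == 0x0003003E + return (wordCount << WordCountShift) | (unsigned int)opCode; +}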
+// + +class Instruction { +public: + Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { } + explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { } + virtual ~Instruction() {} + void addIdOperand(Id id) { + operands.push_back(id); + idOperand.push_back(true); + } + void addImmediateOperand(unsigned int immediate) { + operands.push_back(immediate); + idOperand.push_back(false); + } + void setImmediateOperand(unsigned idx, unsigned int immediate) { + assert(!idOperand[idx]); + operands[idx] = immediate; + } + + void addStringOperand(const char* str) + { + unsigned int word = 0; + unsigned int shiftAmount = 0; + char c; + + do { + c = *(str++); + word |= ((unsigned int)c) << shiftAmount; + shiftAmount += 8; + if (shiftAmount == 32) { + addImmediateOperand(word); + word = 0; + shiftAmount = 0; + } + } while (c != 0); + + // deal with partial last word + if (shiftAmount > 0) { + addImmediateOperand(word); + } + } + bool isIdOperand(int op) const { return idOperand[op]; } + void setBlock(Block* b) { block = b; } + Block* getBlock() const { return block; } + Op getOpCode() const { return opCode; } + int getNumOperands() const + { + assert(operands.size() == idOperand.size()); + return (int)operands.size(); + } + Id getResultId() const { return resultId; } + Id getTypeId() const { return typeId; } + Id getIdOperand(int op) const { + assert(idOperand[op]); + return operands[op]; + } + unsigned int getImmediateOperand(int op) const { + assert(!idOperand[op]); + return operands[op]; + } + + // Write out the binary form. + void dump(std::vector<unsigned int>& out) const + { + // Compute the wordCount + unsigned int wordCount = 1; + if (typeId) + ++wordCount; + if (resultId) + ++wordCount; + wordCount += (unsigned int)operands.size(); + + // Write out the beginning of the instruction + out.push_back(((wordCount) << WordCountShift) | opCode); + if (typeId) + out.push_back(typeId); + if (resultId) + out.push_back(resultId); + + // Write out the operands + for (int op = 0; op < (int)operands.size(); ++op) + out.push_back(operands[op]); + } + +protected: + Instruction(const Instruction&); + Id resultId; + Id typeId; + Op opCode; + std::vector<Id> operands; // operands, both <id> and immediates (both are unsigned int) + std::vector<bool> idOperand; // true for operands that are <id>, false for immediates + Block* block; +}; + +// +// SPIR-V IR block. +// + +class Block { +public: + Block(Id id, Function& parent); + virtual ~Block() + { + } + + Id getId() { return instructions.front()->getResultId(); } + + Function& getParent() const { return parent; } + void addInstruction(std::unique_ptr<Instruction> inst); + void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);} + void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); } + const std::vector<Block*>& getPredecessors() const { return predecessors; } + const std::vector<Block*>& getSuccessors() const { return successors; } + const std::vector<std::unique_ptr<Instruction> >& getInstructions() const { + return instructions; + } + const std::vector<std::unique_ptr<Instruction> >& getLocalVariables() const { return localVariables; } + void setUnreachable() { unreachable = true; } + bool isUnreachable() const { return unreachable; } + // Returns the block's merge instruction, if one exists (otherwise null). 
+ const Instruction* getMergeInstruction() const { + if (instructions.size() < 2) return nullptr; + const Instruction* nextToLast = (instructions.cend() - 2)->get(); + switch (nextToLast->getOpCode()) { + case OpSelectionMerge: + case OpLoopMerge: + return nextToLast; + default: + return nullptr; + } + return nullptr; + } + + // Change this block into a canonical dead merge block. Delete instructions + // as necessary. A canonical dead merge block has only an OpLabel and an + // OpUnreachable. + void rewriteAsCanonicalUnreachableMerge() { + assert(localVariables.empty()); + // Delete all instructions except for the label. + assert(instructions.size() > 0); + instructions.resize(1); + successors.clear(); + addInstruction(std::unique_ptr<Instruction>(new Instruction(OpUnreachable))); + } + // Change this block into a canonical dead continue target branching to the + // given header ID. Delete instructions as necessary. A canonical dead continue + // target has only an OpLabel and an unconditional branch back to the corresponding + // header. + void rewriteAsCanonicalUnreachableContinue(Block* header) { + assert(localVariables.empty()); + // Delete all instructions except for the label. + assert(instructions.size() > 0); + instructions.resize(1); + successors.clear(); + // Add OpBranch back to the header. + assert(header != nullptr); + Instruction* branch = new Instruction(OpBranch); + branch->addIdOperand(header->getId()); + addInstruction(std::unique_ptr<Instruction>(branch)); + successors.push_back(header); + } + + bool isTerminated() const + { + switch (instructions.back()->getOpCode()) { + case OpBranch: + case OpBranchConditional: + case OpSwitch: + case OpKill: + case OpTerminateInvocation: + case OpReturn: + case OpReturnValue: + case OpUnreachable: + return true; + default: + return false; + } + } + + void dump(std::vector<unsigned int>& out) const + { + instructions[0]->dump(out); + for (int i = 0; i < (int)localVariables.size(); ++i) + localVariables[i]->dump(out); + for (int i = 1; i < (int)instructions.size(); ++i) + instructions[i]->dump(out); + } + +protected: + Block(const Block&); + Block& operator=(Block&); + + // To enforce keeping parent and ownership in sync: + friend Function; + + std::vector<std::unique_ptr<Instruction> > instructions; + std::vector<Block*> predecessors, successors; + std::vector<std::unique_ptr<Instruction> > localVariables; + Function& parent; + + // track whether this block is known to be unreachable (not necessarily + // true for all unreachable blocks, but should be set at least + // for the extraneous ones introduced by the builder). + bool unreachable; +}; + +// The different reasons for reaching a block in the inReadableOrder traversal. +enum ReachReason { + // Reachable from the entry block via transfers of control, i.e. branches. + ReachViaControlFlow = 0, + // A continue target that is not reachable via control flow. + ReachDeadContinue, + // A merge block that is not reachable via control flow. + ReachDeadMerge +}; + +// Traverses the control-flow graph rooted at root in an order suited for +// readable code generation. Invokes callback at every node in the traversal +// order. The callback arguments are: +// - the block, +// - the reason we reached the block, +// - if the reason was that block is an unreachable continue or unreachable merge block +// then the last parameter is the corresponding header block. +void inReadableOrder(Block* root, std::function<void(Block*, ReachReason, Block*)> callback); + +// +// SPIR-V IR Function. 
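+// +// [Editor's illustrative aside, not part of the upstream header.] Function::dump() below walks blocks via the inReadableOrder() traversal declared above rather than in raw storage order, so dead continue targets and dead merge blocks still come out where a reader (or a downstream consumer) expects them. A minimal usage sketch; the helper name is hypothetical: +inline void dumpBlocksReadably(const std::vector<Block*>& blocks, std::vector<unsigned int>& out) +{ + // visit every block reachable from the entry block, in readable order + inReadableOrder(blocks.front(), [&out](Block* b, ReachReason, Block*) { b->dump(out); }); +}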
+// + +class Function { +public: + Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent); + virtual ~Function() + { + for (int i = 0; i < (int)parameterInstructions.size(); ++i) + delete parameterInstructions[i]; + + for (int i = 0; i < (int)blocks.size(); ++i) + delete blocks[i]; + } + Id getId() const { return functionInstruction.getResultId(); } + Id getParamId(int p) const { return parameterInstructions[p]->getResultId(); } + Id getParamType(int p) const { return parameterInstructions[p]->getTypeId(); } + + void addBlock(Block* block) { blocks.push_back(block); } + void removeBlock(Block* block) + { + auto found = find(blocks.begin(), blocks.end(), block); + assert(found != blocks.end()); + blocks.erase(found); + delete block; + } + + Module& getParent() const { return parent; } + Block* getEntryBlock() const { return blocks.front(); } + Block* getLastBlock() const { return blocks.back(); } + const std::vector<Block*>& getBlocks() const { return blocks; } + void addLocalVariable(std::unique_ptr<Instruction> inst); + Id getReturnType() const { return functionInstruction.getTypeId(); } + Id getFuncId() const { return functionInstruction.getResultId(); } + void setReturnPrecision(Decoration precision) + { + if (precision == DecorationRelaxedPrecision) + reducedPrecisionReturn = true; + } + Decoration getReturnPrecision() const + { return reducedPrecisionReturn ? DecorationRelaxedPrecision : NoPrecision; } + + void setDebugLineInfo(Id fileName, int line, int column) { + lineInstruction = std::unique_ptr<Instruction>{new Instruction(OpLine)}; + lineInstruction->addIdOperand(fileName); + lineInstruction->addImmediateOperand(line); + lineInstruction->addImmediateOperand(column); + } + bool hasDebugLineInfo() const { return lineInstruction != nullptr; } + + void setImplicitThis() { implicitThis = true; } + bool hasImplicitThis() const { return implicitThis; } + + void addParamPrecision(unsigned param, Decoration precision) + { + if (precision == DecorationRelaxedPrecision) + reducedPrecisionParams.insert(param); + } + Decoration getParamPrecision(unsigned param) const + { + return reducedPrecisionParams.find(param) != reducedPrecisionParams.end() ? + DecorationRelaxedPrecision : NoPrecision; + } + + void dump(std::vector<unsigned int>& out) const + { + // OpLine + if (lineInstruction != nullptr) { + lineInstruction->dump(out); + } + + // OpFunction + functionInstruction.dump(out); + + // OpFunctionParameter + for (int p = 0; p < (int)parameterInstructions.size(); ++p) + parameterInstructions[p]->dump(out); + + // Blocks + inReadableOrder(blocks[0], [&out](const Block* b, ReachReason, Block*) { b->dump(out); }); + Instruction end(0, 0, OpFunctionEnd); + end.dump(out); + } + +protected: + Function(const Function&); + Function& operator=(Function&); + + Module& parent; + std::unique_ptr<Instruction> lineInstruction; + Instruction functionInstruction; + std::vector<Instruction*> parameterInstructions; + std::vector<Block*> blocks; + bool implicitThis; // true if this is a member function expecting to be passed a 'this' as the first argument + bool reducedPrecisionReturn; + std::set<int> reducedPrecisionParams; // list of parameter indexes that need a relaxed precision arg +}; + +// +// SPIR-V IR Module. 
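+// +// [Editor's illustrative aside, not part of the upstream header.] Module below maps result ids to instructions with a plain vector grown with slack in mapInstruction() (resize(resultId + 16)), so a lookup is one array access instead of a hash probe. A hypothetical use, kept in a comment since Module is defined just below: +// +// StorageClass pointerStorageClass(const Module& m, Id pointerTypeId) +// { +// // OpTypePointer keeps its storage class as immediate operand 0 +// return m.getStorageClass(pointerTypeId); +// }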
+// + +class Module { +public: + Module() {} + virtual ~Module() + { + // TODO delete things + } + + void addFunction(Function *fun) { functions.push_back(fun); } + + void mapInstruction(Instruction *instruction) + { + spv::Id resultId = instruction->getResultId(); + // map the instruction's result id + if (resultId >= idToInstruction.size()) + idToInstruction.resize(resultId + 16); + idToInstruction[resultId] = instruction; + } + + Instruction* getInstruction(Id id) const { return idToInstruction[id]; } + const std::vector<Function*>& getFunctions() const { return functions; } + spv::Id getTypeId(Id resultId) const { + return idToInstruction[resultId] == nullptr ? NoType : idToInstruction[resultId]->getTypeId(); + } + StorageClass getStorageClass(Id typeId) const + { + assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer); + return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0); + } + + void dump(std::vector<unsigned int>& out) const + { + for (int f = 0; f < (int)functions.size(); ++f) + functions[f]->dump(out); + } + +protected: + Module(const Module&); + std::vector<Function*> functions; + + // map from result id to instruction having that result id + std::vector<Instruction*> idToInstruction; + + // map from a result id to its type id +}; + +// +// Implementation (it's here due to circular type definitions). +// + +// Add both +// - the OpFunction instruction +// - all the OpFunctionParameter instructions +__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent) + : parent(parent), lineInstruction(nullptr), + functionInstruction(id, resultType, OpFunction), implicitThis(false), + reducedPrecisionReturn(false) +{ + // OpFunction + functionInstruction.addImmediateOperand(FunctionControlMaskNone); + functionInstruction.addIdOperand(functionType); + parent.mapInstruction(&functionInstruction); + parent.addFunction(this); + + // OpFunctionParameter + Instruction* typeInst = parent.getInstruction(functionType); + int numParams = typeInst->getNumOperands() - 1; + for (int p = 0; p < numParams; ++p) { + Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter); + parent.mapInstruction(param); + parameterInstructions.push_back(param); + } +} + +__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst) +{ + Instruction* raw_instruction = inst.get(); + blocks[0]->addLocalVariable(std::move(inst)); + parent.mapInstruction(raw_instruction); +} + +__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false) +{ + instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel))); + instructions.back()->setBlock(this); + parent.getParent().mapInstruction(instructions.back().get()); +} + +__inline void Block::addInstruction(std::unique_ptr<Instruction> inst) +{ + Instruction* raw_instruction = inst.get(); + instructions.push_back(std::move(inst)); + raw_instruction->setBlock(this); + if (raw_instruction->getResultId()) + parent.getParent().mapInstruction(raw_instruction); +} + +} // end spv namespace + +#endif // spvIR_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/build_info.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/build_info.h new file mode 100644 index 0000000..e711b20 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/glslang/build_info.h @@ -0,0 +1,62 @@ +// Copyright (C) 2020 The Khronos Group Inc. +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of The Khronos Group Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef GLSLANG_BUILD_INFO +#define GLSLANG_BUILD_INFO + +#define GLSLANG_VERSION_MAJOR 11 +#define GLSLANG_VERSION_MINOR 12 +#define GLSLANG_VERSION_PATCH 0 +#define GLSLANG_VERSION_FLAVOR "" + +#define GLSLANG_VERSION_GREATER_THAN(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH) > (patch))))) + +#define GLSLANG_VERSION_GREATER_OR_EQUAL_TO(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH >= (patch)))))) + +#define GLSLANG_VERSION_LESS_THAN(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH) < (patch))))) + +#define GLSLANG_VERSION_LESS_OR_EQUAL_TO(major, minor, patch) \ + ((GLSLANG_VERSION_MAJOR) < (major) || ((major) == GLSLANG_VERSION_MAJOR && \ + ((GLSLANG_VERSION_MINOR) < (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ + (GLSLANG_VERSION_PATCH <= (patch)))))) + +#endif // GLSLANG_BUILD_INFO diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/allocator.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/allocator.h new file mode 100644 index 0000000..3a5ebca --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/allocator.h @@ -0,0 +1,448 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/allocator.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/allocator.h new file mode 100644 index 0000000..3a5ebca --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/allocator.h @@ -0,0 +1,448 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_ALLOCATOR_H +#define NCNN_ALLOCATOR_H + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif + +#include "platform.h" + +#include <stdlib.h> + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +#include <android/hardware_buffer.h> +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +// the alignment of all the allocated buffers +#if NCNN_AVX512 +#define NCNN_MALLOC_ALIGN 64 +#elif NCNN_AVX +#define NCNN_MALLOC_ALIGN 32 +#else +#define NCNN_MALLOC_ALIGN 16 +#endif + +// we have some optimized kernels that may overread buffer a bit in loop +// it is common to interleave next-loop data load with arithmetic instructions +// allocating more bytes keeps us safe from SEGV_ACCERR failure +#define NCNN_MALLOC_OVERREAD 64 + +// Aligns a pointer to the specified number of bytes +// ptr Aligned pointer +// n Alignment size that must be a power of two +template<typename _Tp> +static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp)) +{ + return (_Tp*)(((size_t)ptr + n - 1) & -n); +} + +// Aligns a buffer size to the specified number of bytes +// The function returns the minimum number that is greater or equal to sz and is divisible by n +// sz Buffer size to align +// n Alignment size that must be a power of two +static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n) +{ + return (sz + n - 1) & -n; +} + +static NCNN_FORCEINLINE void* fastMalloc(size_t size) +{ +#if _MSC_VER + return _aligned_malloc(size, NCNN_MALLOC_ALIGN); +#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) + void* ptr = 0; + if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD)) + ptr = 0; + return ptr; +#elif __ANDROID__ && __ANDROID_API__ < 17 + return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD); +#else + unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD); + if (!udata) + return 0; + unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN); + adata[-1] = udata; + return adata; +#endif +} + +static NCNN_FORCEINLINE void fastFree(void* ptr) +{ + if (ptr) + { +#if _MSC_VER + _aligned_free(ptr); +#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17) + free(ptr); +#elif __ANDROID__ && __ANDROID_API__ < 17 + free(ptr); +#else + unsigned char* udata = ((unsigned char**)ptr)[-1]; + free(udata); +#endif + } +}
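To make the rounding rule of alignSize/alignPtr concrete, a standalone sketch with the arithmetic worked out by hand (NCNN_MALLOC_ALIGN is 16 when neither AVX nor AVX512 is enabled):

#include <cstdio>

// same mask trick as ncnn::alignSize: round sz up to a multiple of n (n a power of two)
static size_t align_size(size_t sz, int n)
{
    return (sz + n - 1) & -n;
}

int main()
{
    printf("%zu\n", align_size(100, 16)); // (100 + 15) & ~15 -> 112
    printf("%zu\n", align_size(112, 16)); // already aligned   -> 112
    return 0;
}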
 +#if NCNN_THREADS +// exchange-add operation for atomic operations on reference counters +#if defined __riscv && !defined __riscv_atomic +// riscv target without A extension +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32) +// atomic increment on the linux version of the Intel(tm) compiler +#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta) +#elif defined __GNUC__ +#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) +#ifdef __ATOMIC_ACQ_REL +#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4) +#endif +#else +#if defined __ATOMIC_ACQ_REL && !defined __clang__ +// version for gcc >= 4.7 +#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL) +#else +#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta)) +#endif +#endif +#elif defined _MSC_VER && !defined RC_INVOKED +#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta) +#else +// thread-unsafe branch +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif +#else // NCNN_THREADS +static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta) +{ + int tmp = *addr; + *addr += delta; + return tmp; +} +#endif // NCNN_THREADS + +class NCNN_EXPORT Allocator +{ +public: + virtual ~Allocator(); + virtual void* fastMalloc(size_t size) = 0; + virtual void fastFree(void* ptr) = 0; +}; + +class PoolAllocatorPrivate; +class NCNN_EXPORT PoolAllocator : public Allocator +{ +public: + PoolAllocator(); + ~PoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + PoolAllocator(const PoolAllocator&); + PoolAllocator& operator=(const PoolAllocator&); + +private: + PoolAllocatorPrivate* const d; +}; + +class UnlockedPoolAllocatorPrivate; +class NCNN_EXPORT UnlockedPoolAllocator : public Allocator +{ +public: + UnlockedPoolAllocator(); + ~UnlockedPoolAllocator(); + + // ratio range 0 ~ 1 + // default cr = 0 + void set_size_compare_ratio(float scr); + + // budget drop threshold + // default threshold = 10 + void set_size_drop_threshold(size_t); + + // release all budgets immediately + void clear(); + + virtual void* fastMalloc(size_t size); + virtual void fastFree(void* ptr); + +private: + UnlockedPoolAllocator(const UnlockedPoolAllocator&); + UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&); + +private: + UnlockedPoolAllocatorPrivate* const d; +}; + +#if NCNN_VULKAN + +class VulkanDevice; + +class NCNN_EXPORT VkBufferMemory +{ +public: + VkBuffer buffer; + + // the base offset assigned by allocator + size_t offset; + size_t capacity; + + VkDeviceMemory memory; + void* mapped_ptr; + + // buffer state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkPipelineStageFlags stage_flags; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkImageMemory +{ +public: + VkImage image; + VkImageView imageview; + + // underlying info assigned by allocator + int width; + int height; + int depth; + VkFormat format; + + VkDeviceMemory memory; + void* mapped_ptr; + + // the base offset assigned by allocator + size_t bind_offset; + size_t bind_capacity; + + // image state, modified by command functions internally + mutable VkAccessFlags access_flags; + mutable VkImageLayout image_layout; + mutable VkPipelineStageFlags stage_flags; + + // in-execution state, modified by command functions internally + mutable int
command_refcount; + + // initialize and modified by mat + int refcount; +}; + +class NCNN_EXPORT VkAllocator +{ +public: + explicit VkAllocator(const VulkanDevice* _vkdev); + virtual ~VkAllocator(); + + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size) = 0; + virtual void fastFree(VkBufferMemory* ptr) = 0; + virtual int flush(VkBufferMemory* ptr); + virtual int invalidate(VkBufferMemory* ptr); + + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; + virtual void fastFree(VkImageMemory* ptr) = 0; + +public: + const VulkanDevice* vkdev; + uint32_t buffer_memory_type_index; + uint32_t image_memory_type_index; + uint32_t reserved_type_index; + bool mappable; + bool coherent; + +protected: + VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage); + VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); + VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); + + VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); + VkImageView create_imageview(VkImage image, VkFormat format); +}; + +class VkBlobAllocatorPrivate; +class NCNN_EXPORT VkBlobAllocator : public VkAllocator +{ +public: + explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M + virtual ~VkBlobAllocator(); + +public: + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkBlobAllocator(const VkBlobAllocator&); + VkBlobAllocator& operator=(const VkBlobAllocator&); + +private: + VkBlobAllocatorPrivate* const d; +}; + +class VkWeightAllocatorPrivate; +class NCNN_EXPORT VkWeightAllocator : public VkAllocator +{ +public: + explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M + virtual ~VkWeightAllocator(); + +public: + // release all blocks immediately + virtual void clear(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightAllocator(const VkWeightAllocator&); + VkWeightAllocator& operator=(const VkWeightAllocator&); + +private: + VkWeightAllocatorPrivate* const d; +}; + +class VkStagingAllocatorPrivate; +class NCNN_EXPORT VkStagingAllocator : public VkAllocator +{ +public: + explicit VkStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkStagingAllocator(); + +public: + // ratio range 0 ~ 1 + // default cr = 0.75 + void set_size_compare_ratio(float scr); + + // release all budgets immediately + virtual void clear(); + + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkStagingAllocator(const VkStagingAllocator&); + VkStagingAllocator& operator=(const VkStagingAllocator&); + +private: + VkStagingAllocatorPrivate* const d; +}; + +class VkWeightStagingAllocatorPrivate; +class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator +{ +public: + explicit 
VkWeightStagingAllocator(const VulkanDevice* vkdev); + virtual ~VkWeightStagingAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkWeightStagingAllocator(const VkWeightStagingAllocator&); + VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&); + +private: + VkWeightStagingAllocatorPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator +{ +public: + VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb); + virtual ~VkAndroidHardwareBufferImageAllocator(); + +public: + virtual VkBufferMemory* fastMalloc(size_t size); + virtual void fastFree(VkBufferMemory* ptr); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); + virtual void fastFree(VkImageMemory* ptr); + +private: + VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&); + VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&); + +public: + int init(); + + int width() const; + int height() const; + uint64_t external_format() const; + +public: + AHardwareBuffer* hb; + AHardwareBuffer_Desc bufferDesc; + VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties; + VkAndroidHardwareBufferPropertiesANDROID bufferProperties; + VkSamplerYcbcrConversionKHR samplerYcbcrConversion; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_ALLOCATOR_H
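Both pool allocators recycle freed blocks instead of returning them to the system, so they are typically installed once and reused across inferences. A sketch under the assumption that ncnn::Option (declared in option.h, which is outside this excerpt) exposes the blob_allocator/workspace_allocator pointers given here:

#include "allocator.h" // ncnn pool allocators
#include "option.h"    // ncnn::Option (assumed from the rest of the ncnn package)

static ncnn::UnlockedPoolAllocator g_blob_pool;  // no locking; confine to one thread
static ncnn::PoolAllocator g_workspace_pool;     // locked variant, safe to share

void install_pools(ncnn::Option& opt)
{
    // with ratio 0 (the default), any cached block large enough may be reused
    g_blob_pool.set_size_compare_ratio(0.f);
    opt.blob_allocator = &g_blob_pool;
    opt.workspace_allocator = &g_workspace_pool;
}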
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/benchmark.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/benchmark.h new file mode 100644 index 0000000..3d5c0cd --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/benchmark.h @@ -0,0 +1,36 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_BENCHMARK_H +#define NCNN_BENCHMARK_H + +#include "layer.h" +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +// get now timestamp in ms +NCNN_EXPORT double get_current_time(); + +#if NCNN_BENCHMARK + +NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); +NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end); + +#endif // NCNN_BENCHMARK + +} // namespace ncnn + +#endif // NCNN_BENCHMARK_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/blob.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/blob.h new file mode 100644 index 0000000..c9f144f --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/blob.h @@ -0,0 +1,44 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_BLOB_H +#define NCNN_BLOB_H + +#include "mat.h" +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT Blob +{ +public: + // empty + Blob(); + +public: +#if NCNN_STRING + // blob name + std::string name; +#endif // NCNN_STRING + // layer index which produce this blob as output + int producer; + // layer index which need this blob as input + int consumer; + // shape hint + Mat shape; +}; + +} // namespace ncnn + +#endif // NCNN_BLOB_H
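get_current_time() above returns a wall-clock timestamp in milliseconds, so bracketing a call with two reads gives a quick latency measurement; a minimal sketch:

#include "benchmark.h" // ncnn::get_current_time
#include <cstdio>

void timed_section()
{
    double t0 = ncnn::get_current_time();
    // ... run one inference here ...
    double t1 = ncnn::get_current_time();
    fprintf(stderr, "elapsed %.2f ms\n", t1 - t0);
}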
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/c_api.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/c_api.h new file mode 100644 index 0000000..ebf5db5 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/c_api.h @@ -0,0 +1,336 @@ +/* Tencent is pleased to support the open source community by making ncnn available. + * + * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. + * + * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + * + * Unless required by applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +#ifndef NCNN_C_API_H +#define NCNN_C_API_H + +#include "platform.h" + +#if NCNN_C_API + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +NCNN_EXPORT const char* ncnn_version(); + +/* allocator api */ +typedef struct __ncnn_allocator_t* ncnn_allocator_t; +struct NCNN_EXPORT __ncnn_allocator_t +{ + void* pthis; + + void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size); + void (*fast_free)(ncnn_allocator_t allocator, void* ptr); +}; + +NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator(); +NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator(); +NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator); + +/* option api */ +typedef struct __ncnn_option_t* ncnn_option_t; + +NCNN_EXPORT ncnn_option_t ncnn_option_create(); +NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt); + +NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt); +NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads); + +NCNN_EXPORT int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt); +NCNN_EXPORT void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int use_local_pool_allocator); + +NCNN_EXPORT void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocator_t allocator); + +NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt); +NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute); + +/* mat api */ +typedef struct __ncnn_mat_t* ncnn_mat_t; + +NCNN_EXPORT ncnn_mat_t ncnn_mat_create(); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack,
ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_destroy(ncnn_mat_t mat); + +NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v); + +NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator); + +NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat); +NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat); +NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat); + +NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c); + +#if NCNN_PIXEL + +/* mat pixel api */ +#define NCNN_MAT_PIXEL_RGB 1 +#define NCNN_MAT_PIXEL_BGR 2 +#define NCNN_MAT_PIXEL_GRAY 3 +#define NCNN_MAT_PIXEL_RGBA 4 +#define NCNN_MAT_PIXEL_BGRA 5 +#define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16)) +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator); +NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator); +NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride); +NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride); + +#endif /* NCNN_PIXEL */ + +NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals); + +NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt); +NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt); + +/* blob api */ +typedef struct __ncnn_blob_t* ncnn_blob_t; + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob); +NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob); + +NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c); + +/* paramdict api */ +typedef struct __ncnn_paramdict_t* ncnn_paramdict_t; + +NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create(); +NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd); + +NCNN_EXPORT int ncnn_paramdict_get_type(const 
ncnn_paramdict_t pd, int id); + +NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def); +NCNN_EXPORT float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float def); +NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def); + +NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i); +NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f); +NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v); + +/* datareader api */ +typedef struct __ncnn_datareader_t* ncnn_datareader_t; +struct NCNN_EXPORT __ncnn_datareader_t +{ + void* pthis; + +#if NCNN_STRING + int (*scan)(ncnn_datareader_t dr, const char* format, void* p); +#endif /* NCNN_STRING */ + size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size); +}; + +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create(); +#if NCNN_STDIO +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp); +#endif /* NCNN_STDIO */ +NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem); +NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr); + +/* modelbin api */ +typedef struct __ncnn_modelbin_t* ncnn_modelbin_t; +struct NCNN_EXPORT __ncnn_modelbin_t +{ + void* pthis; + + ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type); + ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type); + ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type); +}; + +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr); +NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n); +NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb); + +/* layer api */ +typedef struct __ncnn_layer_t* ncnn_layer_t; +struct NCNN_EXPORT __ncnn_layer_t +{ + void* pthis; + + int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd); + int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb); + + int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt); + + int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt); + int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt); + + int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt); + int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt); +}; + +NCNN_EXPORT ncnn_layer_t ncnn_layer_create(); +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex); +#if NCNN_STRING +NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer); + +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer); +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer); +#endif /* NCNN_STRING */ + +NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t 
layer); +NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer); + +NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable); +NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable); + +NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i); +NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer); +NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i); + +NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); +NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c); + +/* layer factory function */ +typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata); +typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata); + +typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t; +struct __ncnn_net_custom_layer_factory_t +{ + ncnn_layer_creator_t creator; + ncnn_layer_destroyer_t destroyer; + void* userdata; + ncnn_net_custom_layer_factory_t next; +}; + +/* net api */ +typedef struct __ncnn_net_t* ncnn_net_t; +struct __ncnn_net_t +{ + void* pthis; + + ncnn_net_custom_layer_factory_t custom_layer_factory; +}; + +NCNN_EXPORT ncnn_net_t ncnn_net_create(); +NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net); + +NCNN_EXPORT ncnn_option_t ncnn_net_get_option(ncnn_net_t net); +NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt); + +#if NCNN_STRING +NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); +#endif /* NCNN_STRING */ +NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata); + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path); +NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path); +#endif /* NCNN_STDIO */ + +#if NCNN_STDIO +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem); +#endif /* NCNN_STRING */ +#endif /* NCNN_STDIO */ +NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem); +NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem); + +#if NCNN_STRING +NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr); +NCNN_EXPORT int 
ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr); + +NCNN_EXPORT void ncnn_net_clear(ncnn_net_t net); + +NCNN_EXPORT int ncnn_net_get_input_count(const ncnn_net_t net); +NCNN_EXPORT int ncnn_net_get_output_count(const ncnn_net_t net); +#if NCNN_STRING +NCNN_EXPORT const char* ncnn_net_get_input_name(const ncnn_net_t net, int i); +NCNN_EXPORT const char* ncnn_net_get_output_name(const ncnn_net_t net, int i); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_net_get_input_index(const ncnn_net_t net, int i); +NCNN_EXPORT int ncnn_net_get_output_index(const ncnn_net_t net, int i); + +/* extractor api */ +typedef struct __ncnn_extractor_t* ncnn_extractor_t; + +NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net); +NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex); + +NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt); + +#if NCNN_STRING +NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat); +#endif /* NCNN_STRING */ +NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat); +NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* NCNN_C_API */ + +#endif /* NCNN_C_API_H */
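Taken together, the C API above follows a create / load / extract / destroy flow. A hedged sketch of one inference; the file paths and blob names are placeholders (real blob names come from the .param file), and a null allocator means the default heap is used:

#include "c_api.h" /* ncnn C API */

int run_once(void* input_chw, int w, int h, int c, float* out_first)
{
    ncnn_net_t net = ncnn_net_create();
    if (ncnn_net_load_param(net, "model.param") != 0 || ncnn_net_load_model(net, "model.bin") != 0)
    {
        ncnn_net_destroy(net);
        return -1;
    }

    /* wraps caller-owned float32 memory, no copy */
    ncnn_mat_t in = ncnn_mat_create_external_3d(w, h, c, input_chw, NULL);
    ncnn_mat_t out = NULL;

    ncnn_extractor_t ex = ncnn_extractor_create(net);
    ncnn_extractor_input(ex, "images", in);      /* placeholder blob name */
    ncnn_extractor_extract(ex, "output0", &out); /* placeholder blob name */

    *out_first = ((const float*)ncnn_mat_get_data(out))[0];

    ncnn_mat_destroy(in);
    ncnn_mat_destroy(out);
    ncnn_extractor_destroy(ex);
    ncnn_net_destroy(net);
    return 0;
}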
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/command.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/command.h new file mode 100644 index 0000000..337d085 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/command.h @@ -0,0 +1,136 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_COMMAND_H +#define NCNN_COMMAND_H + +#include "platform.h" + +#if NCNN_VULKAN + +#include "mat.h" + +#include <vulkan/vulkan.h> + +namespace ncnn { + +class Pipeline; +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class ImportAndroidHardwareBufferPipeline; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API +class VkComputePrivate; +class NCNN_EXPORT VkCompute +{ +public: + explicit VkCompute(const VulkanDevice* vkdev); + virtual ~VkCompute(); + +public: + void record_upload(const Mat& src, VkMat& dst, const Option& opt); + + void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); + + void record_download(const VkMat& src, Mat& dst, const Option& opt); + + void record_download(const VkImageMat& src, Mat& dst, const Option& opt); + + void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt); + + void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt); + + void record_clone(const Mat& src, VkMat& dst, const Option& opt); + + void record_clone(const Mat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkMat& src, Mat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, Mat& dst, const Option& opt); + + void record_clone(const VkMat& src, VkMat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt); + + void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); + + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher); + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher); + void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher); + +#if NCNN_BENCHMARK + void record_write_timestamp(uint32_t query); +#endif // NCNN_BENCHMARK + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst); + + void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + int submit_and_wait(); + + int reset(); + +#if NCNN_BENCHMARK + int create_query_pool(uint32_t query_count); + + int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results); +#endif // NCNN_BENCHMARK + +protected: + const VulkanDevice* vkdev; + + void barrier_readwrite(const VkMat& binding); + void barrier_readwrite(const VkImageMat& binding); + void barrier_readonly(const VkImageMat& binding); + +private: + VkComputePrivate* const d; +}; + +class VkTransferPrivate; +class NCNN_EXPORT VkTransfer +{ +public: + explicit VkTransfer(const VulkanDevice* vkdev); + virtual ~VkTransfer(); + +public: + void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool
flatten = true); + + void record_upload(const Mat& src, VkImageMat& dst, const Option& opt); + + int submit_and_wait(); + +protected: + const VulkanDevice* vkdev; + +private: + VkTransferPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_COMMAND_H
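VkCompute is a recorder: the record_* calls only queue work, and nothing touches the GPU until submit_and_wait(). A heavily hedged sketch of the usual upload / dispatch / download round trip; the Pipeline object comes from pipeline.h and get_gpu_device() from gpu.h (both appear later in this patch), and the one-binding, no-push-constant layout assumed here must match what the pipeline's shader actually expects:

#include "command.h" // ncnn::VkCompute
#include "gpu.h"     // ncnn::get_gpu_device
#include <vector>

void round_trip(const ncnn::Pipeline* pipeline, const ncnn::Mat& in, ncnn::Mat& out, const ncnn::Option& opt)
{
    const ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
    ncnn::VkCompute cmd(vkdev);

    ncnn::VkMat in_gpu;
    cmd.record_upload(in, in_gpu, opt); // host -> device

    std::vector<ncnn::VkMat> bindings(1, in_gpu);  // assumes a one-binding shader
    std::vector<ncnn::vk_constant_type> constants; // none, for this sketch
    cmd.record_pipeline(pipeline, bindings, constants, in_gpu);

    cmd.record_download(in_gpu, out, opt); // device -> host
    cmd.submit_and_wait();                 // everything executes here
}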
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/cpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/cpu.h new file mode 100644 index 0000000..d03e7e8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/cpu.h @@ -0,0 +1,173 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_CPU_H +#define NCNN_CPU_H + +#include <stddef.h> + +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif +#if defined __ANDROID__ || defined __linux__ +#include <sched.h> // cpu_set_t +#endif + +#include "platform.h" + +namespace ncnn { + +class NCNN_EXPORT CpuSet +{ +public: + CpuSet(); + void enable(int cpu); + void disable(int cpu); + void disable_all(); + bool is_enabled(int cpu) const; + int num_enabled() const; + +public: +#if (defined _WIN32 && !(defined __MINGW32__)) + ULONG_PTR mask; +#endif +#if defined __ANDROID__ || defined __linux__ + cpu_set_t cpu_set; +#endif +#if __APPLE__ + unsigned int policy; +#endif +}; + +// test optional cpu features +// edsp = armv7 edsp +NCNN_EXPORT int cpu_support_arm_edsp(); +// neon = armv7 neon or aarch64 asimd +NCNN_EXPORT int cpu_support_arm_neon(); +// vfpv4 = armv7 fp16 + fma +NCNN_EXPORT int cpu_support_arm_vfpv4(); +// asimdhp = aarch64 asimd half precision +NCNN_EXPORT int cpu_support_arm_asimdhp(); +// asimddp = aarch64 asimd dot product +NCNN_EXPORT int cpu_support_arm_asimddp(); +// asimdfhm = aarch64 asimd fhm +NCNN_EXPORT int cpu_support_arm_asimdfhm(); +// bf16 = aarch64 bf16 +NCNN_EXPORT int cpu_support_arm_bf16(); +// i8mm = aarch64 i8mm +NCNN_EXPORT int cpu_support_arm_i8mm(); +// sve = aarch64 sve +NCNN_EXPORT int cpu_support_arm_sve(); +// sve2 = aarch64 sve2 +NCNN_EXPORT int cpu_support_arm_sve2(); +// svebf16 = aarch64 svebf16 +NCNN_EXPORT int cpu_support_arm_svebf16(); +// svei8mm = aarch64 svei8mm +NCNN_EXPORT int cpu_support_arm_svei8mm(); +// svef32mm = aarch64 svef32mm +NCNN_EXPORT int cpu_support_arm_svef32mm(); + +// avx = x86 avx +NCNN_EXPORT int cpu_support_x86_avx(); +// fma = x86 fma +NCNN_EXPORT int cpu_support_x86_fma(); +// xop = x86 xop +NCNN_EXPORT int cpu_support_x86_xop(); +// f16c = x86 f16c +NCNN_EXPORT int cpu_support_x86_f16c(); +// avx2 = x86 avx2 + fma + f16c +NCNN_EXPORT int cpu_support_x86_avx2(); +// avx_vnni = x86 avx vnni +NCNN_EXPORT int cpu_support_x86_avx_vnni(); +// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl +NCNN_EXPORT int cpu_support_x86_avx512(); +// avx512_vnni = x86 avx512 vnni +NCNN_EXPORT int cpu_support_x86_avx512_vnni(); +// avx512_bf16 = x86 avx512 bf16 +NCNN_EXPORT int cpu_support_x86_avx512_bf16(); +// avx512_fp16 = x86 avx512 fp16 +NCNN_EXPORT int cpu_support_x86_avx512_fp16(); + +// lsx = loongarch lsx +NCNN_EXPORT int cpu_support_loongarch_lsx(); +// lasx = loongarch lasx +NCNN_EXPORT int cpu_support_loongarch_lasx(); + +// msa = mips msa +NCNN_EXPORT int cpu_support_mips_msa(); +// mmi = loongson mmi +NCNN_EXPORT int cpu_support_loongson_mmi(); + +// v = riscv vector +NCNN_EXPORT int cpu_support_riscv_v(); +// zfh = riscv half-precision float +NCNN_EXPORT int cpu_support_riscv_zfh(); +// vlenb = riscv vector length in bytes +NCNN_EXPORT int cpu_riscv_vlenb(); + +// cpu info +NCNN_EXPORT int get_cpu_count(); +NCNN_EXPORT int get_little_cpu_count(); +NCNN_EXPORT int get_big_cpu_count(); + +NCNN_EXPORT int get_physical_cpu_count(); +NCNN_EXPORT int get_physical_little_cpu_count(); +NCNN_EXPORT int get_physical_big_cpu_count(); + +// cpu l2 varies from 64k to 1M, but l3 can be zero +NCNN_EXPORT int get_cpu_level2_cache_size(); +NCNN_EXPORT int get_cpu_level3_cache_size(); + +// bind all threads on little clusters if powersave enabled +// affects HMP arch cpu like ARM big.LITTLE +// only implemented on android at the moment +// switching powersave is expensive and not thread-safe +// 0 = all cores enabled(default) +// 1 = only little clusters enabled +// 2 = only big clusters enabled +// return 0 if success for setter function +NCNN_EXPORT int get_cpu_powersave(); +NCNN_EXPORT int set_cpu_powersave(int powersave); + +// convenient wrapper +NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); + +// set explicit thread affinity +NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask); + +// misc function wrapper for openmp routines +NCNN_EXPORT int get_omp_num_threads(); +NCNN_EXPORT void set_omp_num_threads(int num_threads); + +NCNN_EXPORT int get_omp_dynamic(); +NCNN_EXPORT void set_omp_dynamic(int dynamic); + +NCNN_EXPORT int get_omp_thread_num(); + +NCNN_EXPORT int get_kmp_blocktime(); +NCNN_EXPORT void set_kmp_blocktime(int time_ms); + +// need to flush denormals on Intel Chipset. +// Other architectures such as ARM can be added as needed. +// 0 = DAZ OFF, FTZ OFF +// 1 = DAZ ON , FTZ OFF +// 2 = DAZ OFF, FTZ ON +// 3 = DAZ ON, FTZ ON +NCNN_EXPORT int get_flush_denormals(); +NCNN_EXPORT int set_flush_denormals(int flush_denormals); + +} // namespace ncnn + +#endif // NCNN_CPU_H
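On big.LITTLE Android devices these knobs are usually set together before the first inference; a small sketch using only functions declared above:

#include "cpu.h" // ncnn cpu helpers
#include <cstdio>

void pin_to_big_cores()
{
    // 2 = only big clusters enabled; the setter returns 0 on success
    if (ncnn::set_cpu_powersave(2) != 0)
        fprintf(stderr, "powersave mode not supported on this platform\n");

    // one worker per big core is a common default
    ncnn::set_omp_num_threads(ncnn::get_big_cpu_count());

    // 3 = DAZ ON, FTZ ON: avoid denormal slow paths on x86
    ncnn::set_flush_denormals(3);
}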
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/datareader.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/datareader.h new file mode 100644 index 0000000..ed2aba3 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/datareader.h @@ -0,0 +1,122 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_DATAREADER_H +#define NCNN_DATAREADER_H + +#include "platform.h" +#if NCNN_STDIO +#include <stdio.h> +#endif + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/asset_manager.h> +#endif +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +// data read wrapper +class NCNN_EXPORT DataReader +{ +public: + DataReader(); + virtual ~DataReader(); + +#if NCNN_STRING + // parse plain param text + // return 1 if scan success + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + + // read binary param and model data + // return bytes read + virtual size_t read(void* buf, size_t size) const; + + // get model data reference + // return bytes referenced + virtual size_t reference(size_t size, const void** buf) const; +}; + +#if NCNN_STDIO +class DataReaderFromStdioPrivate; +class NCNN_EXPORT DataReaderFromStdio : public DataReader +{ +public: + explicit DataReaderFromStdio(FILE* fp); + virtual ~DataReaderFromStdio(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + +private: + DataReaderFromStdio(const DataReaderFromStdio&); + DataReaderFromStdio& operator=(const DataReaderFromStdio&); + +private: + DataReaderFromStdioPrivate* const d; +}; +#endif // NCNN_STDIO + +class DataReaderFromMemoryPrivate; +class NCNN_EXPORT DataReaderFromMemory : public DataReader +{ +public: + explicit DataReaderFromMemory(const unsigned char*& mem); + virtual ~DataReaderFromMemory(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + virtual size_t reference(size_t size, const void** buf) const; + +private: + DataReaderFromMemory(const DataReaderFromMemory&); + DataReaderFromMemory& operator=(const DataReaderFromMemory&); + +private: + DataReaderFromMemoryPrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +class DataReaderFromAndroidAssetPrivate; +class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader +{ +public: + explicit DataReaderFromAndroidAsset(AAsset* asset); + virtual ~DataReaderFromAndroidAsset(); + +#if NCNN_STRING + virtual int scan(const char* format, void* p) const; +#endif // NCNN_STRING + virtual size_t read(void* buf, size_t size) const; + +private: + DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&); + DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&); + +private: + DataReaderFromAndroidAssetPrivate* const d; +}; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API + +} // namespace ncnn + +#endif // NCNN_DATAREADER_H
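DataReaderFromMemory takes the caller's pointer by reference and advances it as data is consumed, which is what lets param and model parsing resume where the previous read stopped; a minimal sketch:

#include "datareader.h" // ncnn::DataReaderFromMemory

void read_header(const unsigned char* blob)
{
    const unsigned char* p = blob; // the reader moves this pointer forward
    ncnn::DataReaderFromMemory dr(p);

    unsigned char magic[4];
    size_t n = dr.read(magic, sizeof(magic)); // returns bytes actually read
    // here n == 4 and p == blob + 4, so the next read continues at offset 4
    (void)n;
}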
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/gpu.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/gpu.h new file mode 100644 index 0000000..1cda182 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/gpu.h @@ -0,0 +1,360 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_GPU_H +#define NCNN_GPU_H + +#include "platform.h" + +#if NCNN_VULKAN + +#include "mat.h" + +#include <vulkan/vulkan.h> + +#include "vulkan_header_fix.h" + +namespace ncnn { + +// instance +NCNN_EXPORT int create_gpu_instance(); +NCNN_EXPORT void destroy_gpu_instance(); + +// instance extension capability +extern int support_VK_KHR_external_memory_capabilities; +extern int support_VK_KHR_get_physical_device_properties2; +extern int support_VK_KHR_get_surface_capabilities2; +extern int support_VK_KHR_surface; +extern int support_VK_EXT_debug_utils; +#if __ANDROID_API__ >= 26 +extern int support_VK_KHR_android_surface; +#endif // __ANDROID_API__ >= 26 + +// VK_KHR_external_memory_capabilities +extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR; + +// VK_KHR_get_physical_device_properties2 +extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; +extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; +extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR; +extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR; +extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR; +extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR; +extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR; + +// VK_KHR_get_surface_capabilities2 +extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR; +extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR; + +// VK_KHR_surface +extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; +extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; +extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR; +extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; +extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; + +#if __ANDROID_API__ >= 26 +// VK_KHR_android_surface +extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; +#endif // __ANDROID_API__ >= 26 + +// VK_NV_cooperative_matrix +extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV; + +// get info +NCNN_EXPORT int get_gpu_count(); +NCNN_EXPORT int get_default_gpu_index(); + +class GpuInfoPrivate; +class NCNN_EXPORT GpuInfo +{ +public: + explicit GpuInfo(); + virtual ~GpuInfo(); + + // vulkan physical device + VkPhysicalDevice physical_device() const; + + // memory properties + const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const; + + // info + uint32_t api_version() const; + uint32_t driver_version() const; + uint32_t vendor_id() const; + uint32_t device_id() const; + const char* device_name() const; + uint8_t* pipeline_cache_uuid() const; + + // 0 = discrete gpu + // 1 = integrated gpu + // 2 =
virtual gpu + // 3 = cpu + int type() const; + + // hardware limit + uint32_t max_shared_memory_size() const; + uint32_t max_workgroup_count_x() const; + uint32_t max_workgroup_count_y() const; + uint32_t max_workgroup_count_z() const; + uint32_t max_workgroup_invocations() const; + uint32_t max_workgroup_size_x() const; + uint32_t max_workgroup_size_y() const; + uint32_t max_workgroup_size_z() const; + size_t memory_map_alignment() const; + size_t buffer_offset_alignment() const; + size_t non_coherent_atom_size() const; + size_t buffer_image_granularity() const; + uint32_t max_image_dimension_1d() const; + uint32_t max_image_dimension_2d() const; + uint32_t max_image_dimension_3d() const; + float timestamp_period() const; + + // runtime + uint32_t compute_queue_family_index() const; + uint32_t graphics_queue_family_index() const; + uint32_t transfer_queue_family_index() const; + + uint32_t compute_queue_count() const; + uint32_t graphics_queue_count() const; + uint32_t transfer_queue_count() const; + + // property + bool unified_compute_transfer_queue() const; + + // subgroup + uint32_t subgroup_size() const; + bool support_subgroup_basic() const; + bool support_subgroup_vote() const; + bool support_subgroup_ballot() const; + bool support_subgroup_shuffle() const; + + // bug is not feature + bool bug_storage_buffer_no_l1() const; + bool bug_corrupted_online_pipeline_cache() const; + bool bug_buffer_image_load_zero() const; + + // but sometimes bug is a feature + bool bug_implicit_fp16_arithmetic() const; + + // fp16 and int8 feature + bool support_fp16_packed() const; + bool support_fp16_storage() const; + bool support_fp16_arithmetic() const; + bool support_int8_packed() const; + bool support_int8_storage() const; + bool support_int8_arithmetic() const; + + // ycbcr conversion feature + bool support_ycbcr_conversion() const; + + // cooperative matrix feature + bool support_cooperative_matrix() const; + bool support_cooperative_matrix_16_8_8() const; + + // extension capability + int support_VK_KHR_8bit_storage() const; + int support_VK_KHR_16bit_storage() const; + int support_VK_KHR_bind_memory2() const; + int support_VK_KHR_create_renderpass2() const; + int support_VK_KHR_dedicated_allocation() const; + int support_VK_KHR_descriptor_update_template() const; + int support_VK_KHR_external_memory() const; + int support_VK_KHR_get_memory_requirements2() const; + int support_VK_KHR_maintenance1() const; + int support_VK_KHR_maintenance2() const; + int support_VK_KHR_maintenance3() const; + int support_VK_KHR_multiview() const; + int support_VK_KHR_portability_subset() const; + int support_VK_KHR_push_descriptor() const; + int support_VK_KHR_sampler_ycbcr_conversion() const; + int support_VK_KHR_shader_float16_int8() const; + int support_VK_KHR_shader_float_controls() const; + int support_VK_KHR_storage_buffer_storage_class() const; + int support_VK_KHR_swapchain() const; + int support_VK_EXT_descriptor_indexing() const; + int support_VK_EXT_memory_budget() const; + int support_VK_EXT_queue_family_foreign() const; +#if __ANDROID_API__ >= 26 + int support_VK_ANDROID_external_memory_android_hardware_buffer() const; +#endif // __ANDROID_API__ >= 26 + int support_VK_NV_cooperative_matrix() const; + +private: + GpuInfo(const GpuInfo&); + GpuInfo& operator=(const GpuInfo&); + +private: + friend int create_gpu_instance(); + GpuInfoPrivate* const d; +}; + +NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index()); + +class VkAllocator; +class VkCompute; +class Option; +class 
PipelineCache; +class VulkanDevicePrivate; +class NCNN_EXPORT VulkanDevice +{ +public: + VulkanDevice(int device_index = get_default_gpu_index()); + ~VulkanDevice(); + + const GpuInfo& info; + + VkDevice vkdevice() const; + + VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const; + + // with fixed workgroup size + VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const; + + // helper for creating pipeline + int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const; + int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const; + int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const; + int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + + uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const; + bool is_mappable(uint32_t memory_type_index) const; + bool is_coherent(uint32_t memory_type_index) const; + + VkQueue acquire_queue(uint32_t queue_family_index) const; + void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const; + + // allocator on this device + VkAllocator* acquire_blob_allocator() const; + void reclaim_blob_allocator(VkAllocator* allocator) const; + + VkAllocator* acquire_staging_allocator() const; + void reclaim_staging_allocator(VkAllocator* allocator) const; + + // immutable sampler for texelfetch + const VkSampler* immutable_texelfetch_sampler() const; + + // dummy buffer image + VkMat get_dummy_buffer() const; + VkImageMat get_dummy_image() const; + VkImageMat get_dummy_image_readonly() const; + + // pipeline cache on this device + const PipelineCache* get_pipeline_cache() const; + + // test image allocation + bool shape_support_image_storage(const Mat& shape) const; + + // current gpu heap memory budget in MB + uint32_t get_heap_budget() const; + + // utility operator + void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + + // VK_KHR_bind_memory2 + PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; + PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + + // VK_KHR_create_renderpass2 + PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; + PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; + PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; + PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; + + // VK_KHR_descriptor_update_template + PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; + PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; + PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; + + // VK_KHR_get_memory_requirements2 + PFN_vkGetImageMemoryRequirements2KHR
vkGetImageMemoryRequirements2KHR; + PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; + PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; + + // VK_KHR_maintenance1 + PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; + + // VK_KHR_maintenance3 + PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; + + // VK_KHR_push_descriptor + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; + PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; + + // VK_KHR_sampler_ycbcr_conversion + PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; + PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; + + // VK_KHR_swapchain + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; + +#if __ANDROID_API__ >= 26 + // VK_ANDROID_external_memory_android_hardware_buffer + PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; + PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; +#endif // __ANDROID_API__ >= 26 + +protected: + // device extension + int init_device_extension(); + +private: + VulkanDevice(const VulkanDevice&); + VulkanDevice& operator=(const VulkanDevice&); + +private: + VulkanDevicePrivate* const d; +}; + +NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); + +// online spirv compilation +NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv); +NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv); + +// info from spirv +class NCNN_EXPORT ShaderInfo +{ +public: + int specialization_count; + int binding_count; + int push_constant_count; + + // 0 = null + // 1 = storage buffer + // 2 = storage image + // 3 = combined image sampler + int binding_types[16]; // 16 is large enough I think ... + + int reserved_0; + int reserved_1; + int reserved_2; + int reserved_3; +}; + +NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_GPU_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer.h new file mode 100644 index 0000000..d02f65b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer.h @@ -0,0 +1,214 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_H +#define NCNN_LAYER_H + +#include "mat.h" +#include "modelbin.h" +#include "option.h" +#include "paramdict.h" +#include "platform.h" + +#include <vector> + +#if NCNN_VULKAN +#include "command.h" +#include "pipeline.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +class NCNN_EXPORT Layer +{ +public: + // empty + Layer(); + // virtual destructor + virtual ~Layer(); + + // load layer specific parameter from parsed dict + // return 0 if success + virtual int load_param(const ParamDict& pd); + + // load layer specific weight data from model binary + // return 0 if success + virtual int load_model(const ModelBin& mb); + + // layer implementation specific setup + // return 0 if success + virtual int create_pipeline(const Option& opt); + + // layer implementation specific clean + // return 0 if success + virtual int destroy_pipeline(const Option& opt); + +public: + // one input and one output blob + bool one_blob_only; + + // support inplace inference + bool support_inplace; + + // support vulkan compute + bool support_vulkan; + + // accept input blob with packed storage + bool support_packing; + + // accept bf16 + bool support_bf16_storage; + + // accept fp16 + bool support_fp16_storage; + + // accept int8 + bool support_int8_storage; + + // shader image storage + bool support_image_storage; + + // shader tensor storage + bool support_tensor_storage; + + bool support_reserved_00; + + bool support_reserved_0; + bool support_reserved_1; + bool support_reserved_2; + bool support_reserved_3; + bool support_reserved_4; + bool support_reserved_5; + bool support_reserved_6; + bool support_reserved_7; + bool support_reserved_8; + bool support_reserved_9; + + // feature disabled set + int featmask; + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const; + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const; + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + +#if NCNN_VULKAN +public: + // upload weight blob from host to device + virtual int upload_model(VkTransfer& cmd, const Option& opt); + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) 
const; + +public: + // assigned immediately after creating this layer + const VulkanDevice* vkdev; +#endif // NCNN_VULKAN + +public: + // custom user data + void* userdata; + // layer type index + int typeindex; +#if NCNN_STRING + // layer type name + std::string type; + // layer name + std::string name; +#endif // NCNN_STRING + // blob index which this layer needs as input + std::vector<int> bottoms; + // blob index which this layer produces as output + std::vector<int> tops; + // shape hint + std::vector<Mat> bottom_shapes; + std::vector<Mat> top_shapes; +}; + +// layer factory function +typedef Layer* (*layer_creator_func)(void*); +typedef void (*layer_destroyer_func)(Layer*, void*); + +struct layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; +}; + +struct custom_layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + +#if NCNN_STRING +// get layer type from type name +NCNN_EXPORT int layer_to_index(const char* type); +// create layer from type name +NCNN_EXPORT Layer* create_layer(const char* type); +#endif // NCNN_STRING +// create layer from layer type +NCNN_EXPORT Layer* create_layer(int index); + +#define DEFINE_LAYER_CREATOR(name) \ + ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \ + { \ + return new name; \ + } + +#define DEFINE_LAYER_DESTROYER(name) \ + void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \ + { \ + delete layer; \ + } + +} // namespace ncnn + +#endif // NCNN_LAYER_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_shader_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_shader_type.h new file mode 100644 index 0000000..c143e7d --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_shader_type.h @@ -0,0 +1,29 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
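The Layer interface above, together with the DEFINE_LAYER_CREATOR macro, is the extension point for user-defined ops. A minimal sketch of a custom layer follows, assuming the Net::register_custom_layer API declared in net.h (shipped in the same ncnn package but not part of this diff); the MySwish name and its body are hypothetical:

#include <math.h>

#include "layer.h"
#include "net.h"

// hypothetical custom layer: computes x * sigmoid(x) in place
class MySwish : public ncnn::Layer
{
public:
    MySwish()
    {
        one_blob_only = true;   // one input blob, one output blob
        support_inplace = true; // forward_inplace is implemented
    }

    virtual int forward_inplace(ncnn::Mat& bottom_top_blob, const ncnn::Option& opt) const
    {
        const int size = bottom_top_blob.w * bottom_top_blob.h * bottom_top_blob.d;
        #pragma omp parallel for num_threads(opt.num_threads)
        for (int q = 0; q < bottom_top_blob.c; q++)
        {
            float* ptr = bottom_top_blob.channel(q);
            for (int i = 0; i < size; i++)
                ptr[i] = ptr[i] / (1.f + expf(-ptr[i]));
        }
        return 0;
    }
};

DEFINE_LAYER_CREATOR(MySwish)

// register before net.load_param() so the parser can resolve the type name:
//   ncnn::Net net;
//   net.register_custom_layer("MySwish", MySwish_layer_creator);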
+ +#ifndef NCNN_LAYER_SHADER_TYPE_H +#define NCNN_LAYER_SHADER_TYPE_H + +namespace ncnn { + +namespace LayerShaderType { +enum LayerShaderType +{ +#include "layer_shader_type_enum.h" +}; +} // namespace LayerShaderType + +} // namespace ncnn + +#endif // NCNN_LAYER_SHADER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_shader_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_shader_type_enum.h new file mode 100644 index 0000000..bad5605 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_shader_type_enum.h @@ -0,0 +1,369 @@ +// Layer Shader Enum header +// +// This file is auto-generated by cmake, don't edit it. + +absval = 0, +absval_pack4 = 1, +absval_pack8 = 2, +batchnorm = 3, +batchnorm_pack4 = 4, +batchnorm_pack8 = 5, +concat = 6, +concat_pack4 = 7, +concat_pack4to1 = 8, +concat_pack8 = 9, +concat_pack8to1 = 10, +concat_pack8to4 = 11, +convolution = 12, +convolution_1x1s1d1 = 13, +convolution_3x3s1d1_winograd23_transform_input = 14, +convolution_3x3s1d1_winograd23_transform_output = 15, +convolution_3x3s1d1_winograd43_transform_input = 16, +convolution_3x3s1d1_winograd43_transform_output = 17, +convolution_3x3s1d1_winograd_gemm = 18, +convolution_gemm = 19, +convolution_pack1to4 = 20, +convolution_pack1to4_1x1s1d1 = 21, +convolution_pack1to4_3x3s1d1_winograd_gemm = 22, +convolution_pack1to4_gemm = 23, +convolution_pack1to8 = 24, +convolution_pack1to8_1x1s1d1 = 25, +convolution_pack1to8_3x3s1d1_winograd_gemm = 26, +convolution_pack1to8_gemm = 27, +convolution_pack4 = 28, +convolution_pack4_1x1s1d1 = 29, +convolution_pack4_1x1s1d1_cm_16_8_8 = 30, +convolution_pack4_3x3s1d1_winograd23_transform_input = 31, +convolution_pack4_3x3s1d1_winograd23_transform_output = 32, +convolution_pack4_3x3s1d1_winograd43_transform_input = 33, +convolution_pack4_3x3s1d1_winograd43_transform_output = 34, +convolution_pack4_3x3s1d1_winograd_gemm = 35, +convolution_pack4_3x3s1d1_winograd_gemm_cm_16_8_8 = 36, +convolution_pack4_gemm = 37, +convolution_pack4_gemm_cm_16_8_8 = 38, +convolution_pack4to1 = 39, +convolution_pack4to1_1x1s1d1 = 40, +convolution_pack4to1_3x3s1d1_winograd_gemm = 41, +convolution_pack4to1_gemm = 42, +convolution_pack4to8 = 43, +convolution_pack4to8_1x1s1d1 = 44, +convolution_pack4to8_3x3s1d1_winograd_gemm = 45, +convolution_pack4to8_gemm = 46, +convolution_pack8 = 47, +convolution_pack8_1x1s1d1 = 48, +convolution_pack8_3x3s1d1_winograd23_transform_input = 49, +convolution_pack8_3x3s1d1_winograd23_transform_output = 50, +convolution_pack8_3x3s1d1_winograd43_transform_input = 51, +convolution_pack8_3x3s1d1_winograd43_transform_output = 52, +convolution_pack8_3x3s1d1_winograd_gemm = 53, +convolution_pack8_gemm = 54, +convolution_pack8to1 = 55, +convolution_pack8to1_1x1s1d1 = 56, +convolution_pack8to1_3x3s1d1_winograd_gemm = 57, +convolution_pack8to1_gemm = 58, +convolution_pack8to4 = 59, +convolution_pack8to4_1x1s1d1 = 60, +convolution_pack8to4_3x3s1d1_winograd_gemm = 61, +convolution_pack8to4_gemm = 62, +crop = 63, +crop_pack1to4 = 64, +crop_pack1to8 = 65, +crop_pack4 = 66, +crop_pack4to1 = 67, +crop_pack4to8 = 68, +crop_pack8 = 69, +crop_pack8to1 = 70, +crop_pack8to4 = 71, +deconvolution = 72, +deconvolution_col2im = 73, +deconvolution_gemm = 74, +deconvolution_pack1to4 = 75, +deconvolution_pack1to4_gemm = 76, +deconvolution_pack1to8 = 77, +deconvolution_pack1to8_gemm = 78, +deconvolution_pack4 = 79, +deconvolution_pack4_col2im = 
80, +deconvolution_pack4_gemm = 81, +deconvolution_pack4_gemm_cm_16_8_8 = 82, +deconvolution_pack4to1 = 83, +deconvolution_pack4to1_gemm = 84, +deconvolution_pack4to8 = 85, +deconvolution_pack4to8_gemm = 86, +deconvolution_pack8 = 87, +deconvolution_pack8_col2im = 88, +deconvolution_pack8_gemm = 89, +deconvolution_pack8to1 = 90, +deconvolution_pack8to1_gemm = 91, +deconvolution_pack8to4 = 92, +deconvolution_pack8to4_gemm = 93, +dropout = 94, +dropout_pack4 = 95, +dropout_pack8 = 96, +eltwise = 97, +eltwise_pack4 = 98, +eltwise_pack8 = 99, +elu = 100, +elu_pack4 = 101, +elu_pack8 = 102, +flatten = 103, +flatten_pack1to4 = 104, +flatten_pack1to8 = 105, +flatten_pack4 = 106, +flatten_pack4to8 = 107, +flatten_pack8 = 108, +innerproduct = 109, +innerproduct_gemm = 110, +innerproduct_gemm_wp1to4 = 111, +innerproduct_gemm_wp1to8 = 112, +innerproduct_gemm_wp4 = 113, +innerproduct_gemm_wp4to1 = 114, +innerproduct_gemm_wp4to8 = 115, +innerproduct_gemm_wp8 = 116, +innerproduct_gemm_wp8to1 = 117, +innerproduct_gemm_wp8to4 = 118, +innerproduct_pack1to4 = 119, +innerproduct_pack1to8 = 120, +innerproduct_pack4 = 121, +innerproduct_pack4to1 = 122, +innerproduct_pack4to8 = 123, +innerproduct_pack8 = 124, +innerproduct_pack8to1 = 125, +innerproduct_pack8to4 = 126, +innerproduct_reduce_sum8 = 127, +innerproduct_reduce_sum8_pack4 = 128, +innerproduct_reduce_sum8_pack8 = 129, +innerproduct_sum8 = 130, +innerproduct_sum8_pack1to4 = 131, +innerproduct_sum8_pack1to8 = 132, +innerproduct_sum8_pack4 = 133, +innerproduct_sum8_pack4to1 = 134, +innerproduct_sum8_pack4to8 = 135, +innerproduct_sum8_pack8 = 136, +innerproduct_sum8_pack8to1 = 137, +innerproduct_sum8_pack8to4 = 138, +lrn_norm = 139, +lrn_norm_across_channel_pack4 = 140, +lrn_norm_across_channel_pack8 = 141, +lrn_norm_within_channel_pack4 = 142, +lrn_norm_within_channel_pack8 = 143, +lrn_square_pad = 144, +lrn_square_pad_across_channel_pack4 = 145, +lrn_square_pad_across_channel_pack8 = 146, +lrn_square_pad_within_channel_pack4 = 147, +lrn_square_pad_within_channel_pack8 = 148, +pooling = 149, +pooling_adaptive = 150, +pooling_adaptive_pack4 = 151, +pooling_adaptive_pack8 = 152, +pooling_global = 153, +pooling_global_pack4 = 154, +pooling_global_pack8 = 155, +pooling_pack4 = 156, +pooling_pack8 = 157, +prelu = 158, +prelu_pack4 = 159, +prelu_pack8 = 160, +relu = 161, +relu_pack4 = 162, +relu_pack8 = 163, +reshape = 164, +reshape_pack1to4 = 165, +reshape_pack1to8 = 166, +reshape_pack4 = 167, +reshape_pack4to1 = 168, +reshape_pack4to8 = 169, +reshape_pack8 = 170, +reshape_pack8to1 = 171, +reshape_pack8to4 = 172, +scale = 173, +scale_pack4 = 174, +scale_pack8 = 175, +sigmoid = 176, +sigmoid_pack4 = 177, +sigmoid_pack8 = 178, +slice = 179, +slice_pack1to4 = 180, +slice_pack1to8 = 181, +slice_pack4 = 182, +slice_pack4to8 = 183, +slice_pack8 = 184, +softmax_div_sum = 185, +softmax_div_sum_pack4 = 186, +softmax_div_sum_pack8 = 187, +softmax_exp_sub_max = 188, +softmax_exp_sub_max_pack4 = 189, +softmax_exp_sub_max_pack8 = 190, +softmax_reduce_max = 191, +softmax_reduce_max_pack4 = 192, +softmax_reduce_max_pack8 = 193, +softmax_reduce_sum = 194, +softmax_reduce_sum_pack4 = 195, +softmax_reduce_sum_pack8 = 196, +tanh = 197, +tanh_pack4 = 198, +tanh_pack8 = 199, +binaryop = 200, +binaryop_broadcast_inner = 201, +binaryop_broadcast_inner_pack4 = 202, +binaryop_broadcast_inner_pack8 = 203, +binaryop_broadcast_outer = 204, +binaryop_broadcast_outer_pack4 = 205, +binaryop_broadcast_outer_pack8 = 206, +binaryop_pack4 = 207, +binaryop_pack8 = 208, +unaryop = 209, 
+unaryop_pack4 = 210, +unaryop_pack8 = 211, +convolutiondepthwise = 212, +convolutiondepthwise_group = 213, +convolutiondepthwise_group_pack1to4 = 214, +convolutiondepthwise_group_pack1to8 = 215, +convolutiondepthwise_group_pack4 = 216, +convolutiondepthwise_group_pack4to1 = 217, +convolutiondepthwise_group_pack4to8 = 218, +convolutiondepthwise_group_pack8 = 219, +convolutiondepthwise_group_pack8to1 = 220, +convolutiondepthwise_group_pack8to4 = 221, +convolutiondepthwise_pack4 = 222, +convolutiondepthwise_pack8 = 223, +padding = 224, +padding_3d = 225, +padding_3d_pack4 = 226, +padding_3d_pack8 = 227, +padding_pack1to4 = 228, +padding_pack1to8 = 229, +padding_pack4 = 230, +padding_pack4to1 = 231, +padding_pack4to8 = 232, +padding_pack8 = 233, +padding_pack8to1 = 234, +padding_pack8to4 = 235, +normalize_coeffs = 236, +normalize_coeffs_pack4 = 237, +normalize_coeffs_pack8 = 238, +normalize_norm = 239, +normalize_norm_pack4 = 240, +normalize_norm_pack8 = 241, +normalize_reduce_sum4_fp16_to_fp32 = 242, +normalize_reduce_sum4_fp16_to_fp32_pack4 = 243, +normalize_reduce_sum4_fp16_to_fp32_pack8 = 244, +normalize_reduce_sum4_fp32 = 245, +normalize_reduce_sum4_fp32_pack4 = 246, +normalize_reduce_sum4_fp32_pack8 = 247, +permute = 248, +permute_pack1to4 = 249, +permute_pack1to8 = 250, +permute_pack4 = 251, +permute_pack4to1 = 252, +permute_pack4to8 = 253, +permute_pack8 = 254, +permute_pack8to1 = 255, +permute_pack8to4 = 256, +priorbox = 257, +priorbox_mxnet = 258, +interp = 259, +interp_bicubic = 260, +interp_bicubic_coeffs = 261, +interp_bicubic_pack4 = 262, +interp_bicubic_pack8 = 263, +interp_pack4 = 264, +interp_pack8 = 265, +deconvolutiondepthwise = 266, +deconvolutiondepthwise_group = 267, +deconvolutiondepthwise_group_pack1to4 = 268, +deconvolutiondepthwise_group_pack1to8 = 269, +deconvolutiondepthwise_group_pack4 = 270, +deconvolutiondepthwise_group_pack4to1 = 271, +deconvolutiondepthwise_group_pack4to8 = 272, +deconvolutiondepthwise_group_pack8 = 273, +deconvolutiondepthwise_group_pack8to1 = 274, +deconvolutiondepthwise_group_pack8to4 = 275, +deconvolutiondepthwise_pack4 = 276, +deconvolutiondepthwise_pack8 = 277, +shufflechannel = 278, +shufflechannel_pack4 = 279, +shufflechannel_pack8 = 280, +instancenorm_coeffs = 281, +instancenorm_coeffs_pack4 = 282, +instancenorm_coeffs_pack8 = 283, +instancenorm_norm = 284, +instancenorm_norm_pack4 = 285, +instancenorm_norm_pack8 = 286, +instancenorm_reduce_mean = 287, +instancenorm_reduce_mean_pack4 = 288, +instancenorm_reduce_mean_pack8 = 289, +instancenorm_reduce_sum4_fp16_to_fp32 = 290, +instancenorm_reduce_sum4_fp16_to_fp32_pack4 = 291, +instancenorm_reduce_sum4_fp16_to_fp32_pack8 = 292, +instancenorm_reduce_sum4_fp32 = 293, +instancenorm_reduce_sum4_fp32_pack4 = 294, +instancenorm_reduce_sum4_fp32_pack8 = 295, +instancenorm_sub_mean_square = 296, +instancenorm_sub_mean_square_pack4 = 297, +instancenorm_sub_mean_square_pack8 = 298, +clip = 299, +clip_pack4 = 300, +clip_pack8 = 301, +reorg = 302, +reorg_pack1to4 = 303, +reorg_pack1to8 = 304, +reorg_pack4 = 305, +reorg_pack4to8 = 306, +reorg_pack8 = 307, +packing = 308, +packing_fp16_to_fp32 = 309, +packing_fp32_to_fp16 = 310, +packing_pack1to4 = 311, +packing_pack1to4_fp16_to_fp32 = 312, +packing_pack1to4_fp32_to_fp16 = 313, +packing_pack1to8 = 314, +packing_pack1to8_fp16_to_fp32 = 315, +packing_pack1to8_fp32_to_fp16 = 316, +packing_pack4 = 317, +packing_pack4_fp16_to_fp32 = 318, +packing_pack4_fp32_to_fp16 = 319, +packing_pack4to1 = 320, +packing_pack4to1_fp16_to_fp32 = 321, 
+packing_pack4to1_fp32_to_fp16 = 322, +packing_pack4to8 = 323, +packing_pack4to8_fp16_to_fp32 = 324, +packing_pack4to8_fp32_to_fp16 = 325, +packing_pack8 = 326, +packing_pack8_fp16_to_fp32 = 327, +packing_pack8_fp32_to_fp16 = 328, +packing_pack8to1 = 329, +packing_pack8to1_fp16_to_fp32 = 330, +packing_pack8to1_fp32_to_fp16 = 331, +packing_pack8to4 = 332, +packing_pack8to4_fp16_to_fp32 = 333, +packing_pack8to4_fp32_to_fp16 = 334, +cast_fp16_to_fp32 = 335, +cast_fp16_to_fp32_pack4 = 336, +cast_fp16_to_fp32_pack8 = 337, +cast_fp32_to_fp16 = 338, +cast_fp32_to_fp16_pack4 = 339, +cast_fp32_to_fp16_pack8 = 340, +hardsigmoid = 341, +hardsigmoid_pack4 = 342, +hardsigmoid_pack8 = 343, +hardswish = 344, +hardswish_pack4 = 345, +hardswish_pack8 = 346, +pixelshuffle = 347, +pixelshuffle_pack4 = 348, +pixelshuffle_pack4to1 = 349, +pixelshuffle_pack8 = 350, +pixelshuffle_pack8to1 = 351, +pixelshuffle_pack8to4 = 352, +deepcopy = 353, +deepcopy_pack4 = 354, +deepcopy_pack8 = 355, +mish = 356, +mish_pack4 = 357, +mish_pack8 = 358, +swish = 359, +swish_pack4 = 360, +swish_pack8 = 361, +convert_ycbcr = 362, +vulkan_activation = 363, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_type.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_type.h new file mode 100644 index 0000000..511c714 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_type.h @@ -0,0 +1,30 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_TYPE_H +#define NCNN_LAYER_TYPE_H + +namespace ncnn { + +namespace LayerType { +enum LayerType +{ +#include "layer_type_enum.h" + CustomBit = (1 << 8), +}; +} // namespace LayerType + +} // namespace ncnn + +#endif // NCNN_LAYER_TYPE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_type_enum.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_type_enum.h new file mode 100644 index 0000000..9c1be52 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/layer_type_enum.h @@ -0,0 +1,105 @@ +// Layer Type Enum header +// +// This file is auto-generated by cmake, don't edit it. 
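The LayerType values listed below are what the factory functions declared in layer.h resolve. A minimal sketch of instantiating a built-in layer both ways, assuming NCNN_STRING is enabled (the default) and using the ReLU entry from the list that follows:

#include "layer.h"
#include "layer_type.h"

void layer_factory_demo()
{
    // resolve a built-in layer by its registered type name ...
    ncnn::Layer* relu = ncnn::create_layer("ReLU");

    // ... or directly by the enum value (ReLU = 26 in the list below)
    ncnn::Layer* relu_by_index = ncnn::create_layer(ncnn::LayerType::ReLU);

    // layer_to_index maps a type name back to its enum value;
    // custom layers registered at runtime are distinguished by CustomBit
    int index = ncnn::layer_to_index("ReLU"); // == ncnn::LayerType::ReLU
    (void)index;

    delete relu;
    delete relu_by_index;
}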
+ +AbsVal = 0, +ArgMax = 1, +BatchNorm = 2, +Bias = 3, +BNLL = 4, +Concat = 5, +Convolution = 6, +Crop = 7, +Deconvolution = 8, +Dropout = 9, +Eltwise = 10, +ELU = 11, +Embed = 12, +Exp = 13, +Flatten = 14, +InnerProduct = 15, +Input = 16, +Log = 17, +LRN = 18, +MemoryData = 19, +MVN = 20, +Pooling = 21, +Power = 22, +PReLU = 23, +Proposal = 24, +Reduction = 25, +ReLU = 26, +Reshape = 27, +ROIPooling = 28, +Scale = 29, +Sigmoid = 30, +Slice = 31, +Softmax = 32, +Split = 33, +SPP = 34, +TanH = 35, +Threshold = 36, +Tile = 37, +RNN = 38, +LSTM = 39, +BinaryOp = 40, +UnaryOp = 41, +ConvolutionDepthWise = 42, +Padding = 43, +Squeeze = 44, +ExpandDims = 45, +Normalize = 46, +Permute = 47, +PriorBox = 48, +DetectionOutput = 49, +Interp = 50, +DeconvolutionDepthWise = 51, +ShuffleChannel = 52, +InstanceNorm = 53, +Clip = 54, +Reorg = 55, +YoloDetectionOutput = 56, +Quantize = 57, +Dequantize = 58, +Yolov3DetectionOutput = 59, +PSROIPooling = 60, +ROIAlign = 61, +Packing = 62, +Requantize = 63, +Cast = 64, +HardSigmoid = 65, +SELU = 66, +HardSwish = 67, +Noop = 68, +PixelShuffle = 69, +DeepCopy = 70, +Mish = 71, +StatisticsPooling = 72, +Swish = 73, +Gemm = 74, +GroupNorm = 75, +LayerNorm = 76, +Softplus = 77, +GRU = 78, +MultiHeadAttention = 79, +GELU = 80, +Convolution1D = 81, +Pooling1D = 82, +ConvolutionDepthWise1D = 83, +Convolution3D = 84, +ConvolutionDepthWise3D = 85, +Pooling3D = 86, +MatMul = 87, +Deconvolution1D = 88, +DeconvolutionDepthWise1D = 89, +Deconvolution3D = 90, +DeconvolutionDepthWise3D = 91, +Einsum = 92, +DeformableConv2D = 93, +GLU = 94, +Fold = 95, +Unfold = 96, +GridSample = 97, +CumulativeSum = 98, +CopyTo = 99, + diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/mat.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/mat.h new file mode 100644 index 0000000..c6f59ef --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/mat.h @@ -0,0 +1,1843 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
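The Mat class declared next is ncnn's tensor container; its from_pixels_* constructors and substract_mean_normalize (both further down in this header) form the usual preprocessing path from raw pixels to network input. A minimal sketch, assuming a BGR frame and a 640x640 model input; the mean/normalize values are illustrative, not taken from this demo:

#include "mat.h"

// hypothetical preprocessing: BGR bytes -> resized, normalized RGB tensor
ncnn::Mat preprocess(const unsigned char* bgr, int srcw, int srch)
{
    // convert BGR to RGB while resizing to the network input size
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(
        bgr, ncnn::Mat::PIXEL_BGR2RGB, srcw, srch, 640, 640);

    // scale pixels to [0, 1]; pass 0 to skip either step
    const float mean_vals[3] = {0.f, 0.f, 0.f};
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in.substract_mean_normalize(mean_vals, norm_vals);
    return in;
}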
+ +#ifndef NCNN_MAT_H +#define NCNN_MAT_H + +#include <stdlib.h> +#include <string.h> +#if __ARM_NEON +#include <arm_neon.h> +#endif +#if __SSE2__ +#include <emmintrin.h> +#if __AVX__ +#include <immintrin.h> +#endif +#endif +#if __mips_msa +#include <msa.h> +#endif +#if __loongarch_sx +#include <lsxintrin.h> +#endif +#if __riscv_vector +#include <riscv_vector.h> +#include "cpu.h" // cpu_riscv_vlenb() +#endif + +#include "allocator.h" +#include "option.h" +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PIXEL +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/bitmap.h> +#include <jni.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + +namespace ncnn { + +#if NCNN_VULKAN +class VkMat; +class VkImageMat; +#endif // NCNN_VULKAN + +// the three dimension matrix +class NCNN_EXPORT Mat +{ +public: + // empty + Mat(); + // vec + Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // image + Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // dim + Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // cube + Mat(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // packed vec + Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed image + Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed dim + Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed cube + Mat(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // copy + Mat(const Mat& m); + // external vec + Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external image + Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external dim + Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external packed vec + Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed image + Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed dim + Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // release + ~Mat(); + // assign + Mat& operator=(const Mat& m); + // set all + void fill(float v); + void fill(int v); +#if __ARM_NEON + void fill(float32x4_t _v); + void fill(uint16x4_t _v); + void fill(int32x4_t _v); + void fill(int32x4_t _v0, int32x4_t _v1); +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + void fill(float16x4_t _v); + void fill(float16x8_t _v); +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ + void fill(__m512 _v); +#endif // __AVX512F__ + void fill(__m256 _v, int i = 0); +#endif // __AVX__ + void fill(__m128 _v); + void fill(__m128i _v); +#endif // __SSE2__ +#if __mips_msa + void fill(v4f32 _v); +#endif // __mips_msa +#if __loongarch_sx + void fill(__m128 _v); +#endif //__loongarch_sx +#if __riscv_vector + void fill(vfloat32m1_t _v); + void fill(vuint16m1_t _v); + void fill(vint8m1_t _v); +#if __riscv_zfh + void fill(vfloat16m1_t _v); +#endif // __riscv_zfh +#endif // __riscv_vector + template<typename T> + void fill(T v); + // deep copy + Mat clone(Allocator* allocator = 0) const; + // deep copy from other mat, inplace + void 
clone_from(const ncnn::Mat& mat, Allocator* allocator = 0); + // reshape vec + Mat reshape(int w, Allocator* allocator = 0) const; + // reshape image + Mat reshape(int w, int h, Allocator* allocator = 0) const; + // reshape dim + Mat reshape(int w, int h, int c, Allocator* allocator = 0) const; + // reshape cube + Mat reshape(int w, int h, int d, int c, Allocator* allocator = 0) const; + // allocate vec + void create(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate image + void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate dim + void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate like + void create_like(const Mat& m, Allocator* allocator = 0); +#if NCNN_VULKAN + // allocate like + void create_like(const VkMat& m, Allocator* allocator = 0); + // allocate like + void create_like(const VkImageMat& im, Allocator* allocator = 0); +#endif // NCNN_VULKAN + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // data reference + Mat channel(int c); + const Mat channel(int c) const; + Mat depth(int z); + const Mat depth(int z) const; + float* row(int y); + const float* row(int y) const; + template<typename T> + T* row(int y); + template<typename T> + const T* row(int y) const; + + // range reference + Mat channel_range(int c, int channels); + const Mat channel_range(int c, int channels) const; + Mat depth_range(int z, int depths); + const Mat depth_range(int z, int depths) const; + Mat row_range(int y, int rows); + const Mat row_range(int y, int rows) const; + Mat range(int x, int n); + const Mat range(int x, int n) const; + + // access raw data + template<typename T> + operator T*(); + template<typename T> + operator const T*() const; + + // convenient access float vec element + float& operator[](size_t i); + const float& operator[](size_t i) const; + +#if NCNN_PIXEL + enum PixelType + { + PIXEL_CONVERT_SHIFT = 16, + PIXEL_FORMAT_MASK = 0x0000ffff, + PIXEL_CONVERT_MASK = 0xffff0000, + + PIXEL_RGB = 1, + PIXEL_BGR = 2, + PIXEL_GRAY = 3, + PIXEL_RGBA = 4, + PIXEL_BGRA = 5, + + PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGRA = 
PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + }; + // convenient construct from pixel data + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0); + // convenient construct from pixel data with stride(bytes-per-row) parameter + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi with stride(bytes-per-row) parameter + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + + // convenient export to pixel data + void to_pixels(unsigned char* pixels, int type) const; + // convenient export to pixel data with stride(bytes-per-row) parameter + void to_pixels(unsigned char* pixels, int type, int stride) const; + // convenient export to pixel data and resize to specific size + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const; + // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 + // convenient construct from android Bitmap + static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0); + // convenient construct from android Bitmap and resize to specific size + static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from android Bitmap 
roi + static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from android Bitmap roi and resize to specific size + static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient export to android Bitmap and resize to the android Bitmap size + void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + + // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip + void substract_mean_normalize(const float* mean_vals, const float* norm_vals); + + // convenient construct from half precision floating point data + static Mat from_float16(const unsigned short* data, int size); + + // pointer to the data + void* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + Allocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +#if NCNN_VULKAN + +// the three dimension matrix, vulkan version +class NCNN_EXPORT VkMat +{ +public: + // empty + VkMat(); + // vec + VkMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkMat(const VkMat& m); + // external vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkMat(); + // assign + VkMat& operator=(const VkMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); 
+ // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkBuffer buffer() const; + size_t buffer_offset() const; + size_t buffer_capacity() const; + + // device buffer + VkBufferMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +class NCNN_EXPORT VkImageMat +{ +public: + // empty + VkImageMat(); + // vec + VkImageMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkImageMat(const VkImageMat& m); + // external vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* 
allocator); + // external packed dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkImageMat(); + // assign + VkImageMat& operator=(const VkImageMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); + // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkImage image() const; + VkImageView imageview() const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + // convenient construct from android hardware buffer + static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + // device image + VkImageMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; +}; + +// type for vulkan specialization constant and push constant +union vk_specialization_type +{ + int i; + float f; + uint32_t u32; +}; +union vk_constant_type +{ + int i; + float f; +}; +#endif // NCNN_VULKAN + +// misc function +#if NCNN_PIXEL +// convert yuv420sp(nv21) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv12) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv21) to rgb with half resize, the faster approximate version +NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// image pixel bilinear resize +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, 
unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +// image pixel bilinear resize with stride(bytes-per-row) parameter +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +// image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +#endif // NCNN_PIXEL +#if NCNN_PIXEL_ROTATE +// type is the from type, 6 means rotating from 6 to 1 +// +// 1 2 3 4 5 6 7 8 +// +// 888888 888888 88 88 8888888888 88 88 8888888888 +// 88 88 88 88 88 88 88 88 88 88 88 88 +// 8888 8888 8888 8888 88 8888888888 8888888888 88 +// 88 88 88 88 +// 88 88 888888 888888 +// +// ref http://sylvana.net/jpegcrop/exif_orientation.html +// image pixel kanna rotate +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +// image pixel kanna rotate with stride(bytes-per-row) parameter +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +// image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +#endif // NCNN_PIXEL_ROTATE +#if NCNN_PIXEL_AFFINE +// resolve affine transform matrix from rotation angle, scale factor and x y offset +NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm); +// resolve affine transform matrix from two set of points, num_point must be >= 2 +NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm); +// resolve the inversion affine transform matrix +NCNN_EXPORT void invert_affine_transform(const 
float* tm, float* tm_inv); +// image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0); +// image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded +NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0); +#endif // NCNN_PIXEL_AFFINE +#if NCNN_PIXEL_DRAWING +// draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int 
stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness); +// draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness); +// draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness); +// draw line, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness); +// resolve text bounding box size 
+NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h); +// draw ascii printables and newline, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded +NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color); +// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded +NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color); +#endif // NCNN_PIXEL_DRAWING + +// type conversion +// convert float to half precision floating point +NCNN_EXPORT unsigned short float32_to_float16(float value); +// convert half precision floating point to float +NCNN_EXPORT float float16_to_float32(unsigned short value); +// convert float to brain half +NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.f = value; + return tmp.u >> 16; +} +// convert brain half to float +NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value) +{ + // 16 : 16 + union + { + unsigned int u; + float f; + } tmp; + tmp.u = value << 16; + return tmp.f; +} + +// mat process +enum BorderType +{ + BORDER_CONSTANT = 0, + BORDER_REPLICATE = 1, + BORDER_REFLECT = 2, + BORDER_TRANSPARENT = -233, +}; +NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option()); +NCNN_EXPORT void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt = Option()); +NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option()); +NCNN_EXPORT void 
convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option()); +NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option()); +NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option()); +NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option()); +NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option()); + +NCNN_FORCEINLINE Mat::Mat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE Mat::Mat(const Mat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), 
h(m.h), d(m.d), c(m.c), cstep(m.cstep) +{ + addref(); +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = (size_t)w * h; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE Mat::~Mat() +{ + release(); +} + +NCNN_FORCEINLINE void Mat::fill(float _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + + int i = 0; +#if __ARM_NEON + float32x4_t _c = vdupq_n_f32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_f32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +NCNN_FORCEINLINE void Mat::fill(int _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + + int i = 0; +#if __ARM_NEON + int32x4_t _c = vdupq_n_s32(_v); + for (; i + 3 < size; i += 4) + { + vst1q_s32(ptr, _c); + ptr += 4; + } +#endif // __ARM_NEON + for (; i < size; i++) + { + *ptr++ = _v; + } +} + +#if __ARM_NEON +NCNN_FORCEINLINE void Mat::fill(float32x4_t _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vst1q_f32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vst1_u16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + 
vst1q_s32(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1) +{ + int size = (int)total(); + int* ptr = (int*)data; + for (int i = 0; i < size; i++) + { + vst1q_s32(ptr, _v0); + vst1q_s32(ptr + 4, _v1); + ptr += 8; + } +} +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +NCNN_FORCEINLINE void Mat::fill(float16x4_t _v) +{ + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vst1_f16(ptr, _v); + ptr += 4; + } +} + +NCNN_FORCEINLINE void Mat::fill(float16x8_t _v) +{ + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vst1q_f16(ptr, _v); + ptr += 8; + } +} +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON + +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ +NCNN_FORCEINLINE void Mat::fill(__m512 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm512_storeu_ps(ptr, _v); + ptr += 16; + } +} +#endif // __AVX512F__ +NCNN_FORCEINLINE void Mat::fill(__m256 _v, int _i) +{ + // old gcc cannot overload __m128 and __m256 type + // add a dummy int parameter for different mangled function symbol + (void)_i; + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm256_storeu_ps(ptr, _v); + ptr += 8; + } +} +#endif // __AVX__ +NCNN_FORCEINLINE void Mat::fill(__m128 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + _mm_storeu_ps(ptr, _v); + ptr += 4; + } +} +NCNN_FORCEINLINE void Mat::fill(__m128i _v) +{ + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + _mm_store_si128((__m128i*)ptr, _v); + ptr += 8; + } +} +#endif // __SSE2__ + +#if __mips_msa +NCNN_FORCEINLINE void Mat::fill(v4f32 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + __msa_st_w((v4i32)_v, ptr, 0); + ptr += 4; + } +} +#endif // __mips_msa + +#if __loongarch_sx +NCNN_FORCEINLINE void Mat::fill(__m128 _v) +{ + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + __lsx_vst(_v, ptr, 0); + ptr += 4; + } +} +#endif // __loongarch_sx +#if __riscv_vector +NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 4; + const size_t vl = vsetvl_e32m1(packn); + + int size = (int)total(); + float* ptr = (float*)data; + for (int i = 0; i < size; i++) + { + vse32_v_f32m1(ptr, _v, vl); + ptr += packn; + } +} + +NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 2; + const size_t vl = vsetvl_e16m1(packn); + + int size = (int)total(); + unsigned short* ptr = (unsigned short*)data; + for (int i = 0; i < size; i++) + { + vse16_v_u16m1(ptr, _v, vl); + ptr += packn; + } +} + +NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 1; + const size_t vl = vsetvl_e8m1(packn); + + int size = (int)total(); + signed char* ptr = (signed char*)data; + for (int i = 0; i < size; i++) + { + vse8_v_i8m1(ptr, _v, vl); + ptr += packn; + } +} +#if __riscv_zfh +NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v) +{ + const int packn = cpu_riscv_vlenb() / 2; + const size_t vl = vsetvl_e16m1(packn); + + int size = (int)total(); + __fp16* ptr = (__fp16*)data; + for (int i = 0; i < size; i++) + { + vse16_v_f16m1(ptr, _v, vl); + ptr += packn; + } +} +#endif // __riscv_zfh +#endif // __riscv_vector + +template<typename T> +NCNN_FORCEINLINE void Mat::fill(T _v) +{ +
int size = (int)total(); + T* ptr = (T*)data; + for (int i = 0; i < size; i++) + { + ptr[i] = _v; + } +} + +NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE void Mat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void Mat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator) + allocator->fastFree(data); + else + fastFree(data); + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool Mat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t Mat::total() const +{ + return cstep * c; +} + +NCNN_FORCEINLINE int Mat::elembits() const +{ + return elempack ? static_cast<int>(elemsize * 8) / elempack : 0; +} + +NCNN_FORCEINLINE Mat Mat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE Mat Mat::channel(int _c) +{ + Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims - 1; + if (dims == 4) + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::channel(int _c) const +{ + Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims - 1; + if (dims == 4) + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE Mat Mat::depth(int z) +{ + return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::depth(int z) const +{ + return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE float* Mat::row(int y) +{ + return (float*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +NCNN_FORCEINLINE const float* Mat::row(int y) const +{ + return (const float*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +template<typename T> +NCNN_FORCEINLINE T* Mat::row(int y) +{ + return (T*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +template<typename T> +NCNN_FORCEINLINE const T* Mat::row(int y) const +{ + return (const T*)((unsigned char*)data + (size_t)w * y * elemsize); +} + +NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels) +{ + Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const +{ + Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); + m.dims = dims; + return m; +} + +NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths) +{ + Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); + m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const +{ + Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator); +
m.cstep = (size_t)w * h; + return m; +} + +NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows) +{ + return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const +{ + return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE Mat Mat::range(int x, int n) +{ + return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator); +} + +NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const +{ + return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator); +} + +template<typename T> +NCNN_FORCEINLINE Mat::operator T*() +{ + return (T*)data; +} + +template<typename T> +NCNN_FORCEINLINE Mat::operator const T*() const +{ + return (const T*)data; +} + +NCNN_FORCEINLINE float& Mat::operator[](size_t i) +{ + return ((float*)data)[i]; +} + +NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const +{ + return ((const float*)data)[i]; +} + +#if NCNN_VULKAN + +NCNN_FORCEINLINE VkMat::VkMat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c) +{ + addref(); + + cstep
= m.cstep; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ + cstep = w; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ + cstep = w * h; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ + cstep = alignSize(w * h * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ + cstep = alignSize(w * h * d * elemsize, 16) / elemsize; +} + +NCNN_FORCEINLINE VkMat::~VkMat() +{ + release(); +} + +NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + cstep = m.cstep; + + return *this; +} + +NCNN_FORCEINLINE Mat VkMat::mapped() const +{ + if (!allocator->mappable) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkMat::mapped_ptr() const +{ + if (!allocator->mappable) + return 0; + + return (unsigned char*)data->mapped_ptr + data->offset; +} + +NCNN_FORCEINLINE void VkMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkMat::release() +{ + if (refcount && 
NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + cstep = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool VkMat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t VkMat::total() const +{ + return cstep * c; +} + +NCNN_FORCEINLINE int VkMat::elembits() const +{ + return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0; +} + +NCNN_FORCEINLINE Mat VkMat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE VkBuffer VkMat::buffer() const +{ + return data->buffer; +} + +NCNN_FORCEINLINE size_t VkMat::buffer_offset() const +{ + return data->offset; +} + +NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const +{ + return data->capacity; +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat() + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _d, _c, _elemsize, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0) +{ + create(_w, _h, _d, _c, _elemsize, _elempack, _allocator); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m) + : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h),
d(m.d), c(m.c) +{ + addref(); +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator) + : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c) +{ +} + +NCNN_FORCEINLINE VkImageMat::~VkImageMat() +{ + release(); +} + +NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m) +{ + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + elemsize = m.elemsize; + elempack = m.elempack; + allocator = m.allocator; + + dims = m.dims; + w = m.w; + h = m.h; + d = m.d; + c = m.c; + + return *this; +} + +NCNN_FORCEINLINE Mat VkImageMat::mapped() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return Mat(); + + if (dims == 1) + return Mat(w, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 2) + return Mat(w, h, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 3) + return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0); + + if (dims == 4) + return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0); + + return Mat(); +} + +NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const +{ + if (!allocator->mappable || !data->mapped_ptr) + return 0; + + return (unsigned char*)data->mapped_ptr + data->bind_offset; +} + +NCNN_FORCEINLINE void VkImageMat::addref() +{ + if (refcount) + NCNN_XADD(refcount, 1); +} + +NCNN_FORCEINLINE void VkImageMat::release() +{ + if (refcount && NCNN_XADD(refcount, -1) == 1) + { + if (allocator && data) + { + allocator->fastFree(data); + } + } + + data = 0; + + elemsize = 0; + 
elempack = 0; + + dims = 0; + w = 0; + h = 0; + d = 0; + c = 0; + + refcount = 0; +} + +NCNN_FORCEINLINE bool VkImageMat::empty() const +{ + return data == 0 || total() == 0; +} + +NCNN_FORCEINLINE size_t VkImageMat::total() const +{ + return w * h * d * c; +} + +NCNN_FORCEINLINE int VkImageMat::elembits() const +{ + return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0; +} + +NCNN_FORCEINLINE Mat VkImageMat::shape() const +{ + if (dims == 1) + return Mat(w * elempack, (void*)0); + if (dims == 2) + return Mat(w, h * elempack, (void*)0); + if (dims == 3) + return Mat(w, h, c * elempack, (void*)0); + if (dims == 4) + return Mat(w, h, d, c * elempack, (void*)0); + + return Mat(); +} + +NCNN_FORCEINLINE VkImage VkImageMat::image() const +{ + return data->image; +} + +NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const +{ + return data->imageview; +} + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_MAT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/modelbin.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/modelbin.h new file mode 100644 index 0000000..15d2b9c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/modelbin.h @@ -0,0 +1,80 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
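Note: the float32_to_bfloat16()/bfloat16_to_float32() helpers defined earlier in mat.h simply keep or restore the top 16 bits of an IEEE-754 float (sign, 8 exponent bits, 7 mantissa bits), so a round trip preserves range but truncates precision. A minimal standalone sketch, assuming only that the ncnn include directory shipped with this demo is on the include path:

#include <cstdio>

#include "mat.h" // ncnn::float32_to_bfloat16 / ncnn::bfloat16_to_float32

int main()
{
    float x = 3.1415926f; // raw bits 0x40490fdb

    // keep the high 16 bits: sign, 8 exponent bits, 7 mantissa bits
    unsigned short b = ncnn::float32_to_bfloat16(x); // 0x4049

    // shift back into the high half; the low mantissa bits are lost
    float y = ncnn::bfloat16_to_float32(b);

    printf("%f -> 0x%04x -> %f\n", x, b, y); // y is 3.140625
    return 0;
}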
+ +#ifndef NCNN_MODELBIN_H +#define NCNN_MODELBIN_H + +#include "mat.h" + +namespace ncnn { + +class DataReader; +class NCNN_EXPORT ModelBin +{ +public: + ModelBin(); + virtual ~ModelBin(); + // element type + // 0 = auto + // 1 = float32 + // 2 = float16 + // 3 = int8 + // load vec + virtual Mat load(int w, int type) const = 0; + // load image + virtual Mat load(int w, int h, int type) const; + // load dim + virtual Mat load(int w, int h, int c, int type) const; + // load cube + virtual Mat load(int w, int h, int d, int c, int type) const; +}; + +class ModelBinFromDataReaderPrivate; +class NCNN_EXPORT ModelBinFromDataReader : public ModelBin +{ +public: + explicit ModelBinFromDataReader(const DataReader& dr); + virtual ~ModelBinFromDataReader(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromDataReader(const ModelBinFromDataReader&); + ModelBinFromDataReader& operator=(const ModelBinFromDataReader&); + +private: + ModelBinFromDataReaderPrivate* const d; +}; + +class ModelBinFromMatArrayPrivate; +class NCNN_EXPORT ModelBinFromMatArray : public ModelBin +{ +public: + // construct from weight blob array + explicit ModelBinFromMatArray(const Mat* weights); + virtual ~ModelBinFromMatArray(); + + virtual Mat load(int w, int type) const; + +private: + ModelBinFromMatArray(const ModelBinFromMatArray&); + ModelBinFromMatArray& operator=(const ModelBinFromMatArray&); + +private: + ModelBinFromMatArrayPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_MODELBIN_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/ncnn_export.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/ncnn_export.h new file mode 100644 index 0000000..7343310 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/ncnn_export.h @@ -0,0 +1,42 @@ + +#ifndef NCNN_EXPORT_H +#define NCNN_EXPORT_H + +#ifdef NCNN_STATIC_DEFINE +# define NCNN_EXPORT +# define NCNN_NO_EXPORT +#else +# ifndef NCNN_EXPORT +# ifdef ncnn_EXPORTS + /* We are building this library */ +# define NCNN_EXPORT +# else + /* We are using this library */ +# define NCNN_EXPORT +# endif +# endif + +# ifndef NCNN_NO_EXPORT +# define NCNN_NO_EXPORT +# endif +#endif + +#ifndef NCNN_DEPRECATED +# define NCNN_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#ifndef NCNN_DEPRECATED_EXPORT +# define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED +#endif + +#ifndef NCNN_DEPRECATED_NO_EXPORT +# define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED +#endif + +#if 0 /* DEFINE_NO_DEPRECATED */ +# ifndef NCNN_NO_DEPRECATED +# define NCNN_NO_DEPRECATED +# endif +#endif + +#endif /* NCNN_EXPORT_H */ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/net.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/net.h new file mode 100644 index 0000000..9407042 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/net.h @@ -0,0 +1,272 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_NET_H +#define NCNN_NET_H + +#include "blob.h" +#include "layer.h" +#include "mat.h" +#include "option.h" +#include "platform.h" + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/asset_manager.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API + +namespace ncnn { + +#if NCNN_VULKAN +class VkCompute; +#endif // NCNN_VULKAN +class DataReader; +class Extractor; +class NetPrivate; +class NCNN_EXPORT Net +{ +public: + // empty init + Net(); + // clear and destroy + virtual ~Net(); + +public: + // option can be changed before loading + Option opt; + +#if NCNN_VULKAN + // set gpu device by index + void set_vulkan_device(int device_index); + + // set gpu device by device handle, no owner transfer + void set_vulkan_device(const VulkanDevice* vkdev); + + const VulkanDevice* vulkan_device() const; +#endif // NCNN_VULKAN + +#if NCNN_STRING + // register custom layer by layer type name + // return 0 if success + int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); + virtual int custom_layer_to_index(const char* type); +#endif // NCNN_STRING + // register custom layer by layer type + // return 0 if success + int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0); + +#if NCNN_STRING + int load_param(const DataReader& dr); +#endif // NCNN_STRING + + int load_param_bin(const DataReader& dr); + + int load_model(const DataReader& dr); + +#if NCNN_STDIO +#if NCNN_STRING + // load network structure from plain param file + // return 0 if success + int load_param(FILE* fp); + int load_param(const char* protopath); + int load_param_mem(const char* mem); +#endif // NCNN_STRING + // load network structure from binary param file + // return 0 if success + int load_param_bin(FILE* fp); + int load_param_bin(const char* protopath); + + // load network weight data from model file + // return 0 if success + int load_model(FILE* fp); + int load_model(const char* modelpath); +#endif // NCNN_STDIO + + // load network structure from external memory + // memory pointer must be 32-bit aligned + // return bytes consumed + int load_param(const unsigned char* mem); + + // reference network weight data from external memory + // weight data is not copied but referenced + // so external memory should be retained when used + // memory pointer must be 32-bit aligned + // return bytes consumed + int load_model(const unsigned char* mem); + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#if NCNN_STRING + // convenient load network structure from android asset plain param file + int load_param(AAsset* asset); + int load_param(AAssetManager* mgr, const char* assetpath); +#endif // NCNN_STRING + // convenient load network structure from android asset binary param file + int load_param_bin(AAsset* asset); + int load_param_bin(AAssetManager* mgr, const char* assetpath); + + // convenient load network weight data from android asset model file + int load_model(AAsset* asset); + int load_model(AAssetManager* mgr, const char* assetpath); +#endif // __ANDROID_API__ >= 9
+#endif // NCNN_PLATFORM_API + + // unload network structure and weight data + void clear(); + + // construct an Extractor from network + Extractor create_extractor() const; + + // get input/output indexes/names + const std::vector<int>& input_indexes() const; + const std::vector<int>& output_indexes() const; +#if NCNN_STRING + const std::vector<const char*>& input_names() const; + const std::vector<const char*>& output_names() const; +#endif + + const std::vector<Blob>& blobs() const; + const std::vector<Layer*>& layers() const; + + std::vector<Blob>& mutable_blobs(); + std::vector<Layer*>& mutable_layers(); + +protected: + friend class Extractor; +#if NCNN_STRING + int find_blob_index_by_name(const char* name) const; + int find_layer_index_by_name(const char* name) const; + virtual Layer* create_custom_layer(const char* type); +#endif // NCNN_STRING + virtual Layer* create_custom_layer(int index); + +private: + Net(const Net&); + Net& operator=(const Net&); + +private: + NetPrivate* const d; +}; + +class ExtractorPrivate; +class NCNN_EXPORT Extractor +{ +public: + virtual ~Extractor(); + + // copy + Extractor(const Extractor&); + + // assign + Extractor& operator=(const Extractor&); + + // clear blob mats and allocators + void clear(); + + // enable light mode + // intermediate blob will be recycled when enabled + // enabled by default + void set_light_mode(bool enable); + + // set thread count for this extractor + // this will overwrite the global setting + // default count is system dependent + void set_num_threads(int num_threads); + + // set blob memory allocator + void set_blob_allocator(Allocator* allocator); + + // set workspace memory allocator + void set_workspace_allocator(Allocator* allocator); + +#if NCNN_VULKAN + void set_vulkan_compute(bool enable); + + void set_blob_vkallocator(VkAllocator* allocator); + + void set_workspace_vkallocator(VkAllocator* allocator); + + void set_staging_vkallocator(VkAllocator* allocator); +#endif // NCNN_VULKAN + +#if NCNN_STRING + // set input by blob name + // return 0 if success + int input(const char* blob_name, const Mat& in); + + // get result by blob name + // return 0 if success + // type = 0, default + // type = 1, do not convert fp16/bf16 or / and packing + int extract(const char* blob_name, Mat& feat, int type = 0); +#endif // NCNN_STRING + + // set input by blob index + // return 0 if success + int input(int blob_index, const Mat& in); + + // get result by blob index + // return 0 if success + // type = 0, default + // type = 1, do not convert fp16/bf16 or / and packing + int extract(int blob_index, Mat& feat, int type = 0); + +#if NCNN_VULKAN +#if NCNN_STRING + // set input by blob name + // return 0 if success + int input(const char* blob_name, const VkMat& in); + + // get result by blob name + // return 0 if success + int extract(const char* blob_name, VkMat& feat, VkCompute& cmd); + + // set input by blob name + // return 0 if success + int input(const char* blob_name, const VkImageMat& in); + + // get result by blob name + // return 0 if success + int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd); +#endif // NCNN_STRING + + // set input by blob index + // return 0 if success + int input(int blob_index, const VkMat& in); + + // get result by blob index + // return 0 if success + int extract(int blob_index, VkMat& feat, VkCompute& cmd); + + // set input by blob index + // return 0 if success + int input(int blob_index, const VkImageMat& in); + + // get result by blob index + // return 0 if success + int extract(int blob_index, VkImageMat& feat, VkCompute& cmd); +#endif //
NCNN_VULKAN + +protected: + friend Extractor Net::create_extractor() const; + Extractor(const Net* net, size_t blob_count); + +private: + ExtractorPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_NET_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/option.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/option.h new file mode 100644 index 0000000..3fda808 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/option.h @@ -0,0 +1,153 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_OPTION_H +#define NCNN_OPTION_H + +#include "platform.h" + +namespace ncnn { + +#if NCNN_VULKAN +class VkAllocator; +class PipelineCache; +#endif // NCNN_VULKAN + +class Allocator; +class NCNN_EXPORT Option +{ +public: + // default option + Option(); + +public: + // light mode + // intermediate blob will be recycled when enabled + // enabled by default + bool lightmode; + + // thread count + // default value is the one returned by get_cpu_count() + int num_threads; + + // blob memory allocator + Allocator* blob_allocator; + + // workspace memory allocator + Allocator* workspace_allocator; + +#if NCNN_VULKAN + // blob memory allocator + VkAllocator* blob_vkallocator; + + // workspace memory allocator + VkAllocator* workspace_vkallocator; + + // staging memory allocator + VkAllocator* staging_vkallocator; + + // pipeline cache + PipelineCache* pipeline_cache; +#endif // NCNN_VULKAN + + // the time openmp threads busy-wait for more work before going to sleep + // default value is 20ms to keep the cores enabled + // without too much extra power consumption afterwards + int openmp_blocktime; + + // enable winograd convolution optimization + // improve convolution 3x3 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_winograd_convolution; + + // enable sgemm convolution optimization + // improve convolution 1x1 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_sgemm_convolution; + + // enable quantized int8 inference + // use low-precision int8 path for quantized model + // changes should be applied before loading network structure and weight + // enabled by default + bool use_int8_inference; + + // enable vulkan compute + bool use_vulkan_compute; + + // enable bf16 data type for storage + // improve most operator performance on all arm devices, may consume more memory + bool use_bf16_storage; + + // enable options for gpu inference + bool use_fp16_packed; + bool use_fp16_storage; + bool use_fp16_arithmetic; + bool use_int8_packed; + bool use_int8_storage; + bool use_int8_arithmetic; + + // enable 
simd-friendly packed memory layout + // improve all operator performance on all arm devices, will consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_packing_layout; + + bool use_shader_pack8; + + // subgroup option + bool use_subgroup_basic; + bool use_subgroup_vote; + bool use_subgroup_ballot; + bool use_subgroup_shuffle; + + // turn on for adreno + bool use_image_storage; + bool use_tensor_storage; + + bool use_reserved_0; + + // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero) + // default value is 3 + // 0 = DAZ OFF, FTZ OFF + // 1 = DAZ ON , FTZ OFF + // 2 = DAZ OFF, FTZ ON + // 3 = DAZ ON, FTZ ON + int flush_denormals; + + bool use_local_pool_allocator; + + // enable local memory optimization for gpu inference + bool use_shader_local_memory; + + // enable cooperative matrix optimization for gpu inference + bool use_cooperative_matrix; + + // more fine-grained control of winograd convolution + bool use_winograd23_convolution; + bool use_winograd43_convolution; + bool use_winograd63_convolution; + + bool use_reserved_6; + bool use_reserved_7; + bool use_reserved_8; + bool use_reserved_9; + bool use_reserved_10; + bool use_reserved_11; +}; + +} // namespace ncnn + +#endif // NCNN_OPTION_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/paramdict.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/paramdict.h new file mode 100644 index 0000000..c2ef160 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/paramdict.h @@ -0,0 +1,73 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
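Note: the Option fields in option.h above are read when layers are created, so they should be set before load_param()/load_model() is called. A minimal sketch of a typical mobile configuration; the values are illustrative, not settings taken from this diff:

#include "net.h" // Net carries an Option instance as net.opt

void configure(ncnn::Net& net)
{
    net.opt.num_threads = 4;                 // override the get_cpu_count() default
    net.opt.use_vulkan_compute = true;       // only effective on a Vulkan-capable device
    net.opt.use_fp16_storage = true;         // halve tensor memory at a minor accuracy cost
    net.opt.use_winograd_convolution = true; // faster 3x3 stride-1 convolution, more memory
}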
+ +#ifndef NCNN_PARAMDICT_H +#define NCNN_PARAMDICT_H + +#include "mat.h" + +// at most 32 parameters +#define NCNN_MAX_PARAM_COUNT 32 + +namespace ncnn { + +class DataReader; +class Net; +class ParamDictPrivate; +class NCNN_EXPORT ParamDict +{ +public: + // empty + ParamDict(); + + virtual ~ParamDict(); + + // copy + ParamDict(const ParamDict&); + + // assign + ParamDict& operator=(const ParamDict&); + + // get type + int type(int id) const; + + // get int + int get(int id, int def) const; + // get float + float get(int id, float def) const; + // get array + Mat get(int id, const Mat& def) const; + + // set int + void set(int id, int i); + // set float + void set(int id, float f); + // set array + void set(int id, const Mat& v); + +protected: + friend class Net; + + void clear(); + + int load_param(const DataReader& dr); + int load_param_bin(const DataReader& dr); + +private: + ParamDictPrivate* const d; +}; + +} // namespace ncnn + +#endif // NCNN_PARAMDICT_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/pipeline.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/pipeline.h new file mode 100644 index 0000000..c284a14 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/pipeline.h @@ -0,0 +1,113 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
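Note: ParamDict (paramdict.h above) maps small integer ids to typed parameters with caller-supplied defaults; it is chiefly what a custom layer's load_param() implementation reads. A minimal sketch of the three get() overloads, with hypothetical parameter ids and defaults:

#include "paramdict.h"

void read_params(const ncnn::ParamDict& pd)
{
    int num_output = pd.get(0, 0);             // int overload, default 0
    float eps = pd.get(1, 1e-5f);              // float overload, default 1e-5
    ncnn::Mat scales = pd.get(2, ncnn::Mat()); // array overload, default empty Mat
    (void)num_output;                          // silence unused-variable warnings
    (void)eps;
    (void)scales;
}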
+ +#ifndef NCNN_PIPELINE_H +#define NCNN_PIPELINE_H + +#include "mat.h" +#include "platform.h" +#if NCNN_VULKAN +#include "gpu.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +#if NCNN_VULKAN +class Option; +class PipelinePrivate; +class NCNN_EXPORT Pipeline +{ +public: + explicit Pipeline(const VulkanDevice* vkdev); + virtual ~Pipeline(); + +public: + void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4); + void set_optimal_local_size_xyz(const Mat& local_size_xyz); + void set_local_size_xyz(int w, int h, int c); + + int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations); + + int create(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations); + +public: + VkShaderModule shader_module() const; + VkDescriptorSetLayout descriptorset_layout() const; + VkPipelineLayout pipeline_layout() const; + VkPipeline pipeline() const; + VkDescriptorUpdateTemplateKHR descriptor_update_template() const; + + const ShaderInfo& shader_info() const; + + uint32_t local_size_x() const; + uint32_t local_size_y() const; + uint32_t local_size_z() const; + +protected: + void set_shader_module(VkShaderModule shader_module); + void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout); + void set_pipeline_layout(VkPipelineLayout pipeline_layout); + void set_pipeline(VkPipeline pipeline); + void set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template); + + void set_shader_info(const ShaderInfo& shader_info); + +public: + const VulkanDevice* vkdev; + +private: + Pipeline(const Pipeline&); + Pipeline& operator=(const Pipeline&); + +private: + PipelinePrivate* const d; +}; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 +class VkCompute; +class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline +{ +public: + explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev); + virtual ~ImportAndroidHardwareBufferPipeline(); + + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt); + int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt); + void destroy(); + + friend class VkCompute; + +protected: + int create_shader_module(const Option& opt); + int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator); + int create_descriptorset_layout(); + +public: + int type_to; + int rotate_from; + bool need_resize; + + VkSampler sampler; +}; +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/pipelinecache.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/pipelinecache.h new file mode 100644 index 0000000..bb6b8fb --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/pipelinecache.h @@ -0,0 +1,85 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License.
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_PIPELINECACHE_H +#define NCNN_PIPELINECACHE_H + +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#include "mat.h" +#include "gpu.h" + +namespace ncnn { + +#if NCNN_VULKAN + +class VulkanDevice; +class PipelineCachePrivate; +class NCNN_EXPORT PipelineCache +{ +public: + explicit PipelineCache(const VulkanDevice* _vkdev); + + virtual ~PipelineCache(); + + void clear(); + + int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + + int get_pipeline(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations, + uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* shader_module, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template, + ShaderInfo& shader_info) const; + +protected: + int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, + VkShaderModule* _shader_module, ShaderInfo& si) const; + + int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector<vk_specialization_type>& specializations, + VkDescriptorSetLayout* descriptorset_layout, + VkPipelineLayout* pipeline_layout, + VkPipeline* pipeline, + VkDescriptorUpdateTemplateKHR* descriptor_update_template) const; + +protected: + const VulkanDevice* vkdev; + +private: + PipelineCache(const PipelineCache&); + PipelineCache& operator=(const PipelineCache&); + +private: + PipelineCachePrivate* const d; +}; + +#endif // NCNN_VULKAN + +} // namespace ncnn + +#endif // NCNN_PIPELINECACHE_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/platform.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/platform.h new file mode 100644 index 0000000..51d10f8 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/platform.h @@ -0,0 +1,290 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
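Note: putting the net.h API above together, a Net is loaded once from a .param/.bin pair and each inference then goes through a lightweight Extractor. A minimal sketch; the file paths and blob names below are placeholders, not values taken from this diff:

#include "net.h"

int run(const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Net net;

    // both loaders return 0 on success
    if (net.load_param("model.param") != 0)
        return -1;
    if (net.load_model("model.bin") != 0)
        return -1;

    ncnn::Extractor ex = net.create_extractor();
    if (ex.input("in0", in) != 0) // blob name as spelled in the .param file
        return -1;
    return ex.extract("out0", out); // 0 on success, fills out
}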
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/platform.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/platform.h
new file mode 100644
index 0000000..51d10f8
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/platform.h
@@ -0,0 +1,290 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_PLATFORM_H
+#define NCNN_PLATFORM_H
+
+#define NCNN_STDIO 1
+#define NCNN_STRING 1
+#define NCNN_SIMPLEOCV 0
+#define NCNN_SIMPLEOMP 0
+#define NCNN_SIMPLESTL 0
+#define NCNN_THREADS 1
+#define NCNN_BENCHMARK 0
+#define NCNN_C_API 1
+#define NCNN_PLATFORM_API 1
+#define NCNN_PIXEL 1
+#define NCNN_PIXEL_ROTATE 1
+#define NCNN_PIXEL_AFFINE 1
+#define NCNN_PIXEL_DRAWING 1
+#define NCNN_VULKAN 1
+#define NCNN_SYSTEM_GLSLANG 0
+#define NCNN_RUNTIME_CPU 1
+#define NCNN_GNU_INLINE_ASM 1
+#define NCNN_AVX 1
+#define NCNN_XOP 1
+#define NCNN_FMA 1
+#define NCNN_F16C 1
+#define NCNN_AVX2 1
+#define NCNN_AVXVNNI 1
+#define NCNN_AVX512 1
+#define NCNN_AVX512VNNI 1
+#define NCNN_AVX512BF16 1
+#define NCNN_AVX512FP16 1
+#define NCNN_VFPV4 0
+#if __aarch64__
+#define NCNN_ARM82 0
+#define NCNN_ARM82DOT 0
+#define NCNN_ARM82FP16FML 0
+#define NCNN_ARM84BF16 0
+#define NCNN_ARM84I8MM 0
+#define NCNN_ARM86SVE 0
+#define NCNN_ARM86SVE2 0
+#define NCNN_ARM86SVEBF16 0
+#define NCNN_ARM86SVEI8MM 0
+#define NCNN_ARM86SVEF32MM 0
+#endif // __aarch64__
+#define NCNN_MSA 0
+#define NCNN_LSX 0
+#define NCNN_MMI 0
+#define NCNN_RVV 0
+#define NCNN_INT8 1
+#define NCNN_BF16 1
+#define NCNN_FORCE_INLINE 1
+
+#define NCNN_VERSION_STRING "1.0.20230223"
+
+#include "ncnn_export.h"
+
+#ifdef __cplusplus
+
+#if NCNN_THREADS
+#if (defined _WIN32 && !(defined __MINGW32__))
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#else
+#include <pthread.h>
+#endif
+#endif // NCNN_THREADS
+
+#if __ANDROID_API__ >= 26
+#define VK_USE_PLATFORM_ANDROID_KHR
+#endif // __ANDROID_API__ >= 26
+
+namespace ncnn {
+
+#if NCNN_THREADS
+#if (defined _WIN32 && !(defined __MINGW32__))
+class NCNN_EXPORT Mutex
+{
+public:
+    Mutex() { InitializeSRWLock(&srwlock); }
+    ~Mutex() {}
+    void lock() { AcquireSRWLockExclusive(&srwlock); }
+    void unlock() { ReleaseSRWLockExclusive(&srwlock); }
+private:
+    friend class ConditionVariable;
+    // NOTE SRWLock is available from windows vista
+    SRWLOCK srwlock;
+};
+
+class NCNN_EXPORT ConditionVariable
+{
+public:
+    ConditionVariable() { InitializeConditionVariable(&condvar); }
+    ~ConditionVariable() {}
+    void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); }
+    void broadcast() { WakeAllConditionVariable(&condvar); }
+    void signal() { WakeConditionVariable(&condvar); }
+private:
+    CONDITION_VARIABLE condvar;
+};
+
+static unsigned __stdcall start_wrapper(void* args);
+class NCNN_EXPORT Thread
+{
+public:
+    Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); }
+    ~Thread() {}
+    void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); }
+private:
+    friend unsigned __stdcall start_wrapper(void* args)
+    {
+        Thread* t = (Thread*)args;
+        t->_start(t->_args);
+        return 0;
+    }
+    HANDLE handle;
+    void* (*_start)(void*);
+    void* _args;
+};
+
+class NCNN_EXPORT ThreadLocalStorage
+{
+public:
+    ThreadLocalStorage() { key = TlsAlloc(); }
+    ~ThreadLocalStorage() { TlsFree(key); }
+    void set(void* value) { TlsSetValue(key, (LPVOID)value); }
+    void* get() { return (void*)TlsGetValue(key); }
+private:
+    DWORD key;
+};
+#else // (defined _WIN32 && !(defined __MINGW32__))
+class NCNN_EXPORT Mutex
+{
+public:
+    Mutex() { pthread_mutex_init(&mutex, 0); }
+    ~Mutex() { pthread_mutex_destroy(&mutex); }
+    void lock() { pthread_mutex_lock(&mutex); }
+    void unlock() { pthread_mutex_unlock(&mutex); }
+private:
+    friend class ConditionVariable;
+    pthread_mutex_t mutex;
+};
+
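+// A usage sketch (illustrative, not part of the header): Mutex pairs with the
+// RAII MutexLockGuard defined later in this file, e.g.
+//
+//     static ncnn::Mutex g_lock;              // hypothetical shared lock
+//     void touch_shared_state()
+//     {
+//         ncnn::MutexLockGuard guard(g_lock); // locks here ...
+//         // ... critical section ...
+//     }                                       // ... unlocks on scope exit
+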
+class NCNN_EXPORT ConditionVariable
+{
+public:
+    ConditionVariable() { pthread_cond_init(&cond, 0); }
+    ~ConditionVariable() { pthread_cond_destroy(&cond); }
+    void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); }
+    void broadcast() { pthread_cond_broadcast(&cond); }
+    void signal() { pthread_cond_signal(&cond); }
+private:
+    pthread_cond_t cond;
+};
+
+class NCNN_EXPORT Thread
+{
+public:
+    Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); }
+    ~Thread() {}
+    void join() { pthread_join(t, 0); }
+private:
+    pthread_t t;
+};
+
+class NCNN_EXPORT ThreadLocalStorage
+{
+public:
+    ThreadLocalStorage() { pthread_key_create(&key, 0); }
+    ~ThreadLocalStorage() { pthread_key_delete(key); }
+    void set(void* value) { pthread_setspecific(key, value); }
+    void* get() { return pthread_getspecific(key); }
+private:
+    pthread_key_t key;
+};
+#endif // (defined _WIN32 && !(defined __MINGW32__))
+#else // NCNN_THREADS
+class NCNN_EXPORT Mutex
+{
+public:
+    Mutex() {}
+    ~Mutex() {}
+    void lock() {}
+    void unlock() {}
+};
+
+class NCNN_EXPORT ConditionVariable
+{
+public:
+    ConditionVariable() {}
+    ~ConditionVariable() {}
+    void wait(Mutex& /*mutex*/) {}
+    void broadcast() {}
+    void signal() {}
+};
+
+class NCNN_EXPORT Thread
+{
+public:
+    Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {}
+    ~Thread() {}
+    void join() {}
+};
+
+class NCNN_EXPORT ThreadLocalStorage
+{
+public:
+    ThreadLocalStorage() { data = 0; }
+    ~ThreadLocalStorage() {}
+    void set(void* value) { data = value; }
+    void* get() { return data; }
+private:
+    void* data;
+};
+#endif // NCNN_THREADS
+
+class NCNN_EXPORT MutexLockGuard
+{
+public:
+    MutexLockGuard(Mutex& _mutex) : mutex(_mutex) {
+        // originally: mutex.lock_();
+        // changed to:
+        mutex.lock();
+    }
+    ~MutexLockGuard() { mutex.unlock(); }
+private:
+    Mutex& mutex;
+};
+
+} // namespace ncnn
+
+#if NCNN_SIMPLESTL
+#include "simplestl.h"
+#else
+#include <algorithm>
+#include <list>
+#include <string>
+#include <vector>
+#endif
+
+#endif // __cplusplus
+
+#if NCNN_STDIO
+#if NCNN_PLATFORM_API && __ANDROID_API__ >= 8
+#include <android/log.h>
+#define NCNN_LOGE(...) do { \
+    fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \
+    __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0)
+#else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8
+#include <stdio.h>
+#define NCNN_LOGE(...) do { \
+    fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0)
+#endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8
+#else
+#define NCNN_LOGE(...)
+#endif
+
+#if NCNN_FORCE_INLINE
+#ifdef _MSC_VER
+#define NCNN_FORCEINLINE __forceinline
+#elif defined(__GNUC__)
+#define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
+#elif defined(__CLANG__)
+#if __has_attribute(__always_inline__)
+#define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
+#else
+#define NCNN_FORCEINLINE inline
+#endif
+#else
+#define NCNN_FORCEINLINE inline
+#endif
+#else
+#define NCNN_FORCEINLINE inline
+#endif
+
+#endif // NCNN_PLATFORM_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simpleocv.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simpleocv.h
new file mode 100644
index 0000000..55ede15
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simpleocv.h
@@ -0,0 +1,501 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLEOCV_H +#define NCNN_SIMPLEOCV_H + +#include "platform.h" + +#if NCNN_SIMPLEOCV + +#include +#include +#include "allocator.h" +#include "mat.h" + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif + +#ifndef NCNN_XADD +using ncnn::NCNN_XADD; +#endif + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +enum +{ + CV_LOAD_IMAGE_UNCHANGED = -1, + CV_LOAD_IMAGE_GRAYSCALE = 0, + CV_LOAD_IMAGE_COLOR = 1, +}; + +enum +{ + CV_IMWRITE_JPEG_QUALITY = 1 +}; + +// minimal opencv style data structure implementation +namespace cv { + +template +static inline _Tp saturate_cast(int v) +{ + return _Tp(v); +} +template<> +inline uchar saturate_cast(int v) +{ + return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); +} + +template +struct Scalar_ +{ + Scalar_() + { + v[0] = 0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0) + { + v[0] = _v0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = _v3; + } + + const _Tp operator[](const int i) const + { + return v[i]; + } + + _Tp operator[](const int i) + { + return v[i]; + } + + _Tp v[4]; +}; + +typedef Scalar_ Scalar; + +template +struct Point_ +{ + Point_() + : x(0), y(0) + { + } + Point_(_Tp _x, _Tp _y) + : x(_x), y(_y) + { + } + + template + operator Point_<_Tp2>() const + { + return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); + } + + _Tp x; + _Tp y; +}; + +typedef Point_ Point; +typedef Point_ Point2f; + +template +struct Size_ +{ + Size_() + : width(0), height(0) + { + } + Size_(_Tp _w, _Tp _h) + : width(_w), height(_h) + { + } + + template + operator Size_<_Tp2>() const + { + return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp width; + _Tp height; +}; + +typedef Size_ Size; +typedef Size_ Size2f; + +template +struct Rect_ +{ + Rect_() + : x(0), y(0), width(0), height(0) + { + } + Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) + : x(_x), y(_y), width(_w), height(_h) + { + } + Rect_(Point_<_Tp> _p, Size_<_Tp> _size) + : x(_p.x), y(_p.y), width(_size.width), height(_size.height) + { + } + + template + operator Rect_<_Tp2>() const + { + return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp x; + _Tp y; + _Tp width; + _Tp height; + + // area + _Tp area() const + { + return width * height; + } +}; + +template +static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); + a.width = std::min(a.x + a.width, b.x + b.width) - x1; + a.height = std::min(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + if (a.width <= 0 || a.height <= 0) + a = Rect_<_Tp>(); + return a; 
+} + +template +static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y); + a.width = std::max(a.x + a.width, b.x + b.width) - x1; + a.height = std::max(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + return a; +} + +template +static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c &= b; +} + +template +static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c |= b; +} + +typedef Rect_ Rect; +typedef Rect_ Rect2f; + +#define CV_8UC1 1 +#define CV_8UC3 3 +#define CV_8UC4 4 +#define CV_32FC1 4 + +struct NCNN_EXPORT Mat +{ + Mat() + : data(0), refcount(0), rows(0), cols(0), c(0) + { + } + + Mat(int _rows, int _cols, int flags) + : data(0), refcount(0) + { + create(_rows, _cols, flags); + } + + // copy + Mat(const Mat& m) + : data(m.data), refcount(m.refcount) + { + if (refcount) + NCNN_XADD(refcount, 1); + + rows = m.rows; + cols = m.cols; + c = m.c; + } + + Mat(int _rows, int _cols, int flags, void* _data) + : data((unsigned char*)_data), refcount(0) + { + rows = _rows; + cols = _cols; + c = flags; + } + + ~Mat() + { + release(); + } + + // assign + Mat& operator=(const Mat& m) + { + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + + rows = m.rows; + cols = m.cols; + c = m.c; + + return *this; + } + + Mat& operator=(const Scalar& s) + { + if (total() > 0) + { + uchar* p = data; + for (int i = 0; i < cols * rows; i++) + { + for (int j = 0; j < c; j++) + { + *p++ = s[j]; + } + } + } + + return *this; + } + + void create(int _rows, int _cols, int flags) + { + release(); + + rows = _rows; + cols = _cols; + c = flags; + + if (total() > 0) + { + // refcount address must be aligned, so we expand totalsize here + size_t totalsize = (total() + 3) >> 2 << 2; + data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((uchar*)data) + totalsize); + *refcount = 1; + } + } + + void release() + { + if (refcount && NCNN_XADD(refcount, -1) == 1) + ncnn::fastFree(data); + + data = 0; + + rows = 0; + cols = 0; + c = 0; + + refcount = 0; + } + + Mat clone() const + { + if (empty()) + return Mat(); + + Mat m(rows, cols, c); + + if (total() > 0) + { + memcpy(m.data, data, total()); + } + + return m; + } + + bool empty() const + { + return data == 0 || total() == 0; + } + + int channels() const + { + return c; + } + + int type() const + { + return c; + } + + size_t total() const + { + return cols * rows * c; + } + + const uchar* ptr(int y) const + { + return data + y * cols * c; + } + + uchar* ptr(int y) + { + return data + y * cols * c; + } + + template + const _Tp* ptr(int y) const + { + return (const _Tp*)data + y * cols * c; + } + + template + _Tp* ptr(int y) + { + return (_Tp*)data + y * cols * c; + } + + // roi + Mat operator()(const Rect& roi) const + { + if (empty()) + return Mat(); + + Mat m(roi.height, roi.width, c); + + int sy = roi.y; + for (int y = 0; y < roi.height; y++) + { + const uchar* sptr = ptr(sy) + roi.x * c; + uchar* dptr = m.ptr(y); + memcpy(dptr, sptr, roi.width * c); + sy++; + } + + return m; + } + + uchar* data; + + // pointer to the reference counter; + // when points to user-allocated data, the pointer is NULL + int* refcount; + + int rows; + int cols; + + int c; +}; + +enum ImreadModes +{ + IMREAD_UNCHANGED = -1, + IMREAD_GRAYSCALE = 0, + IMREAD_COLOR = 1 +}; + 
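+// A usage sketch of the I/O declarations below (file names are placeholders):
+//
+//     cv::Mat img = cv::imread("in.jpg", IMREAD_COLOR);
+//     if (!img.empty())
+//         cv::imwrite("out.jpg", img);
+//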
+
+NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR);
+
+enum ImwriteFlags
+{
+    IMWRITE_JPEG_QUALITY = 1
+};
+
+NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector<int>& params = std::vector<int>());
+
+NCNN_EXPORT void imshow(const std::string& name, const Mat& m);
+
+NCNN_EXPORT int waitKey(int delay = 0);
+
+#if NCNN_PIXEL
+NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0);
+#endif // NCNN_PIXEL
+
+#if NCNN_PIXEL_DRAWING
+
+enum
+{
+    FILLED = -1
+};
+
+NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1);
+
+NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1);
+
+NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1);
+
+NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1);
+
+enum
+{
+    FONT_HERSHEY_SIMPLEX = 0
+};
+
+NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1);
+
+NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine);
+
+#endif // NCNN_PIXEL_DRAWING
+
+} // namespace cv
+
+#if defined(_MSC_VER) || defined(__GNUC__)
+#pragma pop_macro("min")
+#pragma pop_macro("max")
+#endif
+
+#endif // NCNN_SIMPLEOCV
+
+#endif // NCNN_SIMPLEOCV_H
diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simpleomp.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simpleomp.h
new file mode 100644
index 0000000..13e2452
--- /dev/null
+++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simpleomp.h
@@ -0,0 +1,53 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+ +#ifndef NCNN_SIMPLEOMP_H +#define NCNN_SIMPLEOMP_H + +#include "platform.h" + +#if NCNN_SIMPLEOMP + +#include + +// This minimal openmp runtime implementation only supports the llvm openmp abi +// and only supports #pragma omp parallel for num_threads(X) + +#ifdef __cplusplus +extern "C" { +#endif + +NCNN_EXPORT int omp_get_max_threads(); + +NCNN_EXPORT void omp_set_num_threads(int num_threads); + +NCNN_EXPORT int omp_get_dynamic(); + +NCNN_EXPORT void omp_set_dynamic(int dynamic); + +NCNN_EXPORT int omp_get_num_threads(); + +NCNN_EXPORT int omp_get_thread_num(); + +NCNN_EXPORT int kmp_get_blocktime(); + +NCNN_EXPORT void kmp_set_blocktime(int blocktime); + +#ifdef __cplusplus +} +#endif + +#endif // NCNN_SIMPLEOMP + +#endif // NCNN_SIMPLEOMP_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simplestl.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simplestl.h new file mode 100644 index 0000000..00ff468 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/simplestl.h @@ -0,0 +1,565 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLESTL_H +#define NCNN_SIMPLESTL_H + +#include +#include +#include + +#if !NCNN_SIMPLESTL + +#include + +#else + +// allocation functions +NCNN_EXPORT void* operator new(size_t size); +NCNN_EXPORT void* operator new[](size_t size); +// placement allocation functions +NCNN_EXPORT void* operator new(size_t size, void* ptr); +NCNN_EXPORT void* operator new[](size_t size, void* ptr); +// deallocation functions +NCNN_EXPORT void operator delete(void* ptr); +NCNN_EXPORT void operator delete[](void* ptr); +// deallocation functions since c++14 +#if __cplusplus >= 201402L +NCNN_EXPORT void operator delete(void* ptr, size_t sz); +NCNN_EXPORT void operator delete[](void* ptr, size_t sz); +#endif +// placement deallocation functions +NCNN_EXPORT void operator delete(void* ptr, void* voidptr2); +NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2); + +#endif + +// minimal stl data structure implementation +namespace std { + +template +const T& max(const T& a, const T& b) +{ + return (a < b) ? b : a; +} + +template +const T& min(const T& a, const T& b) +{ + return (a > b) ? 
b : a; +} + +template +void swap(T& a, T& b) +{ + T temp(a); + a = b; + b = temp; +} + +template +struct pair +{ + pair() + : first(), second() + { + } + pair(const T1& t1, const T2& t2) + : first(t1), second(t2) + { + } + + T1 first; + T2 second; +}; + +template +bool operator==(const pair& x, const pair& y) +{ + return (x.first == y.first && x.second == y.second); +} +template +bool operator<(const pair& x, const pair& y) +{ + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +bool operator!=(const pair& x, const pair& y) +{ + return !(x == y); +} +template +bool operator>(const pair& x, const pair& y) +{ + return y < x; +} +template +bool operator<=(const pair& x, const pair& y) +{ + return !(y < x); +} +template +bool operator>=(const pair& x, const pair& y) +{ + return !(x < y); +} + +template +pair make_pair(const T1& t1, const T2& t2) +{ + return pair(t1, t2); +} + +template +struct node +{ + node* prev_; + node* next_; + T data_; + + node() + : prev_(0), next_(0), data_() + { + } + node(const T& t) + : prev_(0), next_(0), data_(t) + { + } +}; + +template +struct iter_list +{ + iter_list() + : curr_(0) + { + } + iter_list(node* n) + : curr_(n) + { + } + iter_list(const iter_list& i) + : curr_(i.curr_) + { + } + ~iter_list() + { + } + + iter_list& operator=(const iter_list& i) + { + curr_ = i.curr_; + return *this; + } + + T& operator*() + { + return curr_->data_; + } + T* operator->() + { + return &(curr_->data_); + } + + bool operator==(const iter_list& i) + { + return curr_ == i.curr_; + } + bool operator!=(const iter_list& i) + { + return curr_ != i.curr_; + } + + iter_list& operator++() + { + curr_ = curr_->next_; + return *this; + } + iter_list& operator--() + { + curr_ = curr_->prev_; + return *this; + } + + node* curr_; +}; + +template +struct list +{ + typedef iter_list iterator; + + list() + { + head_ = new node(); + tail_ = head_; + count_ = 0; + } + ~list() + { + clear(); + delete head_; + } + list(const list& l) + { + head_ = new node(); + tail_ = head_; + count_ = 0; + + for (iter_list i = l.begin(); i != l.end(); ++i) + { + push_back(*i); + } + } + + list& operator=(const list& l) + { + if (this == &l) + { + return *this; + } + clear(); + + for (iter_list i = l.begin(); i != l.end(); ++i) + { + push_back(*i); + } + return *this; + } + + void clear() + { + while (count_ > 0) + { + pop_front(); + } + } + + void pop_front() + { + if (count_ > 0) + { + head_ = head_->next_; + delete head_->prev_; + head_->prev_ = 0; + --count_; + } + } + + size_t size() const + { + return count_; + } + iter_list begin() const + { + return iter_list(head_); + } + iter_list end() const + { + return iter_list(tail_); + } + bool empty() const + { + return count_ == 0; + } + + void push_back(const T& t) + { + if (count_ == 0) + { + head_ = new node(t); + head_->prev_ = 0; + head_->next_ = tail_; + tail_->prev_ = head_; + count_ = 1; + } + else + { + node* temp = new node(t); + temp->prev_ = tail_->prev_; + temp->next_ = tail_; + tail_->prev_->next_ = temp; + tail_->prev_ = temp; + ++count_; + } + } + + iter_list erase(iter_list pos) + { + if (pos != end()) + { + node* temp = pos.curr_; + if (temp == head_) + { + ++pos; + temp->next_->prev_ = 0; + head_ = temp->next_; + } + else + { + --pos; + temp->next_->prev_ = temp->prev_; + temp->prev_->next_ = temp->next_; + ++pos; + } + delete temp; + --count_; + } + return pos; + } + +protected: + node* head_; + node* tail_; + size_t count_; +}; + +template +struct greater +{ + bool operator()(const T& x, const T& y) 
const + { + return (x > y); + } +}; + +template +struct less +{ + bool operator()(const T& x, const T& y) const + { + return (x < y); + } +}; + +template +void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp) +{ + // [TODO] heap sort should be used here, but we simply use bubble sort now + for (RandomAccessIter i = first; i < middle; ++i) + { + // bubble sort + for (RandomAccessIter j = last - 1; j > first; --j) + { + if (comp(*j, *(j - 1))) + { + swap(*j, *(j - 1)); + } + } + } +} + +template +struct vector +{ + vector() + : data_(0), size_(0), capacity_(0) + { + } + vector(const size_t new_size, const T& value = T()) + : data_(0), size_(0), capacity_(0) + { + resize(new_size, value); + } + ~vector() + { + clear(); + } + vector(const vector& v) + : data_(0), size_(0), capacity_(0) + { + resize(v.size()); + for (size_t i = 0; i < size_; i++) + { + data_[i] = v.data_[i]; + } + } + + vector& operator=(const vector& v) + { + if (this == &v) + { + return *this; + } + resize(0); + resize(v.size()); + for (size_t i = 0; i < size_; i++) + { + data_[i] = v.data_[i]; + } + return *this; + } + + void resize(const size_t new_size, const T& value = T()) + { + try_alloc(new_size); + if (new_size > size_) + { + for (size_t i = size_; i < new_size; i++) + { + new (&data_[i]) T(value); + } + } + else if (new_size < size_) + { + for (size_t i = new_size; i < size_; i++) + { + data_[i].~T(); + } + } + size_ = new_size; + } + + void clear() + { + for (size_t i = 0; i < size_; i++) + { + data_[i].~T(); + } + delete[](char*) data_; + data_ = 0; + size_ = 0; + capacity_ = 0; + } + + T* data() const + { + return data_; + } + size_t size() const + { + return size_; + } + T& operator[](size_t i) const + { + return data_[i]; + } + T* begin() const + { + return &data_[0]; + } + T* end() const + { + return &data_[size_]; + } + bool empty() const + { + return size_ == 0; + } + + void push_back(const T& t) + { + try_alloc(size_ + 1); + new (&data_[size_]) T(t); + size_++; + } + + void insert(T* pos, T* b, T* e) + { + vector* v = 0; + if (b >= begin() && b < end()) + { + //the same vector + v = new vector(*this); + b = v->begin() + (b - begin()); + e = v->begin() + (e - begin()); + } + size_t diff = pos - begin(); + try_alloc(size_ + (e - b)); + pos = begin() + diff; + memmove(pos + (e - b), pos, (end() - pos) * sizeof(T)); + size_t len = e - b; + size_ += len; + for (size_t i = 0; i < len; i++) + { + *pos = *b; + pos++; + b++; + } + delete v; + } + + T* erase(T* pos) + { + pos->~T(); + memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T)); + size_--; + return pos; + } + +protected: + T* data_; + size_t size_; + size_t capacity_; + void try_alloc(size_t new_size) + { + if (new_size * 3 / 2 > capacity_ / 2) + { + capacity_ = new_size * 2; + T* new_data = (T*)new char[capacity_ * sizeof(T)]; + memset(static_cast(new_data), 0, capacity_ * sizeof(T)); + if (data_) + { + memmove(new_data, data_, sizeof(T) * size_); + delete[](char*) data_; + } + data_ = new_data; + } + } +}; + +struct NCNN_EXPORT string : public vector +{ + string() + { + } + string(const char* str) + { + size_t len = strlen(str); + resize(len); + memcpy(data_, str, len); + } + const char* c_str() const + { + return (const char*)data_; + } + bool operator==(const string& str2) const + { + return strcmp(data_, str2.data_) == 0; + } + bool operator==(const char* str2) const + { + return strcmp(data_, str2) == 0; + } + bool operator!=(const char* str2) const + { + return strcmp(data_, str2) != 0; + } + string& 
operator+=(const string& str1) + { + insert(end(), str1.begin(), str1.end()); + return *this; + } +}; + +inline string operator+(const string& str1, const string& str2) +{ + string str(str1); + str.insert(str.end(), str2.begin(), str2.end()); + return str; +} + +} // namespace std + +#endif // NCNN_SIMPLESTL_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/vulkan_header_fix.h b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/vulkan_header_fix.h new file mode 100644 index 0000000..103ac3e --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/include/ncnn/vulkan_header_fix.h @@ -0,0 +1,259 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_VULKAN_HEADER_FIX_H +#define NCNN_VULKAN_HEADER_FIX_H + +#include + +// This header contains new structure and function declearation to fix build with old vulkan sdk + +#if VK_HEADER_VERSION < 70 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES (VkStructureType)1000094000 +typedef enum VkSubgroupFeatureFlagBits +{ + VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001, + VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002, + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT = 0x00000004, + VK_SUBGROUP_FEATURE_BALLOT_BIT = 0x00000008, + VK_SUBGROUP_FEATURE_SHUFFLE_BIT = 0x00000010, + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT = 0x00000020, + VK_SUBGROUP_FEATURE_CLUSTERED_BIT = 0x00000040, + VK_SUBGROUP_FEATURE_QUAD_BIT = 0x00000080, + VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV = 0x00000100, + VK_SUBGROUP_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSubgroupFeatureFlagBits; +typedef VkFlags VkSubgroupFeatureFlags; +typedef struct VkPhysicalDeviceSubgroupProperties +{ + VkStructureType sType; + void* pNext; + uint32_t subgroupSize; + VkShaderStageFlags supportedStages; + VkSubgroupFeatureFlags supportedOperations; + VkBool32 quadOperationsInAllStages; +} VkPhysicalDeviceSubgroupProperties; +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES (VkStructureType)1000168000 +#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT (VkStructureType)1000168001 +typedef struct VkPhysicalDeviceMaintenance3Properties +{ + VkStructureType sType; + void* pNext; + uint32_t maxPerSetDescriptors; + VkDeviceSize maxMemoryAllocationSize; +} VkPhysicalDeviceMaintenance3Properties; +typedef struct VkDescriptorSetLayoutSupport +{ + VkStructureType sType; + void* pNext; + VkBool32 supported; +} VkDescriptorSetLayoutSupport; +typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR; +typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR; +typedef void(VKAPI_PTR* PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport); +#endif // VK_HEADER_VERSION < 70 + +#if 
VK_HEADER_VERSION < 80 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR (VkStructureType)1000177000 +typedef struct VkPhysicalDevice8BitStorageFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeaturesKHR; +#define VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR (VkStructureType)1000109000 +#define VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR (VkStructureType)1000109001 +#define VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR (VkStructureType)1000109002 +#define VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR (VkStructureType)1000109003 +#define VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR (VkStructureType)1000109004 +#define VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR (VkStructureType)1000109005 +#define VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR (VkStructureType)1000109006 +typedef struct VkAttachmentDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription2KHR; +typedef struct VkAttachmentReference2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t attachment; + VkImageLayout layout; + VkImageAspectFlags aspectMask; +} VkAttachmentReference2KHR; +typedef struct VkSubpassDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t viewMask; + uint32_t inputAttachmentCount; + const VkAttachmentReference2KHR* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference2KHR* pColorAttachments; + const VkAttachmentReference2KHR* pResolveAttachments; + const VkAttachmentReference2KHR* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription2KHR; +typedef struct VkSubpassDependency2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; +} VkSubpassDependency2KHR; +typedef struct VkRenderPassCreateInfo2KHR +{ + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription2KHR* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription2KHR* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency2KHR* pDependencies; + uint32_t correlatedViewMaskCount; + const uint32_t* pCorrelatedViewMasks; +} VkRenderPassCreateInfo2KHR; +typedef struct VkSubpassBeginInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassContents contents; +} VkSubpassBeginInfoKHR; + +typedef struct VkSubpassEndInfoKHR +{ + VkStructureType sType; + const void* pNext; +} VkSubpassEndInfoKHR; +typedef VkResult(VKAPI_PTR* PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2KHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void(VKAPI_PTR* PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const 
VkSubpassBeginInfoKHR* pSubpassBeginInfo); +typedef void(VKAPI_PTR* PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfoKHR* pSubpassBeginInfo, const VkSubpassEndInfoKHR* pSubpassEndInfo); +typedef void(VKAPI_PTR* PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR* pSubpassEndInfo); +#endif // VK_HEADER_VERSION < 80 + +#if VK_HEADER_VERSION < 95 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR (VkStructureType)1000082000 +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; +#endif // VK_HEADER_VERSION < 95 + +#if VK_HEADER_VERSION < 97 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT +{ + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; +#endif // VK_HEADER_VERSION < 97 + +#if VK_HEADER_VERSION < 101 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV (VkStructureType)1000249000 +#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249002 +typedef enum VkComponentTypeNV +{ + VK_COMPONENT_TYPE_FLOAT16_NV = 0, + VK_COMPONENT_TYPE_FLOAT32_NV = 1, + VK_COMPONENT_TYPE_FLOAT64_NV = 2, + VK_COMPONENT_TYPE_SINT8_NV = 3, + VK_COMPONENT_TYPE_SINT16_NV = 4, + VK_COMPONENT_TYPE_SINT32_NV = 5, + VK_COMPONENT_TYPE_SINT64_NV = 6, + VK_COMPONENT_TYPE_UINT8_NV = 7, + VK_COMPONENT_TYPE_UINT16_NV = 8, + VK_COMPONENT_TYPE_UINT32_NV = 9, + VK_COMPONENT_TYPE_UINT64_NV = 10, + VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV, + VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV, + VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1), + VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkComponentTypeNV; +typedef enum VkScopeNV +{ + VK_SCOPE_DEVICE_NV = 1, + VK_SCOPE_WORKGROUP_NV = 2, + VK_SCOPE_SUBGROUP_NV = 3, + VK_SCOPE_QUEUE_FAMILY_NV = 5, + VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV, + VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV, + VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1), + VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkScopeNV; +typedef struct VkCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeNV AType; + VkComponentTypeNV BType; + VkComponentTypeNV CType; + VkComponentTypeNV DType; + VkScopeNV scope; +} VkCooperativeMatrixPropertiesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV +{ + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} VkPhysicalDeviceCooperativeMatrixPropertiesNV; +typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); +#endif // VK_HEADER_VERSION < 101 + +#if VK_HEADER_VERSION < 
208 +typedef enum VkInstanceCreateFlagBits +{ + VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR = 0x00000001, + VK_INSTANCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkInstanceCreateFlagBits; +#endif // VK_HEADER_VERSION < 208 + +#endif // NCNN_VULKAN_HEADER_FIX_H diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/OGLCompilerTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/OGLCompilerTargets.cmake new file mode 100644 index 0000000..89bb144 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/OGLCompilerTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OGLCompilerTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OGLCompiler) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OGLCompiler ALIAS glslang::OGLCompiler) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/OSDependentTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/OSDependentTargets.cmake new file mode 100644 index 0000000..c345456 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/OSDependentTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `OSDependentTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::OSDependent) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(OSDependent ALIAS glslang::OSDependent) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/SPIRVTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/SPIRVTargets.cmake new file mode 100644 index 0000000..1c614ab --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/SPIRVTargets.cmake @@ -0,0 +1,9 @@ + + message(WARNING "Using `SPIRVTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::SPIRV) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + add_library(SPIRV ALIAS glslang::SPIRV) + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake new file mode 100644 index 0000000..c7acdba --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-config-version.cmake @@ -0,0 +1,70 @@ +# This is a basic version file for the Config-mode of find_package(). +# It is used by write_basic_package_version_file() as input file for configure_file() +# to create a version-file which can be installed along a config.cmake file. +# +# The created file sets PACKAGE_VERSION_EXACT if the current version string and +# the requested version string are exactly the same and it sets +# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version, +# but only if the requested major version is the same as the current one. +# The variable CVF_VERSION must be set before calling configure_file(). 
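+# For example: with PACKAGE_VERSION set to 11.12.0 below, find_package(glslang 11.12)
+# sets PACKAGE_VERSION_COMPATIBLE, find_package(glslang 11.12.0 EXACT) also sets
+# PACKAGE_VERSION_EXACT, and find_package(glslang 12.0) is rejected because the
+# requested major version differs.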
+ + +set(PACKAGE_VERSION "11.12.0") + +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + + if("11.12.0" MATCHES "^([0-9]+)\\.") + set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}") + if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0) + string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}") + endif() + else() + set(CVF_VERSION_MAJOR "11.12.0") + endif() + + if(PACKAGE_FIND_VERSION_RANGE) + # both endpoints of the range must have the expected major version + math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1") + if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT))) + set(PACKAGE_VERSION_COMPATIBLE FALSE) + elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR + AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX) + OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX))) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + else() + if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_COMPATIBLE FALSE) + endif() + + if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() + endif() +endif() + + +# if the installed project requested no architecture check, don't perform the check +if("FALSE") + return() +endif() + +# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it: +if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "") + return() +endif() + +# check that the installed version has the same 32/64bit-ness as the one which is currently searching: +if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8") + math(EXPR installedBits "8 * 8") + set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") + set(PACKAGE_VERSION_UNSUITABLE TRUE) +endif() diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-config.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-config.cmake new file mode 100644 index 0000000..5ebe599 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-config.cmake @@ -0,0 +1,27 @@ + +####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() ####### +####### Any changes to this file will be overwritten by the next CMake run #### +####### The input file was glslang-config.cmake.in ######## + +get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) + +macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() +endmacro() + +macro(check_required_components _NAME) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(NOT ${_NAME}_${comp}_FOUND) + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + endif() + endif() + endforeach() +endmacro() + +#################################################################################### + include("${PACKAGE_PREFIX_DIR}/lib/cmake/glslang/glslang-targets.cmake") 
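For consumers, this config file is what find_package(glslang CONFIG) loads. A minimal sketch, assuming this x86_64 prefix is on CMAKE_PREFIX_PATH and a target my_tool already exists:

    find_package(glslang CONFIG REQUIRED)
    target_link_libraries(my_tool PRIVATE glslang::glslang glslang::SPIRV)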
+ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-targets-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-targets-release.cmake new file mode 100644 index 0000000..e4de522 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-targets-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "glslang::OSDependent" for configuration "Release" +set_property(TARGET glslang::OSDependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OSDependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOSDependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OSDependent ) +list(APPEND _cmake_import_check_files_for_glslang::OSDependent "${_IMPORT_PREFIX}/lib/libOSDependent.a" ) + +# Import target "glslang::glslang" for configuration "Release" +set_property(TARGET glslang::glslang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::glslang PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libglslang.a" + ) + +list(APPEND _cmake_import_check_targets glslang::glslang ) +list(APPEND _cmake_import_check_files_for_glslang::glslang "${_IMPORT_PREFIX}/lib/libglslang.a" ) + +# Import target "glslang::MachineIndependent" for configuration "Release" +set_property(TARGET glslang::MachineIndependent APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::MachineIndependent PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" + ) + +list(APPEND _cmake_import_check_targets glslang::MachineIndependent ) +list(APPEND _cmake_import_check_files_for_glslang::MachineIndependent "${_IMPORT_PREFIX}/lib/libMachineIndependent.a" ) + +# Import target "glslang::GenericCodeGen" for configuration "Release" +set_property(TARGET glslang::GenericCodeGen APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::GenericCodeGen PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" + ) + +list(APPEND _cmake_import_check_targets glslang::GenericCodeGen ) +list(APPEND _cmake_import_check_files_for_glslang::GenericCodeGen "${_IMPORT_PREFIX}/lib/libGenericCodeGen.a" ) + +# Import target "glslang::OGLCompiler" for configuration "Release" +set_property(TARGET glslang::OGLCompiler APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::OGLCompiler PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" + ) + +list(APPEND _cmake_import_check_targets glslang::OGLCompiler ) +list(APPEND _cmake_import_check_files_for_glslang::OGLCompiler "${_IMPORT_PREFIX}/lib/libOGLCompiler.a" ) + +# Import target "glslang::SPIRV" for configuration "Release" +set_property(TARGET glslang::SPIRV APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(glslang::SPIRV PROPERTIES + 
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libSPIRV.a" + ) + +list(APPEND _cmake_import_check_targets glslang::SPIRV ) +list(APPEND _cmake_import_check_files_for_glslang::SPIRV "${_IMPORT_PREFIX}/lib/libSPIRV.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake new file mode 100644 index 0000000..2173b87 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslang/glslang-targets.cmake @@ -0,0 +1,135 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS glslang::OSDependent glslang::glslang glslang::MachineIndependent glslang::GenericCodeGen glslang::OGLCompiler glslang::SPIRV) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
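+# (The four PATH steps below walk up from .../lib/cmake/glslang/glslang-targets.cmake
+# to the install prefix, removing one path component per step.)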
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target glslang::OSDependent +add_library(glslang::OSDependent STATIC IMPORTED) + +set_target_properties(glslang::OSDependent PROPERTIES + INTERFACE_LINK_LIBRARIES "-pthread" +) + +# Create imported target glslang::glslang +add_library(glslang::glslang STATIC IMPORTED) + +set_target_properties(glslang::glslang PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target glslang::MachineIndependent +add_library(glslang::MachineIndependent STATIC IMPORTED) + +set_target_properties(glslang::MachineIndependent PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target glslang::GenericCodeGen +add_library(glslang::GenericCodeGen STATIC IMPORTED) + +# Create imported target glslang::OGLCompiler +add_library(glslang::OGLCompiler STATIC IMPORTED) + +# Create imported target glslang::SPIRV +add_library(glslang::SPIRV STATIC IMPORTED) + +set_target_properties(glslang::SPIRV PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_LINK_LIBRARIES "\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/glslang-targets-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslangTargets.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslangTargets.cmake new file mode 100644 index 0000000..6f4e3bc --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/glslangTargets.cmake @@ -0,0 +1,15 @@ + + message(WARNING "Using `glslangTargets.cmake` is deprecated: use `find_package(glslang)` to find glslang CMake targets.") + + if (NOT TARGET glslang::glslang) + include("${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/glslang/glslang-targets.cmake") + endif() + + if(OFF) + add_library(glslang ALIAS glslang::glslang) + else() + add_library(glslang ALIAS glslang::glslang) + add_library(MachineIndependent ALIAS glslang::MachineIndependent) + add_library(GenericCodeGen ALIAS glslang::GenericCodeGen) + endif() + \ No newline at end of file diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnn-release.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnn-release.cmake new file mode 100644 index 0000000..37f24ba --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnn-release.cmake @@ -0,0 +1,19 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "ncnn" for configuration "Release" +set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(ncnn PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.a" + ) + +list(APPEND _cmake_import_check_targets ncnn ) +list(APPEND _cmake_import_check_files_for_ncnn "${_IMPORT_PREFIX}/lib/libncnn.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake new file mode 100644 index 0000000..f224c1b --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnn.cmake @@ -0,0 +1,125 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
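+# (The block below compares already-defined imported targets against this export
+# set: if all are defined the file returns early; if only some are defined it
+# stops with FATAL_ERROR instead of redefining them.)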
+set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS ncnn) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. +get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target ncnn +add_library(ncnn STATIC IMPORTED) + +set_target_properties(ncnn PROPERTIES + INTERFACE_COMPILE_OPTIONS "-fno-rtti;-fno-exceptions" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn" + INTERFACE_LINK_LIBRARIES "-fopenmp;-static-openmp;-Wl,-wrap,__kmp_affinity_determine_capable;Threads::Threads;Vulkan::Vulkan;\$<LINK_ONLY:glslang::glslang>;\$<LINK_ONLY:glslang::SPIRV>;android;jnigraphics;log" + INTERFACE_POSITION_INDEPENDENT_CODE "ON" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/ncnn-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# Make sure the targets which have been exported in some other +# export set exist. 
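+# (Note: this static Vulkan build of libncnn.a compiles GLSL shaders to SPIR-V at +# runtime, so its link interface above references glslang::glslang and glslang::SPIRV; +# the check below verifies that the bundled glslang export actually provided them.)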
+unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) +foreach(_target "glslang::glslang" "glslang::SPIRV" ) + if(NOT TARGET "${_target}" ) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets "${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets} ${_target}") + endif() +endforeach() + +if(DEFINED ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + if(CMAKE_FIND_PACKAGE_NAME) + set( ${CMAKE_FIND_PACKAGE_NAME}_FOUND FALSE) + set( ${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + else() + message(FATAL_ERROR "The following imported targets are referenced, but are missing: ${${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets}") + endif() +endif() +unset(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE_targets) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnnConfig.cmake b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnnConfig.cmake new file mode 100644 index 0000000..c1e5613 --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/cmake/ncnn/ncnnConfig.cmake @@ -0,0 +1,42 @@ +set(NCNN_OPENMP ON) +set(NCNN_THREADS ON) +set(NCNN_VULKAN ON) +set(NCNN_SHARED_LIB OFF) +set(NCNN_SYSTEM_GLSLANG OFF) + +if(NCNN_OPENMP) + find_package(OpenMP) +endif() + +if(NCNN_THREADS) + set(CMAKE_THREAD_PREFER_PTHREAD TRUE) + set(THREADS_PREFER_PTHREAD_FLAG TRUE) + find_package(Threads REQUIRED) +endif() + +if(NCNN_VULKAN) + find_package(Vulkan REQUIRED) + + if(NOT NCNN_SHARED_LIB) + if(NCNN_SYSTEM_GLSLANG) + find_package(glslang QUIET) + if(NOT glslang_FOUND) + set(GLSLANG_TARGET_DIR "") + include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake) + include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake) + if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + # hlsl support can be optional + include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + endif() + include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake) + include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake) + endif() + else() + set(glslang_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/glslang") + find_package(glslang QUIET) + endif() + + endif() +endif() + +include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libGenericCodeGen.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libGenericCodeGen.a new file mode 100644 index 0000000..d6f48c8 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libGenericCodeGen.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libMachineIndependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libMachineIndependent.a new file mode 100644 index 0000000..1e53d04 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libMachineIndependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libOGLCompiler.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libOGLCompiler.a new file mode 100644 index 0000000..5e0bf8d Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libOGLCompiler.a differ diff --git 
a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libOSDependent.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libOSDependent.a new file mode 100644 index 0000000..34a7a3c Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libOSDependent.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libSPIRV.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libSPIRV.a new file mode 100644 index 0000000..edbcdbc Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libSPIRV.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libglslang.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libglslang.a new file mode 100644 index 0000000..74d8d61 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libglslang.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libncnn.a b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libncnn.a new file mode 100644 index 0000000..feec7b4 Binary files /dev/null and b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/libncnn.a differ diff --git a/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/pkgconfig/ncnn.pc b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/pkgconfig/ncnn.pc new file mode 100644 index 0000000..e683e4c --- /dev/null +++ b/android-demo/app/src/main/jni/ncnn-20230223-android-vulkan/x86_64/lib/pkgconfig/ncnn.pc @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. +librarydir=${prefix}/lib +includedir=${prefix}/include + +Name: ncnn +Description: high-performance neural network inference framework optimized for the mobile platform +Version: 1.0.20230223 +URL: https://github.com/Tencent/ncnn +Libs: -L"${librarydir}" -lncnn +Cflags: -I"${includedir}" + diff --git a/android-demo/app/src/main/jni/ndkcamera.cpp b/android-demo/app/src/main/jni/ndkcamera.cpp new file mode 100644 index 0000000..243af24 --- /dev/null +++ b/android-demo/app/src/main/jni/ndkcamera.cpp @@ -0,0 +1,757 @@ +#include "ndkcamera.h" + +#include <string> + +#include <android/log.h> + +#include <opencv2/core/core.hpp> + +#include "mat.h" + +static void onDisconnected(void* context, ACameraDevice* device) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onDisconnected %p", device); +} + +static void onError(void* context, ACameraDevice* device, int error) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onError %p %d", device, error); +} + +static void onImageAvailable(void* context, AImageReader* reader) +{ +// __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onImageAvailable %p", reader); + + AImage* image = 0; + media_status_t status = AImageReader_acquireLatestImage(reader, &image); + + if (status != AMEDIA_OK) + { + // error + return; + } + + int32_t format; + AImage_getFormat(image, &format); + + // assert format == AIMAGE_FORMAT_YUV_420_888 + + int32_t width = 0; + int32_t height = 0; + AImage_getWidth(image, &width); + AImage_getHeight(image, &height); + + int32_t y_pixelStride = 0; + int32_t u_pixelStride = 0; + int32_t v_pixelStride = 0; + AImage_getPlanePixelStride(image, 0, &y_pixelStride); + AImage_getPlanePixelStride(image, 1, &u_pixelStride); + AImage_getPlanePixelStride(image, 2, &v_pixelStride); + + int32_t y_rowStride = 0; + int32_t u_rowStride = 0; + int32_t v_rowStride = 0; + 
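// note: YUV_420_888 reports per-plane pixel and row strides, so query them to + // detect the tightly-packed NV21 case below and fall back to a manual repack otherwise. + 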
AImage_getPlaneRowStride(image, 0, &y_rowStride); + AImage_getPlaneRowStride(image, 1, &u_rowStride); + AImage_getPlaneRowStride(image, 2, &v_rowStride); + + uint8_t* y_data = 0; + uint8_t* u_data = 0; + uint8_t* v_data = 0; + int y_len = 0; + int u_len = 0; + int v_len = 0; + AImage_getPlaneData(image, 0, &y_data, &y_len); + AImage_getPlaneData(image, 1, &u_data, &u_len); + AImage_getPlaneData(image, 2, &v_data, &v_len); + + if (u_data == v_data + 1 && v_data == y_data + width * height && y_pixelStride == 1 && u_pixelStride == 2 && v_pixelStride == 2 && y_rowStride == width && u_rowStride == width && v_rowStride == width) + { + // already nv21 :) + ((NdkCamera*)context)->on_image((unsigned char*)y_data, (int)width, (int)height); + } + else + { + // construct nv21 + unsigned char* nv21 = new unsigned char[width * height + width * height / 2]; + { + // Y + unsigned char* yptr = nv21; + for (int y=0; y<height; y++) + { + const unsigned char* y_data_ptr = y_data + y_rowStride * y; + for (int x=0; x<width; x++) + { + yptr[0] = y_data_ptr[0]; + yptr++; + y_data_ptr += y_pixelStride; + } + } + + // UV + unsigned char* uvptr = nv21 + width * height; + for (int y=0; y<height/2; y++) + { + const unsigned char* v_data_ptr = v_data + v_rowStride * y; + const unsigned char* u_data_ptr = u_data + u_rowStride * y; + for (int x=0; x<width/2; x++) + { + uvptr[0] = v_data_ptr[0]; + uvptr[1] = u_data_ptr[0]; + uvptr += 2; + v_data_ptr += v_pixelStride; + u_data_ptr += u_pixelStride; + } + } + } + + ((NdkCamera*)context)->on_image((unsigned char*)nv21, (int)width, (int)height); + + delete[] nv21; + } + + AImage_delete(image); +} + +static void onSessionActive(void* context, ACameraCaptureSession *session) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onSessionActive %p", session); +} + +static void onSessionReady(void* context, ACameraCaptureSession *session) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onSessionReady %p", session); +} + +static void onSessionClosed(void* context, ACameraCaptureSession *session) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onSessionClosed %p", session); +} + +void onCaptureFailed(void* context, ACameraCaptureSession* session, ACaptureRequest* request, ACameraCaptureFailure* failure) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onCaptureFailed %p %p %p", session, request, failure); +} + +void onCaptureSequenceCompleted(void* context, ACameraCaptureSession* session, int sequenceId, int64_t frameNumber) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onCaptureSequenceCompleted %p %d %ld", session, sequenceId, frameNumber); +} + +void onCaptureSequenceAborted(void* context, ACameraCaptureSession* session, int sequenceId) +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onCaptureSequenceAborted %p %d", session, sequenceId); +} + +void onCaptureCompleted(void* context, ACameraCaptureSession* session, ACaptureRequest* request, const ACameraMetadata* result) +{ +// __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "onCaptureCompleted %p %p %p", session, request, result); +} + +NdkCamera::NdkCamera() +{ + camera_facing = 0; + camera_orientation = 0; + + camera_manager = 0; + camera_device = 0; + image_reader = 0; + image_reader_surface = 0; + image_reader_target = 0; + capture_request = 0; + capture_session_output_container = 0; + capture_session_output = 0; + capture_session = 0; + + + // setup imagereader and its surface + { + AImageReader_new(640, 480, AIMAGE_FORMAT_YUV_420_888, /*maxImages*/2, &image_reader); + + AImageReader_ImageListener listener; + listener.context = this; + listener.onImageAvailable = onImageAvailable; + + AImageReader_setImageListener(image_reader, &listener); + + AImageReader_getWindow(image_reader, &image_reader_surface); + + ANativeWindow_acquire(image_reader_surface); + } +} + +NdkCamera::~NdkCamera() +{ + close(); + + if (image_reader) + { + AImageReader_delete(image_reader); + image_reader = 0; + } + + if (image_reader_surface) + { + ANativeWindow_release(image_reader_surface); + image_reader_surface = 0; + } +} + +int NdkCamera::open(int _camera_facing) +{ + 
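// open() walks the camera id list, picks the first camera whose lens facing + // matches camera_facing, remembers its sensor orientation for later rotation, + // then wires the imagereader surface into a repeating preview capture session. + 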
__android_log_print(ANDROID_LOG_WARN, "NdkCamera", "open"); + + camera_facing = _camera_facing; + + camera_manager = ACameraManager_create(); + + // find camera matching the requested facing + std::string camera_id; + { + ACameraIdList* camera_id_list = 0; + ACameraManager_getCameraIdList(camera_manager, &camera_id_list); + + for (int i = 0; i < camera_id_list->numCameras; ++i) + { + const char* id = camera_id_list->cameraIds[i]; + ACameraMetadata* camera_metadata = 0; + ACameraManager_getCameraCharacteristics(camera_manager, id, &camera_metadata); + + // query facing + acamera_metadata_enum_android_lens_facing_t facing = ACAMERA_LENS_FACING_FRONT; + { + ACameraMetadata_const_entry e = { 0 }; + ACameraMetadata_getConstEntry(camera_metadata, ACAMERA_LENS_FACING, &e); + facing = (acamera_metadata_enum_android_lens_facing_t)e.data.u8[0]; + } + + if (camera_facing == 0 && facing != ACAMERA_LENS_FACING_FRONT) + { + ACameraMetadata_free(camera_metadata); + continue; + } + + if (camera_facing == 1 && facing != ACAMERA_LENS_FACING_BACK) + { + ACameraMetadata_free(camera_metadata); + continue; + } + + camera_id = id; + + // query orientation + int orientation = 0; + { + ACameraMetadata_const_entry e = { 0 }; + ACameraMetadata_getConstEntry(camera_metadata, ACAMERA_SENSOR_ORIENTATION, &e); + + orientation = (int)e.data.i32[0]; + } + + camera_orientation = orientation; + + ACameraMetadata_free(camera_metadata); + + break; + } + + ACameraManager_deleteCameraIdList(camera_id_list); + } + + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "open %s %d", camera_id.c_str(), camera_orientation); + + // open camera + { + ACameraDevice_StateCallbacks camera_device_state_callbacks; + camera_device_state_callbacks.context = this; + camera_device_state_callbacks.onDisconnected = onDisconnected; + camera_device_state_callbacks.onError = onError; + + ACameraManager_openCamera(camera_manager, camera_id.c_str(), &camera_device_state_callbacks, &camera_device); + } + + // capture request + { + ACameraDevice_createCaptureRequest(camera_device, TEMPLATE_PREVIEW, &capture_request); + + ACameraOutputTarget_create(image_reader_surface, &image_reader_target); + ACaptureRequest_addTarget(capture_request, image_reader_target); + } + + // capture session + { + ACameraCaptureSession_stateCallbacks camera_capture_session_state_callbacks; + camera_capture_session_state_callbacks.context = this; + camera_capture_session_state_callbacks.onActive = onSessionActive; + camera_capture_session_state_callbacks.onReady = onSessionReady; + camera_capture_session_state_callbacks.onClosed = onSessionClosed; + + ACaptureSessionOutputContainer_create(&capture_session_output_container); + + ACaptureSessionOutput_create(image_reader_surface, &capture_session_output); + + ACaptureSessionOutputContainer_add(capture_session_output_container, capture_session_output); + + ACameraDevice_createCaptureSession(camera_device, capture_session_output_container, &camera_capture_session_state_callbacks, &capture_session); + + ACameraCaptureSession_captureCallbacks camera_capture_session_capture_callbacks; + camera_capture_session_capture_callbacks.context = this; + camera_capture_session_capture_callbacks.onCaptureStarted = 0; + camera_capture_session_capture_callbacks.onCaptureProgressed = 0; + camera_capture_session_capture_callbacks.onCaptureCompleted = onCaptureCompleted; + camera_capture_session_capture_callbacks.onCaptureFailed = onCaptureFailed; + camera_capture_session_capture_callbacks.onCaptureSequenceCompleted = onCaptureSequenceCompleted; + 
camera_capture_session_capture_callbacks.onCaptureSequenceAborted = onCaptureSequenceAborted; + camera_capture_session_capture_callbacks.onCaptureBufferLost = 0; + + ACameraCaptureSession_setRepeatingRequest(capture_session, &camera_capture_session_capture_callbacks, 1, &capture_request, nullptr); + } + + return 0; +} + +void NdkCamera::close() +{ + __android_log_print(ANDROID_LOG_WARN, "NdkCamera", "close"); + + if (capture_session) + { + ACameraCaptureSession_stopRepeating(capture_session); + ACameraCaptureSession_close(capture_session); + capture_session = 0; + } + + if (camera_device) + { + ACameraDevice_close(camera_device); + camera_device = 0; + } + + if (capture_session_output_container) + { + ACaptureSessionOutputContainer_free(capture_session_output_container); + capture_session_output_container = 0; + } + + if (capture_session_output) + { + ACaptureSessionOutput_free(capture_session_output); + capture_session_output = 0; + } + + if (capture_request) + { + ACaptureRequest_free(capture_request); + capture_request = 0; + } + + if (image_reader_target) + { + ACameraOutputTarget_free(image_reader_target); + image_reader_target = 0; + } + + if (camera_manager) + { + ACameraManager_delete(camera_manager); + camera_manager = 0; + } +} + +void NdkCamera::on_image(const cv::Mat& rgb) const +{ +} + +void NdkCamera::on_image(const unsigned char* nv21, int nv21_width, int nv21_height) const +{ + // rotate nv21 + int w = 0; + int h = 0; + int rotate_type = 0; + { + if (camera_orientation == 0) + { + w = nv21_width; + h = nv21_height; + rotate_type = camera_facing == 0 ? 2 : 1; + } + if (camera_orientation == 90) + { + w = nv21_height; + h = nv21_width; + rotate_type = camera_facing == 0 ? 5 : 6; + } + if (camera_orientation == 180) + { + w = nv21_width; + h = nv21_height; + rotate_type = camera_facing == 0 ? 4 : 3; + } + if (camera_orientation == 270) + { + w = nv21_height; + h = nv21_width; + rotate_type = camera_facing == 0 ? 
7 : 8; + } + } + + cv::Mat nv21_rotated(h + h / 2, w, CV_8UC1); + ncnn::kanna_rotate_yuv420sp(nv21, nv21_width, nv21_height, nv21_rotated.data, w, h, rotate_type); + + // nv21_rotated to rgb + cv::Mat rgb(h, w, CV_8UC3); + ncnn::yuv420sp2rgb(nv21_rotated.data, w, h, rgb.data); + + on_image(rgb); +} + +static const int NDKCAMERAWINDOW_ID = 233; + +NdkCameraWindow::NdkCameraWindow() : NdkCamera() +{ + sensor_manager = 0; + sensor_event_queue = 0; + accelerometer_sensor = 0; + win = 0; + + accelerometer_orientation = 0; + + // sensor + sensor_manager = ASensorManager_getInstance(); + + accelerometer_sensor = ASensorManager_getDefaultSensor(sensor_manager, ASENSOR_TYPE_ACCELEROMETER); +} + +NdkCameraWindow::~NdkCameraWindow() +{ + if (accelerometer_sensor) + { + ASensorEventQueue_disableSensor(sensor_event_queue, accelerometer_sensor); + accelerometer_sensor = 0; + } + + if (sensor_event_queue) + { + ASensorManager_destroyEventQueue(sensor_manager, sensor_event_queue); + sensor_event_queue = 0; + } + + if (win) + { + ANativeWindow_release(win); + } +} + +void NdkCameraWindow::set_window(ANativeWindow* _win) +{ + if (win) + { + ANativeWindow_release(win); + } + + win = _win; + ANativeWindow_acquire(win); +} + +void NdkCameraWindow::on_image_render(cv::Mat& rgb) const +{ +} + +void NdkCameraWindow::on_image(const unsigned char* nv21, int nv21_width, int nv21_height) const +{ + // resolve orientation from camera_orientation and accelerometer_sensor + { + if (!sensor_event_queue) + { + sensor_event_queue = ASensorManager_createEventQueue(sensor_manager, ALooper_prepare(ALOOPER_PREPARE_ALLOW_NON_CALLBACKS), NDKCAMERAWINDOW_ID, 0, 0); + + ASensorEventQueue_enableSensor(sensor_event_queue, accelerometer_sensor); + } + + int id = ALooper_pollAll(0, 0, 0, 0); + if (id == NDKCAMERAWINDOW_ID) + { + ASensorEvent e[8]; + ssize_t num_event = 0; + while (ASensorEventQueue_hasEvents(sensor_event_queue) == 1) + { + num_event = ASensorEventQueue_getEvents(sensor_event_queue, e, 8); + if (num_event < 0) + break; + } + + if (num_event > 0) + { + float acceleration_x = e[num_event - 1].acceleration.x; + float acceleration_y = e[num_event - 1].acceleration.y; + float acceleration_z = e[num_event - 1].acceleration.z; +// __android_log_print(ANDROID_LOG_WARN, "NdkCameraWindow", "x = %f, y = %f, z = %f", x, y, z); + + if (acceleration_y > 7) + { + accelerometer_orientation = 0; + } + if (acceleration_x < -7) + { + accelerometer_orientation = 90; + } + if (acceleration_y < -7) + { + accelerometer_orientation = 180; + } + if (acceleration_x > 7) + { + accelerometer_orientation = 270; + } + } + } + } + + // roi crop and rotate nv21 + int nv21_roi_x = 0; + int nv21_roi_y = 0; + int nv21_roi_w = 0; + int nv21_roi_h = 0; + int roi_x = 0; + int roi_y = 0; + int roi_w = 0; + int roi_h = 0; + int rotate_type = 0; + int render_w = 0; + int render_h = 0; + int render_rotate_type = 0; + { + int win_w = ANativeWindow_getWidth(win); + int win_h = ANativeWindow_getHeight(win); + + if (accelerometer_orientation == 90 || accelerometer_orientation == 270) + { + std::swap(win_w, win_h); + } + + const int final_orientation = (camera_orientation + accelerometer_orientation) % 360; + + if (final_orientation == 0 || final_orientation == 180) + { + if (win_w * nv21_height > win_h * nv21_width) + { + roi_w = nv21_width; + roi_h = (nv21_width * win_h / win_w) / 2 * 2; + roi_x = 0; + roi_y = ((nv21_height - roi_h) / 2) / 2 * 2; + } + else + { + roi_h = nv21_height; + roi_w = (nv21_height * win_w / win_h) / 2 * 2; + roi_x = ((nv21_width - roi_w) 
/ 2) / 2 * 2; + roi_y = 0; + } + + nv21_roi_x = roi_x; + nv21_roi_y = roi_y; + nv21_roi_w = roi_w; + nv21_roi_h = roi_h; + } + if (final_orientation == 90 || final_orientation == 270) + { + if (win_w * nv21_width > win_h * nv21_height) + { + roi_w = nv21_height; + roi_h = (nv21_height * win_h / win_w) / 2 * 2; + roi_x = 0; + roi_y = ((nv21_width - roi_h) / 2) / 2 * 2; + } + else + { + roi_h = nv21_width; + roi_w = (nv21_width * win_w / win_h) / 2 * 2; + roi_x = ((nv21_height - roi_w) / 2) / 2 * 2; + roi_y = 0; + } + + nv21_roi_x = roi_y; + nv21_roi_y = roi_x; + nv21_roi_w = roi_h; + nv21_roi_h = roi_w; + } + + if (camera_facing == 0) + { + if (camera_orientation == 0 && accelerometer_orientation == 0) + { + rotate_type = 2; + } + if (camera_orientation == 0 && accelerometer_orientation == 90) + { + rotate_type = 7; + } + if (camera_orientation == 0 && accelerometer_orientation == 180) + { + rotate_type = 4; + } + if (camera_orientation == 0 && accelerometer_orientation == 270) + { + rotate_type = 5; + } + if (camera_orientation == 90 && accelerometer_orientation == 0) + { + rotate_type = 5; + } + if (camera_orientation == 90 && accelerometer_orientation == 90) + { + rotate_type = 2; + } + if (camera_orientation == 90 && accelerometer_orientation == 180) + { + rotate_type = 7; + } + if (camera_orientation == 90 && accelerometer_orientation == 270) + { + rotate_type = 4; + } + if (camera_orientation == 180 && accelerometer_orientation == 0) + { + rotate_type = 4; + } + if (camera_orientation == 180 && accelerometer_orientation == 90) + { + rotate_type = 5; + } + if (camera_orientation == 180 && accelerometer_orientation == 180) + { + rotate_type = 2; + } + if (camera_orientation == 180 && accelerometer_orientation == 270) + { + rotate_type = 7; + } + if (camera_orientation == 270 && accelerometer_orientation == 0) + { + rotate_type = 7; + } + if (camera_orientation == 270 && accelerometer_orientation == 90) + { + rotate_type = 4; + } + if (camera_orientation == 270 && accelerometer_orientation == 180) + { + rotate_type = 5; + } + if (camera_orientation == 270 && accelerometer_orientation == 270) + { + rotate_type = 2; + } + } + else + { + if (final_orientation == 0) + { + rotate_type = 1; + } + if (final_orientation == 90) + { + rotate_type = 6; + } + if (final_orientation == 180) + { + rotate_type = 3; + } + if (final_orientation == 270) + { + rotate_type = 8; + } + } + + if (accelerometer_orientation == 0) + { + render_w = roi_w; + render_h = roi_h; + render_rotate_type = 1; + } + if (accelerometer_orientation == 90) + { + render_w = roi_h; + render_h = roi_w; + render_rotate_type = 8; + } + if (accelerometer_orientation == 180) + { + render_w = roi_w; + render_h = roi_h; + render_rotate_type = 3; + } + if (accelerometer_orientation == 270) + { + render_w = roi_h; + render_h = roi_w; + render_rotate_type = 6; + } + } + + // crop and rotate nv21 + cv::Mat nv21_croprotated(roi_h + roi_h / 2, roi_w, CV_8UC1); + { + const unsigned char* srcY = nv21 + nv21_roi_y * nv21_width + nv21_roi_x; + unsigned char* dstY = nv21_croprotated.data; + ncnn::kanna_rotate_c1(srcY, nv21_roi_w, nv21_roi_h, nv21_width, dstY, roi_w, roi_h, roi_w, rotate_type); + + const unsigned char* srcUV = nv21 + nv21_width * nv21_height + nv21_roi_y * nv21_width / 2 + nv21_roi_x; + unsigned char* dstUV = nv21_croprotated.data + roi_w * roi_h; + ncnn::kanna_rotate_c2(srcUV, nv21_roi_w / 2, nv21_roi_h / 2, nv21_width, dstUV, roi_w / 2, roi_h / 2, roi_w, rotate_type); + } + + // nv21_croprotated to rgb + cv::Mat rgb(roi_h, roi_w, 
CV_8UC3); + ncnn::yuv420sp2rgb(nv21_croprotated.data, roi_w, roi_h, rgb.data); + + on_image_render(rgb); + + // rotate to native window orientation + cv::Mat rgb_render(render_h, render_w, CV_8UC3); + ncnn::kanna_rotate_c3(rgb.data, roi_w, roi_h, rgb_render.data, render_w, render_h, render_rotate_type); + + ANativeWindow_setBuffersGeometry(win, render_w, render_h, AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM); + + ANativeWindow_Buffer buf; + ANativeWindow_lock(win, &buf, NULL); + + // scale to target size + if (buf.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM || buf.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) + { + for (int y = 0; y < render_h; y++) + { + const unsigned char* ptr = rgb_render.ptr(y); + unsigned char* outptr = (unsigned char*)buf.bits + buf.stride * 4 * y; + + int x = 0; +#if __ARM_NEON + for (; x + 7 < render_w; x += 8) + { + uint8x8x3_t _rgb = vld3_u8(ptr); + uint8x8x4_t _rgba; + _rgba.val[0] = _rgb.val[0]; + _rgba.val[1] = _rgb.val[1]; + _rgba.val[2] = _rgb.val[2]; + _rgba.val[3] = vdup_n_u8(255); + vst4_u8(outptr, _rgba); + + ptr += 24; + outptr += 32; + } +#endif // __ARM_NEON + for (; x < render_w; x++) + { + outptr[0] = ptr[0]; + outptr[1] = ptr[1]; + outptr[2] = ptr[2]; + outptr[3] = 255; + + ptr += 3; + outptr += 4; + } + } + } + + ANativeWindow_unlockAndPost(win); +} diff --git a/android-demo/app/src/main/jni/ndkcamera.h b/android-demo/app/src/main/jni/ndkcamera.h new file mode 100644 index 0000000..ddd30eb --- /dev/null +++ b/android-demo/app/src/main/jni/ndkcamera.h @@ -0,0 +1,80 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
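+ +// NdkCamera streams camera2 preview frames through an AImageReader and delivers +// each frame to on_image() as NV21; NdkCameraWindow additionally crops and rotates +// the frame to match the display orientation and blits the RGB result (after the +// user's on_image_render() hook) into the target ANativeWindow.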
+ +#ifndef NDKCAMERA_H +#define NDKCAMERA_H + +#include <android/native_window.h> +#include <android/sensor.h> +#include <camera/NdkCameraDevice.h> +#include <camera/NdkCameraManager.h> +#include <camera/NdkCameraMetadata.h> +#include <camera/NdkCaptureRequest.h> +#include <media/NdkImageReader.h> + +#include <opencv2/core/core.hpp> + +class NdkCamera +{ +public: + NdkCamera(); + virtual ~NdkCamera(); + + // facing 0=front 1=back + int open(int camera_facing = 0); + void close(); + + virtual void on_image(const cv::Mat& rgb) const; + + virtual void on_image(const unsigned char* nv21, int nv21_width, int nv21_height) const; + +public: + int camera_facing; + int camera_orientation; + +private: + ACameraManager* camera_manager; + ACameraDevice* camera_device; + AImageReader* image_reader; + ANativeWindow* image_reader_surface; + ACameraOutputTarget* image_reader_target; + ACaptureRequest* capture_request; + ACaptureSessionOutputContainer* capture_session_output_container; + ACaptureSessionOutput* capture_session_output; + ACameraCaptureSession* capture_session; +}; + +class NdkCameraWindow : public NdkCamera +{ +public: + NdkCameraWindow(); + virtual ~NdkCameraWindow(); + + void set_window(ANativeWindow* win); + + virtual void on_image_render(cv::Mat& rgb) const; + + virtual void on_image(const unsigned char* nv21, int nv21_width, int nv21_height) const; + +public: + mutable int accelerometer_orientation; + +private: + ASensorManager* sensor_manager; + mutable ASensorEventQueue* sensor_event_queue; + const ASensor* accelerometer_sensor; + ANativeWindow* win; +}; + +#endif // NDKCAMERA_H diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/LICENSE b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/LICENSE new file mode 100644 index 0000000..fad41ea --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Result_Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Result_Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/README.android b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/README.android new file mode 100644 index 0000000..c572577 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/README.android @@ -0,0 +1 @@ +See http://opencv.org/platforms/android diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/OpenCVConfig-version.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/OpenCVConfig-version.cmake new file mode 100644 index 0000000..7a05194 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/OpenCVConfig-version.cmake @@ -0,0 +1,15 @@ +set(OpenCV_VERSION 4.6.0) +set(PACKAGE_VERSION ${OpenCV_VERSION}) + +set(PACKAGE_VERSION_EXACT False) +set(PACKAGE_VERSION_COMPATIBLE False) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT True) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() + +if(PACKAGE_FIND_VERSION_MAJOR EQUAL 4 + AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/OpenCVConfig.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/OpenCVConfig.cmake new file mode 100644 index 0000000..48ac4b0 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/OpenCVConfig.cmake @@ -0,0 +1,50 @@ +# =================================================================================== +# The OpenCV CMake configuration file +# +# ** File generated automatically, do not modify ** +# +# Usage from an external project: +# In your CMakeLists.txt, add these lines: +# +# find_package(OpenCV REQUIRED) +# include_directories(${OpenCV_INCLUDE_DIRS}) # Not needed for CMake >= 2.8.11 +# target_link_libraries(MY_TARGET_NAME ${OpenCV_LIBS}) +# +# Or you can search for specific OpenCV modules: +# +# find_package(OpenCV REQUIRED core videoio) +# +# If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE. +# +# This file will define the following variables: +# - OpenCV_LIBS : The list of all imported targets for OpenCV modules. +# - OpenCV_INCLUDE_DIRS : The OpenCV include directories. +# - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API. +# - OpenCV_VERSION : The version of this OpenCV build: "4.6.0" +# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION: "4" +# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION: "6" +# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION: "0" +# - OpenCV_VERSION_STATUS : Development status of this build: "" +# +# =================================================================================== + +# Extract directory name from full path of the file currently being processed. +# Note that CMake 2.8.3 introduced CMAKE_CURRENT_LIST_DIR. We reimplement it +# for older versions of CMake to support these as well. 
+if(CMAKE_VERSION VERSION_LESS "2.8.3") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +endif() + +if(NOT DEFINED OpenCV_CONFIG_SUBDIR) + set(OpenCV_CONFIG_SUBDIR "/abi-${ANDROID_NDK_ABI_NAME}") +endif() + +set(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_DIR}${OpenCV_CONFIG_SUBDIR}") +if(EXISTS "${OpenCV_CONFIG_PATH}/OpenCVConfig.cmake") + include("${OpenCV_CONFIG_PATH}/OpenCVConfig.cmake") +else() + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "Found OpenCV Android Pack but it has no binaries compatible with your ABI (can't find: ${OpenCV_CONFIG_SUBDIR})") + endif() + set(OpenCV_FOUND FALSE) +endif() diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVConfig-version.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVConfig-version.cmake new file mode 100644 index 0000000..7a05194 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVConfig-version.cmake @@ -0,0 +1,15 @@ +set(OpenCV_VERSION 4.6.0) +set(PACKAGE_VERSION ${OpenCV_VERSION}) + +set(PACKAGE_VERSION_EXACT False) +set(PACKAGE_VERSION_COMPATIBLE False) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT True) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() + +if(PACKAGE_FIND_VERSION_MAJOR EQUAL 4 + AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVConfig.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVConfig.cmake new file mode 100644 index 0000000..e0574c2 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVConfig.cmake @@ -0,0 +1,368 @@ +# =================================================================================== +# The OpenCV CMake configuration file +# +# ** File generated automatically, do not modify ** +# +# Usage from an external project: +# In your CMakeLists.txt, add these lines: +# +# find_package(OpenCV REQUIRED) +# include_directories(${OpenCV_INCLUDE_DIRS}) # Not needed for CMake >= 2.8.11 +# target_link_libraries(MY_TARGET_NAME ${OpenCV_LIBS}) +# +# Or you can search for specific OpenCV modules: +# +# find_package(OpenCV REQUIRED core videoio) +# +# You can also mark OpenCV components as optional: +# +# find_package(OpenCV REQUIRED core OPTIONAL_COMPONENTS viz) +# +# If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE. +# +# This file will define the following variables: +# - OpenCV_LIBS : The list of all imported targets for OpenCV modules. +# - OpenCV_INCLUDE_DIRS : The OpenCV include directories. +# - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability. +# - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API. 
+# - OpenCV_VERSION : The version of this OpenCV build: "4.6.0" +# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION: "4" +# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION: "6" +# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION: "0" +# - OpenCV_VERSION_STATUS : Development status of this build: "" +# +# Advanced variables: +# - OpenCV_SHARED : Use OpenCV as shared library +# - OpenCV_INSTALL_PATH : OpenCV location +# - OpenCV_LIB_COMPONENTS : Present OpenCV modules list +# - OpenCV_USE_MANGLED_PATHS : Mangled OpenCV path flag +# +# Deprecated variables: +# - OpenCV_VERSION_TWEAK : Always "0" +# +# =================================================================================== + +# ====================================================== +# Version variables: +# ====================================================== +SET(OpenCV_VERSION 4.6.0) +SET(OpenCV_VERSION_MAJOR 4) +SET(OpenCV_VERSION_MINOR 6) +SET(OpenCV_VERSION_PATCH 0) +SET(OpenCV_VERSION_TWEAK 0) +SET(OpenCV_VERSION_STATUS "") + +include(FindPackageHandleStandardArgs) + +if(NOT CMAKE_VERSION VERSION_LESS 2.8.8 + AND OpenCV_FIND_COMPONENTS # prevent excessive output +) + # HANDLE_COMPONENTS was introduced in CMake 2.8.8 + list(APPEND _OpenCV_FPHSA_ARGS HANDLE_COMPONENTS) + # The missing components will be handled by the FindPackageHandleStandardArgs + # module. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY FALSE) +else() + # The missing components will be handled by this config. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY TRUE) +endif() + +# Extract directory name from full path of the file currently being processed. +# Note that CMake 2.8.3 introduced CMAKE_CURRENT_LIST_DIR. We reimplement it +# for older versions of CMake to support these as well. +if(CMAKE_VERSION VERSION_LESS "2.8.3") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +endif() + +# Extract the directory where *this* file has been installed (determined at cmake run-time) +# Get the absolute path with no ../.. relative marks, to eliminate implicit linker warnings +get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../../../../" REALPATH) + +# Search packages for host system instead of packages for target system. 
+# in case of cross compilation this macro should be defined by toolchain file +if(NOT COMMAND find_host_package) + macro(find_host_package) + find_package(${ARGN}) + endmacro() +endif() +if(NOT COMMAND find_host_program) + macro(find_host_program) + find_program(${ARGN}) + endmacro() +endif() + + + +# Android API level from which OpenCV has been compiled is remembered +set(OpenCV_ANDROID_NATIVE_API_LEVEL "24") + +# ============================================================== +# Check OpenCV availability +# ============================================================== +if(OpenCV_ANDROID_NATIVE_API_LEVEL GREATER ANDROID_NATIVE_API_LEVEL) + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "Minimum required by OpenCV API level is android-${OpenCV_ANDROID_NATIVE_API_LEVEL}") + endif() + set(OpenCV_FOUND 0) + return() +endif() + + + + + +# Some additional settings are required if OpenCV is built as static libs +set(OpenCV_SHARED OFF) + +# Enables mangled install paths, that help with side by side installs +set(OpenCV_USE_MANGLED_PATHS FALSE) + +set(OpenCV_LIB_COMPONENTS opencv_core;opencv_features2d;opencv_highgui;opencv_imgproc;opencv_photo;opencv_video) +set(__OpenCV_INCLUDE_DIRS "${OpenCV_INSTALL_PATH}/sdk/native/jni/include") + +set(OpenCV_INCLUDE_DIRS "") +foreach(d ${__OpenCV_INCLUDE_DIRS}) + get_filename_component(__d "${d}" REALPATH) + if(NOT EXISTS "${__d}") + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "OpenCV: Include directory doesn't exist: '${d}'. OpenCV installation may be broken. Skip...") + endif() + else() + list(APPEND OpenCV_INCLUDE_DIRS "${__d}") + endif() +endforeach() +unset(__d) + + +if(NOT TARGET opencv_core) + include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${OpenCV_MODULES_SUFFIX}.cmake) +endif() + +if(NOT CMAKE_VERSION VERSION_LESS "2.8.11") + # Target property INTERFACE_INCLUDE_DIRECTORIES available since 2.8.11: + # * http://www.cmake.org/cmake/help/v2.8.11/cmake.html#prop_tgt:INTERFACE_INCLUDE_DIRECTORIES + foreach(__component ${OpenCV_LIB_COMPONENTS}) + if(TARGET ${__component}) + set_target_properties( + ${__component} + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenCV_INCLUDE_DIRS}" + ) + endif() + endforeach() +endif() + + +if(NOT DEFINED OPENCV_MAP_IMPORTED_CONFIG) + if(CMAKE_GENERATOR MATCHES "Visual Studio" OR MSVC) + # OpenCV supports Debug and Release builds only. + # But MSVS has 'RelWithDebInfo' 'MinSizeRel' configurations for applications. + # By default CMake maps these configuration on the first available (Debug) which is wrong. + # Non-Debug build of Application can't be used with OpenCV Debug build (ABI mismatch problem) + # Add mapping of RelWithDebInfo and MinSizeRel to Release here + set(OPENCV_MAP_IMPORTED_CONFIG "RELWITHDEBINFO=!Release;MINSIZEREL=!Release") + endif() +endif() +set(__remap_warnings "") +macro(ocv_map_imported_config target) + if(DEFINED OPENCV_MAP_IMPORTED_CONFIG) # list, "RELWITHDEBINFO=Release;MINSIZEREL=Release" + get_target_property(__available_configurations ${target} IMPORTED_CONFIGURATIONS) + foreach(remap ${OPENCV_MAP_IMPORTED_CONFIG}) + if(remap MATCHES "^(.+)=(!?)([^!]+)$") + set(__remap_config "${CMAKE_MATCH_1}") + set(__final_config "${CMAKE_MATCH_3}") + set(__force_flag "${CMAKE_MATCH_2}") + string(TOUPPER "${__final_config}" __final_config_upper) + string(TOUPPER "${__remap_config}" __remap_config_upper) + if(";${__available_configurations};" MATCHES ";${__remap_config_upper};" AND NOT "${__force_flag}" STREQUAL "!") + # configuration already exists, skip remap + set(__remap_warnings "${__remap_warnings}... 
Configuration already exists ${__remap_config} (skip mapping ${__remap_config} => ${__final_config}) (available configurations: ${__available_configurations})\n") + continue() + endif() + if(__available_configurations AND NOT ";${__available_configurations};" MATCHES ";${__final_config_upper};") + # skip, configuration is not available + if(NOT "${__force_flag}" STREQUAL "!") + set(__remap_warnings "${__remap_warnings}... Configuration is not available '${__final_config}' for ${target}, build may fail (available configurations: ${__available_configurations})\n") + endif() + endif() + set_target_properties(${target} PROPERTIES + MAP_IMPORTED_CONFIG_${__remap_config} "${__final_config}" + ) + else() + message(WARNING "Invalid entry of OPENCV_MAP_IMPORTED_CONFIG: '${remap}' (${OPENCV_MAP_IMPORTED_CONFIG})") + endif() + endforeach() + endif() +endmacro() + + +# ============================================================== +# Form list of modules (components) to find +# ============================================================== +if(NOT OpenCV_FIND_COMPONENTS) + set(OpenCV_FIND_COMPONENTS ${OpenCV_LIB_COMPONENTS}) + list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_java) + if(GTest_FOUND OR GTEST_FOUND) + list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_ts) + endif() +endif() + +set(OpenCV_WORLD_COMPONENTS ) + +# expand short module names and see if requested components exist +foreach(__cvcomponent ${OpenCV_FIND_COMPONENTS}) + # Store the name of the original component so we can set the + # OpenCV__FOUND variable which can be checked by the user. + set (__original_cvcomponent ${__cvcomponent}) + if(NOT __cvcomponent MATCHES "^opencv_") + set(__cvcomponent opencv_${__cvcomponent}) + endif() + list(FIND OpenCV_LIB_COMPONENTS ${__cvcomponent} __cvcomponentIdx) + if(__cvcomponentIdx LESS 0) + if(_OpenCV_HANDLE_COMPONENTS_MANUALLY) + # Either the component is required or the user did not set any components at + # all. In the latter case, the OpenCV_FIND_REQUIRED_ variable + # will not be defined since it is not set by this config. So let's assume + # the implicitly set components are always required. + if(NOT DEFINED OpenCV_FIND_REQUIRED_${__original_cvcomponent} OR + OpenCV_FIND_REQUIRED_${__original_cvcomponent}) + message(FATAL_ERROR "${__cvcomponent} is required but was not found") + elseif(NOT OpenCV_FIND_QUIETLY) + # The component was marked as optional using OPTIONAL_COMPONENTS + message(WARNING "Optional component ${__cvcomponent} was not found") + endif() + endif(_OpenCV_HANDLE_COMPONENTS_MANUALLY) + #indicate that module is NOT found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND "${__cvcomponentUP}_FOUND-NOTFOUND") + set(OpenCV_${__original_cvcomponent}_FOUND FALSE) + else() + # Not using list(APPEND) here, because OpenCV_LIBS may not exist yet. + # Also not clearing OpenCV_LIBS anywhere, so that multiple calls + # to find_package(OpenCV) with different component lists add up. 
+ set(OpenCV_LIBS ${OpenCV_LIBS} "${__cvcomponent}") + #indicate that module is found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND 1) + set(OpenCV_${__original_cvcomponent}_FOUND TRUE) + endif() + if(OpenCV_SHARED AND ";${OpenCV_WORLD_COMPONENTS};" MATCHES ";${__cvcomponent};" AND NOT TARGET ${__cvcomponent}) + get_target_property(__implib_dbg opencv_world IMPORTED_IMPLIB_DEBUG) + get_target_property(__implib_release opencv_world IMPORTED_IMPLIB_RELEASE) + get_target_property(__location_dbg opencv_world IMPORTED_LOCATION_DEBUG) + get_target_property(__location_release opencv_world IMPORTED_LOCATION_RELEASE) + get_target_property(__include_dir opencv_world INTERFACE_INCLUDE_DIRECTORIES) + add_library(${__cvcomponent} SHARED IMPORTED) + set_target_properties(${__cvcomponent} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${__include_dir}") + if(__location_dbg) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_DEBUG "${__implib_dbg}" + IMPORTED_LINK_INTERFACE_LIBRARIES_DEBUG "" + IMPORTED_LOCATION_DEBUG "${__location_dbg}" + ) + endif() + if(__location_release) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_RELEASE "${__implib_release}" + IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "" + IMPORTED_LOCATION_RELEASE "${__location_release}" + ) + endif() + endif() + if(TARGET ${__cvcomponent}) + ocv_map_imported_config(${__cvcomponent}) + endif() +endforeach() + +if(__remap_warnings AND NOT OpenCV_FIND_QUIETLY) + message("OpenCV: configurations remap warnings:\n${__remap_warnings}OpenCV: Check variable OPENCV_MAP_IMPORTED_CONFIG=${OPENCV_MAP_IMPORTED_CONFIG}") +endif() + +# ============================================================== +# Compatibility stuff +# ============================================================== +set(OpenCV_LIBRARIES ${OpenCV_LIBS}) + +# Require C++11 features for OpenCV modules +if(CMAKE_VERSION VERSION_LESS "3.1") + if(NOT OpenCV_FIND_QUIETLY AND NOT OPENCV_HIDE_WARNING_COMPILE_FEATURES) + message(STATUS "OpenCV: CMake version is low (${CMAKE_VERSION}, required 3.1+). Can't enable C++11 features: https://github.com/opencv/opencv/issues/13000") + endif() +else() + set(__target opencv_core) + if(TARGET opencv_world) + set(__target opencv_world) + endif() + set(__compile_features cxx_std_11) # CMake 3.8+ + if(DEFINED OPENCV_COMPILE_FEATURES) + set(__compile_features ${OPENCV_COMPILE_FEATURES}) # custom override + elseif(CMAKE_VERSION VERSION_LESS "3.8") + set(__compile_features cxx_auto_type cxx_rvalue_references cxx_lambdas) + endif() + if(__compile_features) + # Simulate exported result of target_compile_features(opencv_core PUBLIC ...) 
+ set_target_properties(${__target} PROPERTIES + INTERFACE_COMPILE_FEATURES "${__compile_features}" + ) + endif() + unset(__target) + unset(__compile_features) +endif() + +# +# Some macros for samples +# +macro(ocv_check_dependencies) + set(OCV_DEPENDENCIES_FOUND TRUE) + foreach(d ${ARGN}) + if(NOT TARGET ${d}) + message(WARNING "OpenCV: Can't resolve dependency: ${d}") + set(OCV_DEPENDENCIES_FOUND FALSE) + break() + endif() + endforeach() +endmacro() + +# adds include directories in such way that directories from the OpenCV source tree go first +function(ocv_include_directories) + set(__add_before "") + file(TO_CMAKE_PATH "${OpenCV_INSTALL_PATH}" __baseDir) + foreach(dir ${ARGN}) + get_filename_component(__abs_dir "${dir}" ABSOLUTE) + if("${__abs_dir}" MATCHES "^${__baseDir}") + list(APPEND __add_before "${dir}") + else() + include_directories(AFTER SYSTEM "${dir}") + endif() + endforeach() + include_directories(BEFORE ${__add_before}) +endfunction() + +macro(ocv_include_modules) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_include_modules_recurse) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_target_link_libraries) + target_link_libraries(${ARGN}) +endmacro() + +# remove all matching elements from the list +macro(ocv_list_filterout lst regex) + foreach(item ${${lst}}) + if(item MATCHES "${regex}") + list(REMOVE_ITEM ${lst} "${item}") + endif() + endforeach() +endmacro() + +# We do not actually need REQUIRED_VARS to be checked for. Just use the +# installation directory for the status. +find_package_handle_standard_args(OpenCV REQUIRED_VARS OpenCV_INSTALL_PATH + VERSION_VAR OpenCV_VERSION ${_OpenCV_FPHSA_ARGS}) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVModules-release.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVModules-release.cmake new file mode 100644 index 0000000..fd47999 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVModules-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. 
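+# This per-configuration file only attaches the Release archive paths to the
+# imported targets declared in OpenCVModules.cmake; consumers never include it
+# directly, it is picked up by that file's file(GLOB .../OpenCVModules-*.cmake)
+# include loop.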
+set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "opencv_core" for configuration "Release" +set_property(TARGET opencv_core APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_core PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_core.a" + ) + +list(APPEND _cmake_import_check_targets opencv_core ) +list(APPEND _cmake_import_check_files_for_opencv_core "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_core.a" ) + +# Import target "opencv_imgproc" for configuration "Release" +set_property(TARGET opencv_imgproc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_imgproc PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_imgproc.a" + ) + +list(APPEND _cmake_import_check_targets opencv_imgproc ) +list(APPEND _cmake_import_check_files_for_opencv_imgproc "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_imgproc.a" ) + +# Import target "opencv_photo" for configuration "Release" +set_property(TARGET opencv_photo APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_photo PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_photo.a" + ) + +list(APPEND _cmake_import_check_targets opencv_photo ) +list(APPEND _cmake_import_check_files_for_opencv_photo "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_photo.a" ) + +# Import target "opencv_video" for configuration "Release" +set_property(TARGET opencv_video APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_video PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_video.a" + ) + +list(APPEND _cmake_import_check_targets opencv_video ) +list(APPEND _cmake_import_check_files_for_opencv_video "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_video.a" ) + +# Import target "opencv_features2d" for configuration "Release" +set_property(TARGET opencv_features2d APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_features2d PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_features2d.a" + ) + +list(APPEND _cmake_import_check_targets opencv_features2d ) +list(APPEND _cmake_import_check_files_for_opencv_features2d "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_features2d.a" ) + +# Import target "opencv_highgui" for configuration "Release" +set_property(TARGET opencv_highgui APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_highgui PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_highgui.a" + ) + +list(APPEND _cmake_import_check_targets opencv_highgui ) +list(APPEND _cmake_import_check_files_for_opencv_highgui "${_IMPORT_PREFIX}/sdk/native/staticlibs/arm64-v8a/libopencv_highgui.a" ) + +# Commands beyond this point should not need to know the version. 
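+# Note: every import above also registered its target in
+# _cmake_import_check_targets and its archive path in
+# _cmake_import_check_files_for_<target>; OpenCVModules.cmake walks those
+# lists afterwards and raises FATAL_ERROR if any referenced libopencv_*.a
+# file is missing from the unpacked SDK.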
+set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVModules.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVModules.cmake new file mode 100644 index 0000000..a27dd7b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-arm64-v8a/OpenCVModules.cmake @@ -0,0 +1,142 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS opencv_core opencv_imgproc opencv_photo opencv_video opencv_features2d opencv_highgui) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
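+# The five PATH strips below walk up from
+#   <prefix>/sdk/native/jni/abi-arm64-v8a/OpenCVModules.cmake
+# (file name, then abi-arm64-v8a, jni, native, sdk), leaving _IMPORT_PREFIX at
+# the SDK root against which the IMPORTED_LOCATION_* paths are expressed.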
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target opencv_core +add_library(opencv_core STATIC IMPORTED) + +set_target_properties(opencv_core PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target opencv_imgproc +add_library(opencv_imgproc STATIC IMPORTED) + +set_target_properties(opencv_imgproc PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_core;\$;\$;\$" +) + +# Create imported target opencv_photo +add_library(opencv_photo STATIC IMPORTED) + +set_target_properties(opencv_photo PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_video +add_library(opencv_video STATIC IMPORTED) + +set_target_properties(opencv_video PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_features2d +add_library(opencv_features2d STATIC IMPORTED) + +set_target_properties(opencv_features2d PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_highgui +add_library(opencv_highgui STATIC IMPORTED) + +set_target_properties(opencv_highgui PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/OpenCVModules-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION)
+cmake_policy(POP)
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVConfig-version.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVConfig-version.cmake
new file mode 100644
index 0000000..7a05194
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVConfig-version.cmake
@@ -0,0 +1,15 @@
+set(OpenCV_VERSION 4.6.0)
+set(PACKAGE_VERSION ${OpenCV_VERSION})
+
+set(PACKAGE_VERSION_EXACT False)
+set(PACKAGE_VERSION_COMPATIBLE False)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT True)
+  set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
+
+if(PACKAGE_FIND_VERSION_MAJOR EQUAL 4
+  AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
+  set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVConfig.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVConfig.cmake
new file mode 100644
index 0000000..e0574c2
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVConfig.cmake
@@ -0,0 +1,368 @@
+# ===================================================================================
+# The OpenCV CMake configuration file
+#
+# ** File generated automatically, do not modify **
+#
+# Usage from an external project:
+# In your CMakeLists.txt, add these lines:
+#
+# find_package(OpenCV REQUIRED)
+# include_directories(${OpenCV_INCLUDE_DIRS}) # Not needed for CMake >= 2.8.11
+# target_link_libraries(MY_TARGET_NAME ${OpenCV_LIBS})
+#
+# Or you can search for specific OpenCV modules:
+#
+# find_package(OpenCV REQUIRED core videoio)
+#
+# You can also mark OpenCV components as optional:
+#
+# find_package(OpenCV REQUIRED core OPTIONAL_COMPONENTS viz)
+#
+# If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
+#
+# This file will define the following variables:
+# - OpenCV_LIBS : The list of all imported targets for OpenCV modules.
+# - OpenCV_INCLUDE_DIRS : The OpenCV include directories.
+# - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability.
+# - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API.
+# - OpenCV_VERSION : The version of this OpenCV build: "4.6.0" +# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION: "4" +# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION: "6" +# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION: "0" +# - OpenCV_VERSION_STATUS : Development status of this build: "" +# +# Advanced variables: +# - OpenCV_SHARED : Use OpenCV as shared library +# - OpenCV_INSTALL_PATH : OpenCV location +# - OpenCV_LIB_COMPONENTS : Present OpenCV modules list +# - OpenCV_USE_MANGLED_PATHS : Mangled OpenCV path flag +# +# Deprecated variables: +# - OpenCV_VERSION_TWEAK : Always "0" +# +# =================================================================================== + +# ====================================================== +# Version variables: +# ====================================================== +SET(OpenCV_VERSION 4.6.0) +SET(OpenCV_VERSION_MAJOR 4) +SET(OpenCV_VERSION_MINOR 6) +SET(OpenCV_VERSION_PATCH 0) +SET(OpenCV_VERSION_TWEAK 0) +SET(OpenCV_VERSION_STATUS "") + +include(FindPackageHandleStandardArgs) + +if(NOT CMAKE_VERSION VERSION_LESS 2.8.8 + AND OpenCV_FIND_COMPONENTS # prevent excessive output +) + # HANDLE_COMPONENTS was introduced in CMake 2.8.8 + list(APPEND _OpenCV_FPHSA_ARGS HANDLE_COMPONENTS) + # The missing components will be handled by the FindPackageHandleStandardArgs + # module. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY FALSE) +else() + # The missing components will be handled by this config. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY TRUE) +endif() + +# Extract directory name from full path of the file currently being processed. +# Note that CMake 2.8.3 introduced CMAKE_CURRENT_LIST_DIR. We reimplement it +# for older versions of CMake to support these as well. +if(CMAKE_VERSION VERSION_LESS "2.8.3") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +endif() + +# Extract the directory where *this* file has been installed (determined at cmake run-time) +# Get the absolute path with no ../.. relative marks, to eliminate implicit linker warnings +get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../../../../" REALPATH) + +# Search packages for host system instead of packages for target system. 
+# in case of cross compilation this macro should be defined by toolchain file +if(NOT COMMAND find_host_package) + macro(find_host_package) + find_package(${ARGN}) + endmacro() +endif() +if(NOT COMMAND find_host_program) + macro(find_host_program) + find_program(${ARGN}) + endmacro() +endif() + + + +# Android API level from which OpenCV has been compiled is remembered +set(OpenCV_ANDROID_NATIVE_API_LEVEL "24") + +# ============================================================== +# Check OpenCV availability +# ============================================================== +if(OpenCV_ANDROID_NATIVE_API_LEVEL GREATER ANDROID_NATIVE_API_LEVEL) + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "Minimum required by OpenCV API level is android-${OpenCV_ANDROID_NATIVE_API_LEVEL}") + endif() + set(OpenCV_FOUND 0) + return() +endif() + + + + + +# Some additional settings are required if OpenCV is built as static libs +set(OpenCV_SHARED OFF) + +# Enables mangled install paths, that help with side by side installs +set(OpenCV_USE_MANGLED_PATHS FALSE) + +set(OpenCV_LIB_COMPONENTS opencv_core;opencv_features2d;opencv_highgui;opencv_imgproc;opencv_photo;opencv_video) +set(__OpenCV_INCLUDE_DIRS "${OpenCV_INSTALL_PATH}/sdk/native/jni/include") + +set(OpenCV_INCLUDE_DIRS "") +foreach(d ${__OpenCV_INCLUDE_DIRS}) + get_filename_component(__d "${d}" REALPATH) + if(NOT EXISTS "${__d}") + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "OpenCV: Include directory doesn't exist: '${d}'. OpenCV installation may be broken. Skip...") + endif() + else() + list(APPEND OpenCV_INCLUDE_DIRS "${__d}") + endif() +endforeach() +unset(__d) + + +if(NOT TARGET opencv_core) + include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${OpenCV_MODULES_SUFFIX}.cmake) +endif() + +if(NOT CMAKE_VERSION VERSION_LESS "2.8.11") + # Target property INTERFACE_INCLUDE_DIRECTORIES available since 2.8.11: + # * http://www.cmake.org/cmake/help/v2.8.11/cmake.html#prop_tgt:INTERFACE_INCLUDE_DIRECTORIES + foreach(__component ${OpenCV_LIB_COMPONENTS}) + if(TARGET ${__component}) + set_target_properties( + ${__component} + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenCV_INCLUDE_DIRS}" + ) + endif() + endforeach() +endif() + + +if(NOT DEFINED OPENCV_MAP_IMPORTED_CONFIG) + if(CMAKE_GENERATOR MATCHES "Visual Studio" OR MSVC) + # OpenCV supports Debug and Release builds only. + # But MSVS has 'RelWithDebInfo' 'MinSizeRel' configurations for applications. + # By default CMake maps these configuration on the first available (Debug) which is wrong. + # Non-Debug build of Application can't be used with OpenCV Debug build (ABI mismatch problem) + # Add mapping of RelWithDebInfo and MinSizeRel to Release here + set(OPENCV_MAP_IMPORTED_CONFIG "RELWITHDEBINFO=!Release;MINSIZEREL=!Release") + endif() +endif() +set(__remap_warnings "") +macro(ocv_map_imported_config target) + if(DEFINED OPENCV_MAP_IMPORTED_CONFIG) # list, "RELWITHDEBINFO=Release;MINSIZEREL=Release" + get_target_property(__available_configurations ${target} IMPORTED_CONFIGURATIONS) + foreach(remap ${OPENCV_MAP_IMPORTED_CONFIG}) + if(remap MATCHES "^(.+)=(!?)([^!]+)$") + set(__remap_config "${CMAKE_MATCH_1}") + set(__final_config "${CMAKE_MATCH_3}") + set(__force_flag "${CMAKE_MATCH_2}") + string(TOUPPER "${__final_config}" __final_config_upper) + string(TOUPPER "${__remap_config}" __remap_config_upper) + if(";${__available_configurations};" MATCHES ";${__remap_config_upper};" AND NOT "${__force_flag}" STREQUAL "!") + # configuration already exists, skip remap + set(__remap_warnings "${__remap_warnings}... 
Configuration already exists ${__remap_config} (skip mapping ${__remap_config} => ${__final_config}) (available configurations: ${__available_configurations})\n") + continue() + endif() + if(__available_configurations AND NOT ";${__available_configurations};" MATCHES ";${__final_config_upper};") + # skip, configuration is not available + if(NOT "${__force_flag}" STREQUAL "!") + set(__remap_warnings "${__remap_warnings}... Configuration is not available '${__final_config}' for ${target}, build may fail (available configurations: ${__available_configurations})\n") + endif() + endif() + set_target_properties(${target} PROPERTIES + MAP_IMPORTED_CONFIG_${__remap_config} "${__final_config}" + ) + else() + message(WARNING "Invalid entry of OPENCV_MAP_IMPORTED_CONFIG: '${remap}' (${OPENCV_MAP_IMPORTED_CONFIG})") + endif() + endforeach() + endif() +endmacro() + + +# ============================================================== +# Form list of modules (components) to find +# ============================================================== +if(NOT OpenCV_FIND_COMPONENTS) + set(OpenCV_FIND_COMPONENTS ${OpenCV_LIB_COMPONENTS}) + list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_java) + if(GTest_FOUND OR GTEST_FOUND) + list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_ts) + endif() +endif() + +set(OpenCV_WORLD_COMPONENTS ) + +# expand short module names and see if requested components exist +foreach(__cvcomponent ${OpenCV_FIND_COMPONENTS}) + # Store the name of the original component so we can set the + # OpenCV__FOUND variable which can be checked by the user. + set (__original_cvcomponent ${__cvcomponent}) + if(NOT __cvcomponent MATCHES "^opencv_") + set(__cvcomponent opencv_${__cvcomponent}) + endif() + list(FIND OpenCV_LIB_COMPONENTS ${__cvcomponent} __cvcomponentIdx) + if(__cvcomponentIdx LESS 0) + if(_OpenCV_HANDLE_COMPONENTS_MANUALLY) + # Either the component is required or the user did not set any components at + # all. In the latter case, the OpenCV_FIND_REQUIRED_ variable + # will not be defined since it is not set by this config. So let's assume + # the implicitly set components are always required. + if(NOT DEFINED OpenCV_FIND_REQUIRED_${__original_cvcomponent} OR + OpenCV_FIND_REQUIRED_${__original_cvcomponent}) + message(FATAL_ERROR "${__cvcomponent} is required but was not found") + elseif(NOT OpenCV_FIND_QUIETLY) + # The component was marked as optional using OPTIONAL_COMPONENTS + message(WARNING "Optional component ${__cvcomponent} was not found") + endif() + endif(_OpenCV_HANDLE_COMPONENTS_MANUALLY) + #indicate that module is NOT found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND "${__cvcomponentUP}_FOUND-NOTFOUND") + set(OpenCV_${__original_cvcomponent}_FOUND FALSE) + else() + # Not using list(APPEND) here, because OpenCV_LIBS may not exist yet. + # Also not clearing OpenCV_LIBS anywhere, so that multiple calls + # to find_package(OpenCV) with different component lists add up. 
+ set(OpenCV_LIBS ${OpenCV_LIBS} "${__cvcomponent}") + #indicate that module is found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND 1) + set(OpenCV_${__original_cvcomponent}_FOUND TRUE) + endif() + if(OpenCV_SHARED AND ";${OpenCV_WORLD_COMPONENTS};" MATCHES ";${__cvcomponent};" AND NOT TARGET ${__cvcomponent}) + get_target_property(__implib_dbg opencv_world IMPORTED_IMPLIB_DEBUG) + get_target_property(__implib_release opencv_world IMPORTED_IMPLIB_RELEASE) + get_target_property(__location_dbg opencv_world IMPORTED_LOCATION_DEBUG) + get_target_property(__location_release opencv_world IMPORTED_LOCATION_RELEASE) + get_target_property(__include_dir opencv_world INTERFACE_INCLUDE_DIRECTORIES) + add_library(${__cvcomponent} SHARED IMPORTED) + set_target_properties(${__cvcomponent} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${__include_dir}") + if(__location_dbg) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_DEBUG "${__implib_dbg}" + IMPORTED_LINK_INTERFACE_LIBRARIES_DEBUG "" + IMPORTED_LOCATION_DEBUG "${__location_dbg}" + ) + endif() + if(__location_release) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_RELEASE "${__implib_release}" + IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "" + IMPORTED_LOCATION_RELEASE "${__location_release}" + ) + endif() + endif() + if(TARGET ${__cvcomponent}) + ocv_map_imported_config(${__cvcomponent}) + endif() +endforeach() + +if(__remap_warnings AND NOT OpenCV_FIND_QUIETLY) + message("OpenCV: configurations remap warnings:\n${__remap_warnings}OpenCV: Check variable OPENCV_MAP_IMPORTED_CONFIG=${OPENCV_MAP_IMPORTED_CONFIG}") +endif() + +# ============================================================== +# Compatibility stuff +# ============================================================== +set(OpenCV_LIBRARIES ${OpenCV_LIBS}) + +# Require C++11 features for OpenCV modules +if(CMAKE_VERSION VERSION_LESS "3.1") + if(NOT OpenCV_FIND_QUIETLY AND NOT OPENCV_HIDE_WARNING_COMPILE_FEATURES) + message(STATUS "OpenCV: CMake version is low (${CMAKE_VERSION}, required 3.1+). Can't enable C++11 features: https://github.com/opencv/opencv/issues/13000") + endif() +else() + set(__target opencv_core) + if(TARGET opencv_world) + set(__target opencv_world) + endif() + set(__compile_features cxx_std_11) # CMake 3.8+ + if(DEFINED OPENCV_COMPILE_FEATURES) + set(__compile_features ${OPENCV_COMPILE_FEATURES}) # custom override + elseif(CMAKE_VERSION VERSION_LESS "3.8") + set(__compile_features cxx_auto_type cxx_rvalue_references cxx_lambdas) + endif() + if(__compile_features) + # Simulate exported result of target_compile_features(opencv_core PUBLIC ...) 
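+# The OPENCV_COMPILE_FEATURES check a few lines up also lets a consumer
+# override the advertised feature set before find_package(); a hedged sketch
+# (the cxx_std_17 choice is only an example):
+#
+#   set(OPENCV_COMPILE_FEATURES cxx_std_17)
+#   find_package(OpenCV REQUIRED)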
+ set_target_properties(${__target} PROPERTIES + INTERFACE_COMPILE_FEATURES "${__compile_features}" + ) + endif() + unset(__target) + unset(__compile_features) +endif() + +# +# Some macros for samples +# +macro(ocv_check_dependencies) + set(OCV_DEPENDENCIES_FOUND TRUE) + foreach(d ${ARGN}) + if(NOT TARGET ${d}) + message(WARNING "OpenCV: Can't resolve dependency: ${d}") + set(OCV_DEPENDENCIES_FOUND FALSE) + break() + endif() + endforeach() +endmacro() + +# adds include directories in such way that directories from the OpenCV source tree go first +function(ocv_include_directories) + set(__add_before "") + file(TO_CMAKE_PATH "${OpenCV_INSTALL_PATH}" __baseDir) + foreach(dir ${ARGN}) + get_filename_component(__abs_dir "${dir}" ABSOLUTE) + if("${__abs_dir}" MATCHES "^${__baseDir}") + list(APPEND __add_before "${dir}") + else() + include_directories(AFTER SYSTEM "${dir}") + endif() + endforeach() + include_directories(BEFORE ${__add_before}) +endfunction() + +macro(ocv_include_modules) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_include_modules_recurse) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_target_link_libraries) + target_link_libraries(${ARGN}) +endmacro() + +# remove all matching elements from the list +macro(ocv_list_filterout lst regex) + foreach(item ${${lst}}) + if(item MATCHES "${regex}") + list(REMOVE_ITEM ${lst} "${item}") + endif() + endforeach() +endmacro() + +# We do not actually need REQUIRED_VARS to be checked for. Just use the +# installation directory for the status. +find_package_handle_standard_args(OpenCV REQUIRED_VARS OpenCV_INSTALL_PATH + VERSION_VAR OpenCV_VERSION ${_OpenCV_FPHSA_ARGS}) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVModules-release.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVModules-release.cmake new file mode 100644 index 0000000..2142802 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVModules-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. 
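+# armeabi-v7a variant of the per-ABI Release import file. A consumer build
+# typically reaches the right variant by pointing OpenCV_DIR at the matching
+# abi-${ANDROID_ABI} directory before calling find_package(OpenCV); that
+# wiring is a common convention for this SDK layout and is assumed, not
+# guaranteed, here.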
+set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "opencv_core" for configuration "Release" +set_property(TARGET opencv_core APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_core PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_core.a" + ) + +list(APPEND _cmake_import_check_targets opencv_core ) +list(APPEND _cmake_import_check_files_for_opencv_core "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_core.a" ) + +# Import target "opencv_imgproc" for configuration "Release" +set_property(TARGET opencv_imgproc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_imgproc PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_imgproc.a" + ) + +list(APPEND _cmake_import_check_targets opencv_imgproc ) +list(APPEND _cmake_import_check_files_for_opencv_imgproc "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_imgproc.a" ) + +# Import target "opencv_photo" for configuration "Release" +set_property(TARGET opencv_photo APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_photo PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_photo.a" + ) + +list(APPEND _cmake_import_check_targets opencv_photo ) +list(APPEND _cmake_import_check_files_for_opencv_photo "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_photo.a" ) + +# Import target "opencv_video" for configuration "Release" +set_property(TARGET opencv_video APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_video PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_video.a" + ) + +list(APPEND _cmake_import_check_targets opencv_video ) +list(APPEND _cmake_import_check_files_for_opencv_video "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_video.a" ) + +# Import target "opencv_features2d" for configuration "Release" +set_property(TARGET opencv_features2d APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_features2d PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_features2d.a" + ) + +list(APPEND _cmake_import_check_targets opencv_features2d ) +list(APPEND _cmake_import_check_files_for_opencv_features2d "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_features2d.a" ) + +# Import target "opencv_highgui" for configuration "Release" +set_property(TARGET opencv_highgui APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_highgui PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_highgui.a" + ) + +list(APPEND _cmake_import_check_targets opencv_highgui ) +list(APPEND _cmake_import_check_files_for_opencv_highgui "${_IMPORT_PREFIX}/sdk/native/staticlibs/armeabi-v7a/libopencv_highgui.a" ) + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVModules.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVModules.cmake new file mode 100644 index 0000000..a27dd7b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-armeabi-v7a/OpenCVModules.cmake @@ -0,0 +1,142 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS opencv_core opencv_imgproc opencv_photo opencv_video opencv_features2d opencv_highgui) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target opencv_core +add_library(opencv_core STATIC IMPORTED) + +set_target_properties(opencv_core PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target opencv_imgproc +add_library(opencv_imgproc STATIC IMPORTED) + +set_target_properties(opencv_imgproc PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_core;\$;\$;\$" +) + +# Create imported target opencv_photo +add_library(opencv_photo STATIC IMPORTED) + +set_target_properties(opencv_photo PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_video +add_library(opencv_video STATIC IMPORTED) + +set_target_properties(opencv_video PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_features2d +add_library(opencv_features2d STATIC IMPORTED) + +set_target_properties(opencv_features2d PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_highgui +add_library(opencv_highgui STATIC IMPORTED) + +set_target_properties(opencv_highgui PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/OpenCVModules-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION)
+cmake_policy(POP)
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVConfig-version.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVConfig-version.cmake
new file mode 100644
index 0000000..7a05194
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVConfig-version.cmake
@@ -0,0 +1,15 @@
+set(OpenCV_VERSION 4.6.0)
+set(PACKAGE_VERSION ${OpenCV_VERSION})
+
+set(PACKAGE_VERSION_EXACT False)
+set(PACKAGE_VERSION_COMPATIBLE False)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT True)
+  set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
+
+if(PACKAGE_FIND_VERSION_MAJOR EQUAL 4
+  AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
+  set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVConfig.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVConfig.cmake
new file mode 100644
index 0000000..e0574c2
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVConfig.cmake
@@ -0,0 +1,368 @@
+# ===================================================================================
+# The OpenCV CMake configuration file
+#
+# ** File generated automatically, do not modify **
+#
+# Usage from an external project:
+# In your CMakeLists.txt, add these lines:
+#
+# find_package(OpenCV REQUIRED)
+# include_directories(${OpenCV_INCLUDE_DIRS}) # Not needed for CMake >= 2.8.11
+# target_link_libraries(MY_TARGET_NAME ${OpenCV_LIBS})
+#
+# Or you can search for specific OpenCV modules:
+#
+# find_package(OpenCV REQUIRED core videoio)
+#
+# You can also mark OpenCV components as optional:
+#
+# find_package(OpenCV REQUIRED core OPTIONAL_COMPONENTS viz)
+#
+# If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
+#
+# This file will define the following variables:
+# - OpenCV_LIBS : The list of all imported targets for OpenCV modules.
+# - OpenCV_INCLUDE_DIRS : The OpenCV include directories.
+# - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability.
+# - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API.
+# - OpenCV_VERSION : The version of this OpenCV build: "4.6.0" +# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION: "4" +# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION: "6" +# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION: "0" +# - OpenCV_VERSION_STATUS : Development status of this build: "" +# +# Advanced variables: +# - OpenCV_SHARED : Use OpenCV as shared library +# - OpenCV_INSTALL_PATH : OpenCV location +# - OpenCV_LIB_COMPONENTS : Present OpenCV modules list +# - OpenCV_USE_MANGLED_PATHS : Mangled OpenCV path flag +# +# Deprecated variables: +# - OpenCV_VERSION_TWEAK : Always "0" +# +# =================================================================================== + +# ====================================================== +# Version variables: +# ====================================================== +SET(OpenCV_VERSION 4.6.0) +SET(OpenCV_VERSION_MAJOR 4) +SET(OpenCV_VERSION_MINOR 6) +SET(OpenCV_VERSION_PATCH 0) +SET(OpenCV_VERSION_TWEAK 0) +SET(OpenCV_VERSION_STATUS "") + +include(FindPackageHandleStandardArgs) + +if(NOT CMAKE_VERSION VERSION_LESS 2.8.8 + AND OpenCV_FIND_COMPONENTS # prevent excessive output +) + # HANDLE_COMPONENTS was introduced in CMake 2.8.8 + list(APPEND _OpenCV_FPHSA_ARGS HANDLE_COMPONENTS) + # The missing components will be handled by the FindPackageHandleStandardArgs + # module. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY FALSE) +else() + # The missing components will be handled by this config. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY TRUE) +endif() + +# Extract directory name from full path of the file currently being processed. +# Note that CMake 2.8.3 introduced CMAKE_CURRENT_LIST_DIR. We reimplement it +# for older versions of CMake to support these as well. +if(CMAKE_VERSION VERSION_LESS "2.8.3") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +endif() + +# Extract the directory where *this* file has been installed (determined at cmake run-time) +# Get the absolute path with no ../.. relative marks, to eliminate implicit linker warnings +get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../../../../" REALPATH) + +# Search packages for host system instead of packages for target system. 
+# in case of cross compilation this macro should be defined by toolchain file +if(NOT COMMAND find_host_package) + macro(find_host_package) + find_package(${ARGN}) + endmacro() +endif() +if(NOT COMMAND find_host_program) + macro(find_host_program) + find_program(${ARGN}) + endmacro() +endif() + + + +# Android API level from which OpenCV has been compiled is remembered +set(OpenCV_ANDROID_NATIVE_API_LEVEL "24") + +# ============================================================== +# Check OpenCV availability +# ============================================================== +if(OpenCV_ANDROID_NATIVE_API_LEVEL GREATER ANDROID_NATIVE_API_LEVEL) + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "Minimum required by OpenCV API level is android-${OpenCV_ANDROID_NATIVE_API_LEVEL}") + endif() + set(OpenCV_FOUND 0) + return() +endif() + + + + + +# Some additional settings are required if OpenCV is built as static libs +set(OpenCV_SHARED OFF) + +# Enables mangled install paths, that help with side by side installs +set(OpenCV_USE_MANGLED_PATHS FALSE) + +set(OpenCV_LIB_COMPONENTS opencv_core;opencv_features2d;opencv_highgui;opencv_imgproc;opencv_photo;opencv_video) +set(__OpenCV_INCLUDE_DIRS "${OpenCV_INSTALL_PATH}/sdk/native/jni/include") + +set(OpenCV_INCLUDE_DIRS "") +foreach(d ${__OpenCV_INCLUDE_DIRS}) + get_filename_component(__d "${d}" REALPATH) + if(NOT EXISTS "${__d}") + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "OpenCV: Include directory doesn't exist: '${d}'. OpenCV installation may be broken. Skip...") + endif() + else() + list(APPEND OpenCV_INCLUDE_DIRS "${__d}") + endif() +endforeach() +unset(__d) + + +if(NOT TARGET opencv_core) + include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${OpenCV_MODULES_SUFFIX}.cmake) +endif() + +if(NOT CMAKE_VERSION VERSION_LESS "2.8.11") + # Target property INTERFACE_INCLUDE_DIRECTORIES available since 2.8.11: + # * http://www.cmake.org/cmake/help/v2.8.11/cmake.html#prop_tgt:INTERFACE_INCLUDE_DIRECTORIES + foreach(__component ${OpenCV_LIB_COMPONENTS}) + if(TARGET ${__component}) + set_target_properties( + ${__component} + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenCV_INCLUDE_DIRS}" + ) + endif() + endforeach() +endif() + + +if(NOT DEFINED OPENCV_MAP_IMPORTED_CONFIG) + if(CMAKE_GENERATOR MATCHES "Visual Studio" OR MSVC) + # OpenCV supports Debug and Release builds only. + # But MSVS has 'RelWithDebInfo' 'MinSizeRel' configurations for applications. + # By default CMake maps these configuration on the first available (Debug) which is wrong. + # Non-Debug build of Application can't be used with OpenCV Debug build (ABI mismatch problem) + # Add mapping of RelWithDebInfo and MinSizeRel to Release here + set(OPENCV_MAP_IMPORTED_CONFIG "RELWITHDEBINFO=!Release;MINSIZEREL=!Release") + endif() +endif() +set(__remap_warnings "") +macro(ocv_map_imported_config target) + if(DEFINED OPENCV_MAP_IMPORTED_CONFIG) # list, "RELWITHDEBINFO=Release;MINSIZEREL=Release" + get_target_property(__available_configurations ${target} IMPORTED_CONFIGURATIONS) + foreach(remap ${OPENCV_MAP_IMPORTED_CONFIG}) + if(remap MATCHES "^(.+)=(!?)([^!]+)$") + set(__remap_config "${CMAKE_MATCH_1}") + set(__final_config "${CMAKE_MATCH_3}") + set(__force_flag "${CMAKE_MATCH_2}") + string(TOUPPER "${__final_config}" __final_config_upper) + string(TOUPPER "${__remap_config}" __remap_config_upper) + if(";${__available_configurations};" MATCHES ";${__remap_config_upper};" AND NOT "${__force_flag}" STREQUAL "!") + # configuration already exists, skip remap + set(__remap_warnings "${__remap_warnings}... 
Configuration already exists ${__remap_config} (skip mapping ${__remap_config} => ${__final_config}) (available configurations: ${__available_configurations})\n") + continue() + endif() + if(__available_configurations AND NOT ";${__available_configurations};" MATCHES ";${__final_config_upper};") + # skip, configuration is not available + if(NOT "${__force_flag}" STREQUAL "!") + set(__remap_warnings "${__remap_warnings}... Configuration is not available '${__final_config}' for ${target}, build may fail (available configurations: ${__available_configurations})\n") + endif() + endif() + set_target_properties(${target} PROPERTIES + MAP_IMPORTED_CONFIG_${__remap_config} "${__final_config}" + ) + else() + message(WARNING "Invalid entry of OPENCV_MAP_IMPORTED_CONFIG: '${remap}' (${OPENCV_MAP_IMPORTED_CONFIG})") + endif() + endforeach() + endif() +endmacro() + + +# ============================================================== +# Form list of modules (components) to find +# ============================================================== +if(NOT OpenCV_FIND_COMPONENTS) + set(OpenCV_FIND_COMPONENTS ${OpenCV_LIB_COMPONENTS}) + list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_java) + if(GTest_FOUND OR GTEST_FOUND) + list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_ts) + endif() +endif() + +set(OpenCV_WORLD_COMPONENTS ) + +# expand short module names and see if requested components exist +foreach(__cvcomponent ${OpenCV_FIND_COMPONENTS}) + # Store the name of the original component so we can set the + # OpenCV__FOUND variable which can be checked by the user. + set (__original_cvcomponent ${__cvcomponent}) + if(NOT __cvcomponent MATCHES "^opencv_") + set(__cvcomponent opencv_${__cvcomponent}) + endif() + list(FIND OpenCV_LIB_COMPONENTS ${__cvcomponent} __cvcomponentIdx) + if(__cvcomponentIdx LESS 0) + if(_OpenCV_HANDLE_COMPONENTS_MANUALLY) + # Either the component is required or the user did not set any components at + # all. In the latter case, the OpenCV_FIND_REQUIRED_ variable + # will not be defined since it is not set by this config. So let's assume + # the implicitly set components are always required. + if(NOT DEFINED OpenCV_FIND_REQUIRED_${__original_cvcomponent} OR + OpenCV_FIND_REQUIRED_${__original_cvcomponent}) + message(FATAL_ERROR "${__cvcomponent} is required but was not found") + elseif(NOT OpenCV_FIND_QUIETLY) + # The component was marked as optional using OPTIONAL_COMPONENTS + message(WARNING "Optional component ${__cvcomponent} was not found") + endif() + endif(_OpenCV_HANDLE_COMPONENTS_MANUALLY) + #indicate that module is NOT found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND "${__cvcomponentUP}_FOUND-NOTFOUND") + set(OpenCV_${__original_cvcomponent}_FOUND FALSE) + else() + # Not using list(APPEND) here, because OpenCV_LIBS may not exist yet. + # Also not clearing OpenCV_LIBS anywhere, so that multiple calls + # to find_package(OpenCV) with different component lists add up. 
+ set(OpenCV_LIBS ${OpenCV_LIBS} "${__cvcomponent}") + #indicate that module is found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND 1) + set(OpenCV_${__original_cvcomponent}_FOUND TRUE) + endif() + if(OpenCV_SHARED AND ";${OpenCV_WORLD_COMPONENTS};" MATCHES ";${__cvcomponent};" AND NOT TARGET ${__cvcomponent}) + get_target_property(__implib_dbg opencv_world IMPORTED_IMPLIB_DEBUG) + get_target_property(__implib_release opencv_world IMPORTED_IMPLIB_RELEASE) + get_target_property(__location_dbg opencv_world IMPORTED_LOCATION_DEBUG) + get_target_property(__location_release opencv_world IMPORTED_LOCATION_RELEASE) + get_target_property(__include_dir opencv_world INTERFACE_INCLUDE_DIRECTORIES) + add_library(${__cvcomponent} SHARED IMPORTED) + set_target_properties(${__cvcomponent} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${__include_dir}") + if(__location_dbg) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_DEBUG "${__implib_dbg}" + IMPORTED_LINK_INTERFACE_LIBRARIES_DEBUG "" + IMPORTED_LOCATION_DEBUG "${__location_dbg}" + ) + endif() + if(__location_release) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_RELEASE "${__implib_release}" + IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "" + IMPORTED_LOCATION_RELEASE "${__location_release}" + ) + endif() + endif() + if(TARGET ${__cvcomponent}) + ocv_map_imported_config(${__cvcomponent}) + endif() +endforeach() + +if(__remap_warnings AND NOT OpenCV_FIND_QUIETLY) + message("OpenCV: configurations remap warnings:\n${__remap_warnings}OpenCV: Check variable OPENCV_MAP_IMPORTED_CONFIG=${OPENCV_MAP_IMPORTED_CONFIG}") +endif() + +# ============================================================== +# Compatibility stuff +# ============================================================== +set(OpenCV_LIBRARIES ${OpenCV_LIBS}) + +# Require C++11 features for OpenCV modules +if(CMAKE_VERSION VERSION_LESS "3.1") + if(NOT OpenCV_FIND_QUIETLY AND NOT OPENCV_HIDE_WARNING_COMPILE_FEATURES) + message(STATUS "OpenCV: CMake version is low (${CMAKE_VERSION}, required 3.1+). Can't enable C++11 features: https://github.com/opencv/opencv/issues/13000") + endif() +else() + set(__target opencv_core) + if(TARGET opencv_world) + set(__target opencv_world) + endif() + set(__compile_features cxx_std_11) # CMake 3.8+ + if(DEFINED OPENCV_COMPILE_FEATURES) + set(__compile_features ${OPENCV_COMPILE_FEATURES}) # custom override + elseif(CMAKE_VERSION VERSION_LESS "3.8") + set(__compile_features cxx_auto_type cxx_rvalue_references cxx_lambdas) + endif() + if(__compile_features) + # Simulate exported result of target_compile_features(opencv_core PUBLIC ...) 
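+# On CMake 3.1-3.7 the granular cxx_auto_type/cxx_rvalue_references/cxx_lambdas
+# trio selected above stands in for cxx_std_11, which only exists as a
+# meta-feature from CMake 3.8 onwards; both spellings steer consumers onto a
+# C++11-capable standard.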
+ set_target_properties(${__target} PROPERTIES + INTERFACE_COMPILE_FEATURES "${__compile_features}" + ) + endif() + unset(__target) + unset(__compile_features) +endif() + +# +# Some macros for samples +# +macro(ocv_check_dependencies) + set(OCV_DEPENDENCIES_FOUND TRUE) + foreach(d ${ARGN}) + if(NOT TARGET ${d}) + message(WARNING "OpenCV: Can't resolve dependency: ${d}") + set(OCV_DEPENDENCIES_FOUND FALSE) + break() + endif() + endforeach() +endmacro() + +# adds include directories in such way that directories from the OpenCV source tree go first +function(ocv_include_directories) + set(__add_before "") + file(TO_CMAKE_PATH "${OpenCV_INSTALL_PATH}" __baseDir) + foreach(dir ${ARGN}) + get_filename_component(__abs_dir "${dir}" ABSOLUTE) + if("${__abs_dir}" MATCHES "^${__baseDir}") + list(APPEND __add_before "${dir}") + else() + include_directories(AFTER SYSTEM "${dir}") + endif() + endforeach() + include_directories(BEFORE ${__add_before}) +endfunction() + +macro(ocv_include_modules) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_include_modules_recurse) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_target_link_libraries) + target_link_libraries(${ARGN}) +endmacro() + +# remove all matching elements from the list +macro(ocv_list_filterout lst regex) + foreach(item ${${lst}}) + if(item MATCHES "${regex}") + list(REMOVE_ITEM ${lst} "${item}") + endif() + endforeach() +endmacro() + +# We do not actually need REQUIRED_VARS to be checked for. Just use the +# installation directory for the status. +find_package_handle_standard_args(OpenCV REQUIRED_VARS OpenCV_INSTALL_PATH + VERSION_VAR OpenCV_VERSION ${_OpenCV_FPHSA_ARGS}) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVModules-release.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVModules-release.cmake new file mode 100644 index 0000000..a424eb4 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVModules-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. 
+set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "opencv_core" for configuration "Release" +set_property(TARGET opencv_core APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_core PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_core.a" + ) + +list(APPEND _cmake_import_check_targets opencv_core ) +list(APPEND _cmake_import_check_files_for_opencv_core "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_core.a" ) + +# Import target "opencv_imgproc" for configuration "Release" +set_property(TARGET opencv_imgproc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_imgproc PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_imgproc.a" + ) + +list(APPEND _cmake_import_check_targets opencv_imgproc ) +list(APPEND _cmake_import_check_files_for_opencv_imgproc "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_imgproc.a" ) + +# Import target "opencv_photo" for configuration "Release" +set_property(TARGET opencv_photo APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_photo PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_photo.a" + ) + +list(APPEND _cmake_import_check_targets opencv_photo ) +list(APPEND _cmake_import_check_files_for_opencv_photo "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_photo.a" ) + +# Import target "opencv_video" for configuration "Release" +set_property(TARGET opencv_video APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_video PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_video.a" + ) + +list(APPEND _cmake_import_check_targets opencv_video ) +list(APPEND _cmake_import_check_files_for_opencv_video "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_video.a" ) + +# Import target "opencv_features2d" for configuration "Release" +set_property(TARGET opencv_features2d APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_features2d PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_features2d.a" + ) + +list(APPEND _cmake_import_check_targets opencv_features2d ) +list(APPEND _cmake_import_check_files_for_opencv_features2d "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_features2d.a" ) + +# Import target "opencv_highgui" for configuration "Release" +set_property(TARGET opencv_highgui APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_highgui PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_highgui.a" + ) + +list(APPEND _cmake_import_check_targets opencv_highgui ) +list(APPEND _cmake_import_check_files_for_opencv_highgui "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86/libopencv_highgui.a" ) + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVModules.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVModules.cmake new file mode 100644 index 0000000..a27dd7b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86/OpenCVModules.cmake @@ -0,0 +1,142 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS opencv_core opencv_imgproc opencv_photo opencv_video opencv_features2d opencv_highgui) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target opencv_core +add_library(opencv_core STATIC IMPORTED) + +set_target_properties(opencv_core PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target opencv_imgproc +add_library(opencv_imgproc STATIC IMPORTED) + +set_target_properties(opencv_imgproc PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_core;\$;\$;\$" +) + +# Create imported target opencv_photo +add_library(opencv_photo STATIC IMPORTED) + +set_target_properties(opencv_photo PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_video +add_library(opencv_video STATIC IMPORTED) + +set_target_properties(opencv_video PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_features2d +add_library(opencv_features2d STATIC IMPORTED) + +set_target_properties(opencv_features2d PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_highgui +add_library(opencv_highgui STATIC IMPORTED) + +set_target_properties(opencv_highgui PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/OpenCVModules-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION)
+cmake_policy(POP)
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVConfig-version.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVConfig-version.cmake
new file mode 100644
index 0000000..7a05194
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVConfig-version.cmake
@@ -0,0 +1,15 @@
+set(OpenCV_VERSION 4.6.0)
+set(PACKAGE_VERSION ${OpenCV_VERSION})
+
+set(PACKAGE_VERSION_EXACT False)
+set(PACKAGE_VERSION_COMPATIBLE False)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT True)
+  set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
+
+if(PACKAGE_FIND_VERSION_MAJOR EQUAL 4
+    AND PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
+  set(PACKAGE_VERSION_COMPATIBLE True)
+endif()
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVConfig.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVConfig.cmake
new file mode 100644
index 0000000..e0574c2
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVConfig.cmake
@@ -0,0 +1,368 @@
+# ===================================================================================
+#  The OpenCV CMake configuration file
+#
+#             ** File generated automatically, do not modify **
+#
+#  Usage from an external project:
+#    In your CMakeLists.txt, add these lines:
+#
+#    find_package(OpenCV REQUIRED)
+#    include_directories(${OpenCV_INCLUDE_DIRS}) # Not needed for CMake >= 2.8.11
+#    target_link_libraries(MY_TARGET_NAME ${OpenCV_LIBS})
+#
+#    Or you can search for specific OpenCV modules:
+#
+#    find_package(OpenCV REQUIRED core videoio)
+#
+#    You can also mark OpenCV components as optional:
+
+#    find_package(OpenCV REQUIRED core OPTIONAL_COMPONENTS viz)
+#
+#    If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
+#
+#    This file will define the following variables:
+#      - OpenCV_LIBS                     : The list of all imported targets for OpenCV modules.
+#      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
+#      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability.
+#      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API.
+# - OpenCV_VERSION : The version of this OpenCV build: "4.6.0" +# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION: "4" +# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION: "6" +# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION: "0" +# - OpenCV_VERSION_STATUS : Development status of this build: "" +# +# Advanced variables: +# - OpenCV_SHARED : Use OpenCV as shared library +# - OpenCV_INSTALL_PATH : OpenCV location +# - OpenCV_LIB_COMPONENTS : Present OpenCV modules list +# - OpenCV_USE_MANGLED_PATHS : Mangled OpenCV path flag +# +# Deprecated variables: +# - OpenCV_VERSION_TWEAK : Always "0" +# +# =================================================================================== + +# ====================================================== +# Version variables: +# ====================================================== +SET(OpenCV_VERSION 4.6.0) +SET(OpenCV_VERSION_MAJOR 4) +SET(OpenCV_VERSION_MINOR 6) +SET(OpenCV_VERSION_PATCH 0) +SET(OpenCV_VERSION_TWEAK 0) +SET(OpenCV_VERSION_STATUS "") + +include(FindPackageHandleStandardArgs) + +if(NOT CMAKE_VERSION VERSION_LESS 2.8.8 + AND OpenCV_FIND_COMPONENTS # prevent excessive output +) + # HANDLE_COMPONENTS was introduced in CMake 2.8.8 + list(APPEND _OpenCV_FPHSA_ARGS HANDLE_COMPONENTS) + # The missing components will be handled by the FindPackageHandleStandardArgs + # module. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY FALSE) +else() + # The missing components will be handled by this config. + set(_OpenCV_HANDLE_COMPONENTS_MANUALLY TRUE) +endif() + +# Extract directory name from full path of the file currently being processed. +# Note that CMake 2.8.3 introduced CMAKE_CURRENT_LIST_DIR. We reimplement it +# for older versions of CMake to support these as well. +if(CMAKE_VERSION VERSION_LESS "2.8.3") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +endif() + +# Extract the directory where *this* file has been installed (determined at cmake run-time) +# Get the absolute path with no ../.. relative marks, to eliminate implicit linker warnings +get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(OpenCV_INSTALL_PATH "${OpenCV_CONFIG_PATH}/../../../../" REALPATH) + +# Search packages for host system instead of packages for target system. 
+# in case of cross compilation this macro should be defined by toolchain file +if(NOT COMMAND find_host_package) + macro(find_host_package) + find_package(${ARGN}) + endmacro() +endif() +if(NOT COMMAND find_host_program) + macro(find_host_program) + find_program(${ARGN}) + endmacro() +endif() + + + +# Android API level from which OpenCV has been compiled is remembered +set(OpenCV_ANDROID_NATIVE_API_LEVEL "24") + +# ============================================================== +# Check OpenCV availability +# ============================================================== +if(OpenCV_ANDROID_NATIVE_API_LEVEL GREATER ANDROID_NATIVE_API_LEVEL) + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "Minimum required by OpenCV API level is android-${OpenCV_ANDROID_NATIVE_API_LEVEL}") + endif() + set(OpenCV_FOUND 0) + return() +endif() + + + + + +# Some additional settings are required if OpenCV is built as static libs +set(OpenCV_SHARED OFF) + +# Enables mangled install paths, that help with side by side installs +set(OpenCV_USE_MANGLED_PATHS FALSE) + +set(OpenCV_LIB_COMPONENTS opencv_core;opencv_features2d;opencv_highgui;opencv_imgproc;opencv_photo;opencv_video) +set(__OpenCV_INCLUDE_DIRS "${OpenCV_INSTALL_PATH}/sdk/native/jni/include") + +set(OpenCV_INCLUDE_DIRS "") +foreach(d ${__OpenCV_INCLUDE_DIRS}) + get_filename_component(__d "${d}" REALPATH) + if(NOT EXISTS "${__d}") + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING "OpenCV: Include directory doesn't exist: '${d}'. OpenCV installation may be broken. Skip...") + endif() + else() + list(APPEND OpenCV_INCLUDE_DIRS "${__d}") + endif() +endforeach() +unset(__d) + + +if(NOT TARGET opencv_core) + include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${OpenCV_MODULES_SUFFIX}.cmake) +endif() + +if(NOT CMAKE_VERSION VERSION_LESS "2.8.11") + # Target property INTERFACE_INCLUDE_DIRECTORIES available since 2.8.11: + # * http://www.cmake.org/cmake/help/v2.8.11/cmake.html#prop_tgt:INTERFACE_INCLUDE_DIRECTORIES + foreach(__component ${OpenCV_LIB_COMPONENTS}) + if(TARGET ${__component}) + set_target_properties( + ${__component} + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenCV_INCLUDE_DIRS}" + ) + endif() + endforeach() +endif() + + +if(NOT DEFINED OPENCV_MAP_IMPORTED_CONFIG) + if(CMAKE_GENERATOR MATCHES "Visual Studio" OR MSVC) + # OpenCV supports Debug and Release builds only. + # But MSVS has 'RelWithDebInfo' 'MinSizeRel' configurations for applications. + # By default CMake maps these configuration on the first available (Debug) which is wrong. + # Non-Debug build of Application can't be used with OpenCV Debug build (ABI mismatch problem) + # Add mapping of RelWithDebInfo and MinSizeRel to Release here + set(OPENCV_MAP_IMPORTED_CONFIG "RELWITHDEBINFO=!Release;MINSIZEREL=!Release") + endif() +endif() +set(__remap_warnings "") +macro(ocv_map_imported_config target) + if(DEFINED OPENCV_MAP_IMPORTED_CONFIG) # list, "RELWITHDEBINFO=Release;MINSIZEREL=Release" + get_target_property(__available_configurations ${target} IMPORTED_CONFIGURATIONS) + foreach(remap ${OPENCV_MAP_IMPORTED_CONFIG}) + if(remap MATCHES "^(.+)=(!?)([^!]+)$") + set(__remap_config "${CMAKE_MATCH_1}") + set(__final_config "${CMAKE_MATCH_3}") + set(__force_flag "${CMAKE_MATCH_2}") + string(TOUPPER "${__final_config}" __final_config_upper) + string(TOUPPER "${__remap_config}" __remap_config_upper) + if(";${__available_configurations};" MATCHES ";${__remap_config_upper};" AND NOT "${__force_flag}" STREQUAL "!") + # configuration already exists, skip remap + set(__remap_warnings "${__remap_warnings}... 
Configuration already exists ${__remap_config} (skip mapping ${__remap_config} => ${__final_config}) (available configurations: ${__available_configurations})\n")
+        continue()
+      endif()
+      if(__available_configurations AND NOT ";${__available_configurations};" MATCHES ";${__final_config_upper};")
+        # skip, configuration is not available
+        if(NOT "${__force_flag}" STREQUAL "!")
+          set(__remap_warnings "${__remap_warnings}... Configuration is not available '${__final_config}' for ${target}, build may fail (available configurations: ${__available_configurations})\n")
+        endif()
+      endif()
+      set_target_properties(${target} PROPERTIES
+          MAP_IMPORTED_CONFIG_${__remap_config} "${__final_config}"
+      )
+    else()
+      message(WARNING "Invalid entry of OPENCV_MAP_IMPORTED_CONFIG: '${remap}' (${OPENCV_MAP_IMPORTED_CONFIG})")
+    endif()
+  endforeach()
+  endif()
+endmacro()
+
+
+# ==============================================================
+#  Form list of modules (components) to find
+# ==============================================================
+if(NOT OpenCV_FIND_COMPONENTS)
+  set(OpenCV_FIND_COMPONENTS ${OpenCV_LIB_COMPONENTS})
+  list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_java)
+  if(GTest_FOUND OR GTEST_FOUND)
+    list(REMOVE_ITEM OpenCV_FIND_COMPONENTS opencv_ts)
+  endif()
+endif()
+
+set(OpenCV_WORLD_COMPONENTS )
+
+# expand short module names and see if requested components exist
+foreach(__cvcomponent ${OpenCV_FIND_COMPONENTS})
+  # Store the name of the original component so we can set the
+  # OpenCV_<component>_FOUND variable which can be checked by the user.
+  set (__original_cvcomponent ${__cvcomponent})
+  if(NOT __cvcomponent MATCHES "^opencv_")
+    set(__cvcomponent opencv_${__cvcomponent})
+  endif()
+  list(FIND OpenCV_LIB_COMPONENTS ${__cvcomponent} __cvcomponentIdx)
+  if(__cvcomponentIdx LESS 0)
+    if(_OpenCV_HANDLE_COMPONENTS_MANUALLY)
+      # Either the component is required or the user did not set any components at
+      # all. In the latter case, the OpenCV_FIND_REQUIRED_<component> variable
+      # will not be defined since it is not set by this config. So let's assume
+      # the implicitly set components are always required.
+      if(NOT DEFINED OpenCV_FIND_REQUIRED_${__original_cvcomponent} OR
+          OpenCV_FIND_REQUIRED_${__original_cvcomponent})
+        message(FATAL_ERROR "${__cvcomponent} is required but was not found")
+      elseif(NOT OpenCV_FIND_QUIETLY)
+        # The component was marked as optional using OPTIONAL_COMPONENTS
+        message(WARNING "Optional component ${__cvcomponent} was not found")
+      endif()
+    endif(_OpenCV_HANDLE_COMPONENTS_MANUALLY)
+    #indicate that module is NOT found
+    string(TOUPPER "${__cvcomponent}" __cvcomponentUP)
+    set(${__cvcomponentUP}_FOUND "${__cvcomponentUP}_FOUND-NOTFOUND")
+    set(OpenCV_${__original_cvcomponent}_FOUND FALSE)
+  else()
+    # Not using list(APPEND) here, because OpenCV_LIBS may not exist yet.
+    # Also not clearing OpenCV_LIBS anywhere, so that multiple calls
+    # to find_package(OpenCV) with different component lists add up.
+ set(OpenCV_LIBS ${OpenCV_LIBS} "${__cvcomponent}") + #indicate that module is found + string(TOUPPER "${__cvcomponent}" __cvcomponentUP) + set(${__cvcomponentUP}_FOUND 1) + set(OpenCV_${__original_cvcomponent}_FOUND TRUE) + endif() + if(OpenCV_SHARED AND ";${OpenCV_WORLD_COMPONENTS};" MATCHES ";${__cvcomponent};" AND NOT TARGET ${__cvcomponent}) + get_target_property(__implib_dbg opencv_world IMPORTED_IMPLIB_DEBUG) + get_target_property(__implib_release opencv_world IMPORTED_IMPLIB_RELEASE) + get_target_property(__location_dbg opencv_world IMPORTED_LOCATION_DEBUG) + get_target_property(__location_release opencv_world IMPORTED_LOCATION_RELEASE) + get_target_property(__include_dir opencv_world INTERFACE_INCLUDE_DIRECTORIES) + add_library(${__cvcomponent} SHARED IMPORTED) + set_target_properties(${__cvcomponent} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${__include_dir}") + if(__location_dbg) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_DEBUG "${__implib_dbg}" + IMPORTED_LINK_INTERFACE_LIBRARIES_DEBUG "" + IMPORTED_LOCATION_DEBUG "${__location_dbg}" + ) + endif() + if(__location_release) + set_property(TARGET ${__cvcomponent} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(${__cvcomponent} PROPERTIES + IMPORTED_IMPLIB_RELEASE "${__implib_release}" + IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "" + IMPORTED_LOCATION_RELEASE "${__location_release}" + ) + endif() + endif() + if(TARGET ${__cvcomponent}) + ocv_map_imported_config(${__cvcomponent}) + endif() +endforeach() + +if(__remap_warnings AND NOT OpenCV_FIND_QUIETLY) + message("OpenCV: configurations remap warnings:\n${__remap_warnings}OpenCV: Check variable OPENCV_MAP_IMPORTED_CONFIG=${OPENCV_MAP_IMPORTED_CONFIG}") +endif() + +# ============================================================== +# Compatibility stuff +# ============================================================== +set(OpenCV_LIBRARIES ${OpenCV_LIBS}) + +# Require C++11 features for OpenCV modules +if(CMAKE_VERSION VERSION_LESS "3.1") + if(NOT OpenCV_FIND_QUIETLY AND NOT OPENCV_HIDE_WARNING_COMPILE_FEATURES) + message(STATUS "OpenCV: CMake version is low (${CMAKE_VERSION}, required 3.1+). Can't enable C++11 features: https://github.com/opencv/opencv/issues/13000") + endif() +else() + set(__target opencv_core) + if(TARGET opencv_world) + set(__target opencv_world) + endif() + set(__compile_features cxx_std_11) # CMake 3.8+ + if(DEFINED OPENCV_COMPILE_FEATURES) + set(__compile_features ${OPENCV_COMPILE_FEATURES}) # custom override + elseif(CMAKE_VERSION VERSION_LESS "3.8") + set(__compile_features cxx_auto_type cxx_rvalue_references cxx_lambdas) + endif() + if(__compile_features) + # Simulate exported result of target_compile_features(opencv_core PUBLIC ...) 
+ set_target_properties(${__target} PROPERTIES + INTERFACE_COMPILE_FEATURES "${__compile_features}" + ) + endif() + unset(__target) + unset(__compile_features) +endif() + +# +# Some macros for samples +# +macro(ocv_check_dependencies) + set(OCV_DEPENDENCIES_FOUND TRUE) + foreach(d ${ARGN}) + if(NOT TARGET ${d}) + message(WARNING "OpenCV: Can't resolve dependency: ${d}") + set(OCV_DEPENDENCIES_FOUND FALSE) + break() + endif() + endforeach() +endmacro() + +# adds include directories in such way that directories from the OpenCV source tree go first +function(ocv_include_directories) + set(__add_before "") + file(TO_CMAKE_PATH "${OpenCV_INSTALL_PATH}" __baseDir) + foreach(dir ${ARGN}) + get_filename_component(__abs_dir "${dir}" ABSOLUTE) + if("${__abs_dir}" MATCHES "^${__baseDir}") + list(APPEND __add_before "${dir}") + else() + include_directories(AFTER SYSTEM "${dir}") + endif() + endforeach() + include_directories(BEFORE ${__add_before}) +endfunction() + +macro(ocv_include_modules) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_include_modules_recurse) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +macro(ocv_target_link_libraries) + target_link_libraries(${ARGN}) +endmacro() + +# remove all matching elements from the list +macro(ocv_list_filterout lst regex) + foreach(item ${${lst}}) + if(item MATCHES "${regex}") + list(REMOVE_ITEM ${lst} "${item}") + endif() + endforeach() +endmacro() + +# We do not actually need REQUIRED_VARS to be checked for. Just use the +# installation directory for the status. +find_package_handle_standard_args(OpenCV REQUIRED_VARS OpenCV_INSTALL_PATH + VERSION_VAR OpenCV_VERSION ${_OpenCV_FPHSA_ARGS}) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVModules-release.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVModules-release.cmake new file mode 100644 index 0000000..8520e8d --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVModules-release.cmake @@ -0,0 +1,69 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. 
+set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "opencv_core" for configuration "Release" +set_property(TARGET opencv_core APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_core PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_core.a" + ) + +list(APPEND _cmake_import_check_targets opencv_core ) +list(APPEND _cmake_import_check_files_for_opencv_core "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_core.a" ) + +# Import target "opencv_imgproc" for configuration "Release" +set_property(TARGET opencv_imgproc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_imgproc PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_imgproc.a" + ) + +list(APPEND _cmake_import_check_targets opencv_imgproc ) +list(APPEND _cmake_import_check_files_for_opencv_imgproc "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_imgproc.a" ) + +# Import target "opencv_photo" for configuration "Release" +set_property(TARGET opencv_photo APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_photo PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_photo.a" + ) + +list(APPEND _cmake_import_check_targets opencv_photo ) +list(APPEND _cmake_import_check_files_for_opencv_photo "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_photo.a" ) + +# Import target "opencv_video" for configuration "Release" +set_property(TARGET opencv_video APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_video PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_video.a" + ) + +list(APPEND _cmake_import_check_targets opencv_video ) +list(APPEND _cmake_import_check_files_for_opencv_video "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_video.a" ) + +# Import target "opencv_features2d" for configuration "Release" +set_property(TARGET opencv_features2d APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_features2d PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_features2d.a" + ) + +list(APPEND _cmake_import_check_targets opencv_features2d ) +list(APPEND _cmake_import_check_files_for_opencv_features2d "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_features2d.a" ) + +# Import target "opencv_highgui" for configuration "Release" +set_property(TARGET opencv_highgui APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(opencv_highgui PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_highgui.a" + ) + +list(APPEND _cmake_import_check_targets opencv_highgui ) +list(APPEND _cmake_import_check_files_for_opencv_highgui "${_IMPORT_PREFIX}/sdk/native/staticlibs/x86_64/libopencv_highgui.a" ) + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVModules.cmake b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVModules.cmake new file mode 100644 index 0000000..a27dd7b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/abi-x86_64/OpenCVModules.cmake @@ -0,0 +1,142 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.23) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS opencv_core opencv_imgproc opencv_photo opencv_video opencv_features2d opencv_highgui) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target opencv_core +add_library(opencv_core STATIC IMPORTED) + +set_target_properties(opencv_core PROPERTIES + INTERFACE_LINK_LIBRARIES "\$;\$;\$" +) + +# Create imported target opencv_imgproc +add_library(opencv_imgproc STATIC IMPORTED) + +set_target_properties(opencv_imgproc PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_core;\$;\$;\$" +) + +# Create imported target opencv_photo +add_library(opencv_photo STATIC IMPORTED) + +set_target_properties(opencv_photo PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_video +add_library(opencv_video STATIC IMPORTED) + +set_target_properties(opencv_video PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_features2d +add_library(opencv_features2d STATIC IMPORTED) + +set_target_properties(opencv_features2d PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +# Create imported target opencv_highgui +add_library(opencv_highgui STATIC IMPORTED) + +set_target_properties(opencv_highgui PROPERTIES + INTERFACE_LINK_LIBRARIES "opencv_core;opencv_imgproc;opencv_core;opencv_imgproc;\$;\$;\$" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/OpenCVModules-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core.hpp new file mode 100644 index 0000000..f7807e3 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core.hpp @@ -0,0 +1,3354 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2015, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. +// Copyright (C) 2015, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#ifndef OPENCV_CORE_HPP
+#define OPENCV_CORE_HPP
+
+#ifndef __cplusplus
+#  error core.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/traits.hpp"
+#include "opencv2/core/matx.hpp"
+#include "opencv2/core/types.hpp"
+#include "opencv2/core/mat.hpp"
+#include "opencv2/core/persistence.hpp"
+
+/**
+@defgroup core Core functionality
+@{
+    @defgroup core_basic Basic structures
+    @defgroup core_c C structures and operations
+    @{
+        @defgroup core_c_glue Connections with C++
+    @}
+    @defgroup core_array Operations on arrays
+    @defgroup core_async Asynchronous API
+    @defgroup core_xml XML/YAML Persistence
+    @defgroup core_cluster Clustering
+    @defgroup core_utils Utility and system functions and macros
+    @{
+        @defgroup core_logging Logging facilities
+        @defgroup core_utils_sse SSE utilities
+        @defgroup core_utils_neon NEON utilities
+        @defgroup core_utils_vsx VSX utilities
+        @defgroup core_utils_softfloat Softfloat support
+        @defgroup core_utils_samples Utility functions for OpenCV samples
+    @}
+    @defgroup core_opengl OpenGL interoperability
+    @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters
+    @defgroup core_optim Optimization Algorithms
+    @defgroup core_directx DirectX interoperability
+    @defgroup core_eigen Eigen support
+    @defgroup core_opencl OpenCL support
+    @defgroup core_va_intel Intel VA-API/OpenCL (CL-VA) interoperability
+    @defgroup core_hal Hardware Acceleration Layer
+    @{
+        @defgroup core_hal_functions Functions
+        @defgroup core_hal_interface Interface
+        @defgroup core_hal_intrin Universal intrinsics
+        @{
+            @defgroup core_hal_intrin_impl Private implementation helpers
+        @}
+        @defgroup core_lowlevel_api Low-level API for external libraries / plugins
+    @}
+    @defgroup core_parallel Parallel Processing
+    @{
+        @defgroup core_parallel_backend Parallel backends API
+    @}
+@}
+ */
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+/*! @brief Class passed to an error.
+
+This class encapsulates all or almost all necessary
+information about the error that happened in the program. The exception is
+usually constructed and thrown implicitly via CV_Error and CV_Error_ macros.
+@see error
+ */
+class CV_EXPORTS Exception : public std::exception
+{
+public:
+    /*!
+     Default constructor
+     */
+    Exception();
+    /*!
+     Full constructor. Normally the constructor is not called explicitly.
+     Instead, the macros CV_Error(), CV_Error_() and CV_Assert() are used.
+    */
+    Exception(int _code, const String& _err, const String& _func, const String& _file, int _line);
+    virtual ~Exception() throw();
+
+    /*!
+     \return the error description and the context as a text string.
+    */
+    virtual const char *what() const throw() CV_OVERRIDE;
+    void formatMessage();
+
+    String msg; ///< the formatted error message
+
+    int code; ///< error code @see CVStatus
+    String err; ///< error description
+    String func; ///< function name. Available only when the compiler supports getting it
+    String file; ///< source file name where the error has occurred
+    int line; ///< line number in the source file where the error has occurred
+};
+
+/*! @brief Signals an error and raises the exception.
+
+By default the function prints information about the error to stderr,
+then it either stops if cv::setBreakOnError() had been called before or raises the exception.
+It is possible to alter error processing by using #redirectError().
+@param exc the exception raised.
+@deprecated drop this version
+ */
+CV_EXPORTS CV_NORETURN void error(const Exception& exc);
+
+enum SortFlags { SORT_EVERY_ROW    = 0, //!< each matrix row is sorted independently
+                 SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
+                                        //!< independently; this flag and the previous one are
+                                        //!< mutually exclusive.
+                 SORT_ASCENDING    = 0, //!< each matrix row is sorted in the ascending
+                                        //!< order.
+                 SORT_DESCENDING   = 16 //!< each matrix row is sorted in the
+                                        //!< descending order; this flag and the previous one are also
+                                        //!< mutually exclusive.
+               };
+
+//! @} core_utils
+
+//! @addtogroup core
+//! @{
+
+//! Covariance flags
+enum CovarFlags {
+    /** The output covariance matrix is calculated as:
+    \f[\texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...],\f]
+    The covariance matrix will be nsamples x nsamples. Such an unusual covariance matrix is used
+    for fast PCA of a set of very large vectors (see, for example, the EigenFaces technique for
+    face recognition). Eigenvalues of this "scrambled" matrix match the eigenvalues of the true
+    covariance matrix. The "true" eigenvectors can be easily calculated from the eigenvectors of
+    the "scrambled" covariance matrix. */
+    COVAR_SCRAMBLED = 0,
+    /** The output covariance matrix is calculated as:
+    \f[\texttt{scale} \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...] \cdot [ \texttt{vects} [0]- \texttt{mean} , \texttt{vects} [1]- \texttt{mean} ,...]^T,\f]
+    covar will be a square matrix of the same size as the total number of elements in each input
+    vector. One and only one of #COVAR_SCRAMBLED and #COVAR_NORMAL must be specified.*/
+    COVAR_NORMAL = 1,
+    /** If the flag is specified, the function does not calculate mean from
+    the input vectors but, instead, uses the passed mean vector. This is useful if mean has been
+    pre-calculated or known in advance, or if the covariance matrix is calculated by parts. In
+    this case, mean is not a mean vector of the input sub-set of vectors but rather the mean
+    vector of the whole set.*/
+    COVAR_USE_AVG = 2,
+    /** If the flag is specified, the covariance matrix is scaled. In the
+    "normal" mode, scale is 1./nsamples . In the "scrambled" mode, scale is the reciprocal of the
+    total number of elements in each input vector. By default (if the flag is not specified), the
+    covariance matrix is not scaled ( scale=1 ).*/
+    COVAR_SCALE = 4,
+    /** If the flag is
+    specified, all the input vectors are stored as rows of the samples matrix. mean should be a
+    single-row vector in this case.*/
+    COVAR_ROWS = 8,
+    /** If the flag is
+    specified, all the input vectors are stored as columns of the samples matrix. mean should be a
+    single-column vector in this case.*/
+    COVAR_COLS = 16
+};
+
+//! @addtogroup core_cluster
+//! @{
+
+//! k-Means flags
+enum KmeansFlags {
+    /** Select random initial centers in each attempt.*/
+    KMEANS_RANDOM_CENTERS     = 0,
+    /** Use kmeans++ center initialization by Arthur and Vassilvitskii [Arthur2007].*/
+    KMEANS_PP_CENTERS         = 2,
+    /** During the first (and possibly the only) attempt, use the
+    user-supplied labels instead of computing them from the initial centers. For the second and
+    further attempts, use the random or semi-random centers. Use one of the KMEANS_\*_CENTERS
+    flags to specify the exact method.*/
+    KMEANS_USE_INITIAL_LABELS = 1
+};
+
+//! @} core_cluster
+
+//! @addtogroup core_array
+//!
@{
+
+enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix.
+                   REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix.
+                   REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix.
+                   REDUCE_MIN = 3  //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix.
+                 };
+
+//! @} core_array
+
+/** @brief Swaps two matrices
+*/
+CV_EXPORTS void swap(Mat& a, Mat& b);
+/** @overload */
+CV_EXPORTS void swap( UMat& a, UMat& b );
+
+//! @} core
+
+//! @addtogroup core_array
+//! @{
+
+/** @brief Computes the source location of an extrapolated pixel.
+
+The function computes and returns the coordinate of a donor pixel corresponding to the specified
+extrapolated pixel when using the specified extrapolation border mode. For example, if you use
+cv::BORDER_WRAP mode in the horizontal direction, cv::BORDER_REFLECT_101 in the vertical direction and
+want to compute value of the "virtual" pixel Point(-5, 100) in a floating-point image img , it
+looks like:
+@code{.cpp}
+    float val = img.at<float>(borderInterpolate(100, img.rows, cv::BORDER_REFLECT_101),
+                              borderInterpolate(-5, img.cols, cv::BORDER_WRAP));
+@endcode
+Normally, the function is not called directly. It is used inside filtering functions and also in
+copyMakeBorder.
+@param p 0-based coordinate of the extrapolated pixel along one of the axes, likely \<0 or \>= len
+@param len Length of the array along the corresponding axis.
+@param borderType Border type, one of the #BorderTypes, except for #BORDER_TRANSPARENT and
+#BORDER_ISOLATED . When borderType==#BORDER_CONSTANT , the function always returns -1, regardless
+of p and len.
+
+@sa copyMakeBorder
+*/
+CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType);
+
+/** @example samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
+An example using copyMakeBorder function.
+Check @ref tutorial_copyMakeBorder "the corresponding tutorial" for more details
+*/
+
+/** @brief Forms a border around an image.
+
+The function copies the source image into the middle of the destination image. The areas to the
+left, to the right, above and below the copied source image will be filled with extrapolated
+pixels. This is not what filtering functions based on it do (they extrapolate pixels on the fly), but
+what other more complex functions, including your own, may do to simplify image boundary handling.
+
+The function supports the mode when src is already in the middle of dst . In this case, the
+function does not copy src itself but simply constructs the border, for example:
+
+@code{.cpp}
+    // let border be the same in all directions
+    int border=2;
+    // constructs a larger image to fit both the image and the border
+    Mat gray_buf(rgb.rows + border*2, rgb.cols + border*2, rgb.depth());
+    // select the middle part of it w/o copying data
+    Mat gray(gray_buf, Rect(border, border, rgb.cols, rgb.rows));
+    // convert image from RGB to grayscale
+    cvtColor(rgb, gray, COLOR_RGB2GRAY);
+    // form a border in-place
+    copyMakeBorder(gray, gray_buf, border, border,
+                   border, border, BORDER_REPLICATE);
+    // now do some custom filtering ...
+    ...
+@endcode
+@note When the source image is a part (ROI) of a bigger image, the function will try to use the
+pixels outside of the ROI to form a border. To disable this feature and always do extrapolation, as
+if src was not a ROI, use borderType | #BORDER_ISOLATED.
+
+@param src Source image.
+@param dst Destination image of the same type as src and the size Size(src.cols+left+right,
+src.rows+top+bottom) .
+@param top the top pixels
+@param bottom the bottom pixels
+@param left the left pixels
+@param right Parameter specifying how many pixels in each direction from the source image rectangle
+to extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs
+to be built.
+@param borderType Border type. See borderInterpolate for details.
+@param value Border value if borderType==BORDER_CONSTANT .
+
+@sa borderInterpolate
+*/
+CV_EXPORTS_W void copyMakeBorder(InputArray src, OutputArray dst,
+                                 int top, int bottom, int left, int right,
+                                 int borderType, const Scalar& value = Scalar() );
+
+/** @brief Calculates the per-element sum of two arrays or an array and a scalar.
+
+The function add calculates:
+- Sum of two arrays when both input arrays have the same size and the same number of channels:
+\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) + \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f]
+- Sum of an array and a scalar when src2 is constructed from Scalar or has the same number of
+elements as `src1.channels()`:
+\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) + \texttt{src2} ) \quad \texttt{if mask}(I) \ne0\f]
+- Sum of a scalar and an array when src1 is constructed from Scalar or has the same number of
+elements as `src2.channels()`:
+\f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1} + \texttt{src2}(I) ) \quad \texttt{if mask}(I) \ne0\f]
+where `I` is a multi-dimensional index of array elements. In case of multi-channel arrays, each
+channel is processed independently.
+
+The first function in the list above can be replaced with matrix expressions:
+@code{.cpp}
+    dst = src1 + src2;
+    dst += src1; // equivalent to add(dst, src1, dst);
+@endcode
+The input arrays and the output array can all have the same or different depths. For example, you
+can add a 16-bit unsigned array to an 8-bit signed array and store the sum as a 32-bit
+floating-point array. Depth of the output array is determined by the dtype parameter. In the second
+and third cases above, as well as in the first case, when src1.depth() == src2.depth(), dtype can
+be set to the default -1. In this case, the output array will have the same depth as the input
+array, be it src1, src2 or both.
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get a
+result of an incorrect sign in the case of overflow.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and number of channels as the input array(s); the
+depth is defined by dtype or src1/src2.
+@param mask optional operation mask - 8-bit single channel array, that specifies elements of the
+output array to be changed.
+@param dtype optional depth of the output array (see the discussion below).
+@sa subtract, addWeighted, scaleAdd, Mat::convertTo
+*/
+CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst,
+                      InputArray mask = noArray(), int dtype = -1);
+
+/** @brief Calculates the per-element difference between two arrays or array and a scalar.
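+
+For example, a minimal sketch showing that 8-bit results saturate instead of
+wrapping around (the matrices here are illustrative):
+@code{.cpp}
+    Mat a(1, 1, CV_8U, Scalar(50));
+    Mat b(1, 1, CV_8U, Scalar(100));
+    Mat c;
+    subtract(a, b, c); // 50 - 100 == -50, saturated to 0 for CV_8U
+@endcode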
+The function subtract calculates:
+- Difference between two arrays, when both input arrays have the same size and the same number of
+channels:
+    \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) - \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f]
+- Difference between an array and a scalar, when src2 is constructed from Scalar or has the same
+number of elements as `src1.channels()`:
+    \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1}(I) - \texttt{src2} ) \quad \texttt{if mask}(I) \ne0\f]
+- Difference between a scalar and an array, when src1 is constructed from Scalar or has the same
+number of elements as `src2.channels()`:
+    \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src1} - \texttt{src2}(I) ) \quad \texttt{if mask}(I) \ne0\f]
+- The reverse difference between a scalar and an array in the case of `SubRS`:
+    \f[\texttt{dst}(I) = \texttt{saturate} ( \texttt{src2} - \texttt{src1}(I) ) \quad \texttt{if mask}(I) \ne0\f]
+where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each
+channel is processed independently.
+
+The first function in the list above can be replaced with matrix expressions:
+@code{.cpp}
+    dst = src1 - src2;
+    dst -= src1; // equivalent to subtract(dst, src1, dst);
+@endcode
+The input arrays and the output array can all have the same or different depths. For example, you
+can subtract two 8-bit unsigned arrays and store the difference in a 16-bit signed array. Depth of
+the output array is determined by dtype parameter. In the second and third cases above, as well as
+in the first case, when src1.depth() == src2.depth(), dtype can be set to the default -1. In this
+case the output array will have the same depth as the input array, be it src1, src2 or both.
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get a
+result of an incorrect sign in the case of overflow.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array of the same size and the same number of channels as the input array.
+@param mask optional operation mask; this is an 8-bit single channel array that specifies elements
+of the output array to be changed.
+@param dtype optional depth of the output array
+@sa add, addWeighted, scaleAdd, Mat::convertTo
+ */
+CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst,
+                           InputArray mask = noArray(), int dtype = -1);
+
+
+/** @brief Calculates the per-element scaled product of two arrays.
+
+The function multiply calculates the per-element product of two arrays:
+
+\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{scale} \cdot \texttt{src1} (I) \cdot \texttt{src2} (I))\f]
+
+There is also a @ref MatrixExpressions -friendly variant of the first function. See Mat::mul .
+
+For a not-per-element matrix product, see gemm .
+
+@note Saturation is not applied when the output array has the depth
+CV_32S. You may even get a result of an incorrect sign in the case of
+overflow.
+@param src1 first input array.
+@param src2 second input array of the same size and the same type as src1.
+@param dst output array of the same size and type as src1.
+@param scale optional scale factor.
+@param dtype optional depth of the output array
+@sa add, subtract, divide, scaleAdd, addWeighted, accumulate, accumulateProduct, accumulateSquare,
+Mat::convertTo
+*/
+CV_EXPORTS_W void multiply(InputArray src1, InputArray src2,
+                           OutputArray dst, double scale = 1, int dtype = -1);
+
+/** @brief Performs per-element division of two arrays or a scalar by an array.
+
+The function cv::divide divides one array by another:
+\f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f]
+or a scalar by an array when there is no src1 :
+\f[\texttt{dst(I) = saturate(scale/src2(I))}\f]
+
+Different channels of multi-channel arrays are processed independently.
+
+For integer types when src2(I) is zero, dst(I) will also be zero.
+
+@note In case of floating point data there is no special defined behavior for zero src2(I) values.
+Regular floating-point division is used.
+Expect correct IEEE-754 behaviour for floating-point data (with NaN, Inf result values).
+
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get a
+result of an incorrect sign in the case of overflow.
+@param src1 first input array.
+@param src2 second input array of the same size and type as src1.
+@param scale scalar factor.
+@param dst output array of the same size and type as src2.
+@param dtype optional depth of the output array; if -1, dst will have depth src2.depth(), but in
+case of an array-by-array division, you can only pass -1 when src1.depth()==src2.depth().
+@sa multiply, add, subtract
+*/
+CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst,
+                         double scale = 1, int dtype = -1);
+
+/** @overload */
+CV_EXPORTS_W void divide(double scale, InputArray src2,
+                         OutputArray dst, int dtype = -1);
+
+/** @brief Calculates the sum of a scaled array and another array.
+
+The function scaleAdd is one of the classical primitive linear algebra operations, known as DAXPY
+or SAXPY in [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms). It calculates
+the sum of a scaled array and another array:
+\f[\texttt{dst} (I)= \texttt{scale} \cdot \texttt{src1} (I) + \texttt{src2} (I)\f]
+The function can also be emulated with a matrix expression, for example:
+@code{.cpp}
+    Mat A(3, 3, CV_64F);
+    ...
+    A.row(0) = A.row(1)*2 + A.row(2);
+@endcode
+@param src1 first input array.
+@param alpha scale factor for the first array.
+@param src2 second input array of the same size and type as src1.
+@param dst output array of the same size and type as src1.
+@sa add, addWeighted, subtract, Mat::dot, Mat::convertTo
+*/
+CV_EXPORTS_W void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst);
+
+/** @example samples/cpp/tutorial_code/HighGUI/AddingImagesTrackbar.cpp
+Check @ref tutorial_trackbar "the corresponding tutorial" for more details
+*/
+
+/** @brief Calculates the weighted sum of two arrays.
+
+The function addWeighted calculates the weighted sum of two arrays as follows:
+\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )\f]
+where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each
+channel is processed independently.
+The function can be replaced with a matrix expression:
+@code{.cpp}
+    dst = src1*alpha + src2*beta + gamma;
+@endcode
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get a
+result of an incorrect sign in the case of overflow.
+@param src1 first input array.
+@param alpha weight of the first array elements.
+@param src2 second input array of the same size and channel number as src1.
+@param beta weight of the second array elements.
+@param gamma scalar added to each sum.
+@param dst output array that has the same size and number of channels as the input arrays.
+@param dtype optional depth of the output array; when both input arrays have the same depth, dtype
+can be set to -1, which will be equivalent to src1.depth().
+@sa add, subtract, scaleAdd, Mat::convertTo
+*/
+CV_EXPORTS_W void addWeighted(InputArray src1, double alpha, InputArray src2,
+                              double beta, double gamma, OutputArray dst, int dtype = -1);
+
+/** @brief Scales, calculates absolute values, and converts the result to 8-bit.
+
+On each element of the input array, the function convertScaleAbs
+performs three operations sequentially: scaling, taking an absolute
+value, conversion to an unsigned 8-bit type:
+\f[\texttt{dst} (I)= \texttt{saturate\_cast<uchar>} (| \texttt{src} (I)* \texttt{alpha} + \texttt{beta} |)\f]
+In case of multi-channel arrays, the function processes each channel
+independently. When the output is not 8-bit, the operation can be
+emulated by calling the Mat::convertTo method (or by using matrix
+expressions) and then by calculating an absolute value of the result.
+For example:
+@code{.cpp}
+    Mat_<float> A(30,30);
+    randu(A, Scalar(-100), Scalar(100));
+    Mat_<float> B = A*5 + 3;
+    B = abs(B);
+    // Mat_<float> B = abs(A*5+3) will also do the job,
+    // but it will allocate a temporary matrix
+@endcode
+@param src input array.
+@param dst output array.
+@param alpha optional scale factor.
+@param beta optional delta added to the scaled values.
+@sa Mat::convertTo, cv::abs(const Mat&)
+*/
+CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
+                                  double alpha = 1, double beta = 0);
+
+/** @brief Converts an array to half-precision floating-point numbers.
+
+This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data.
+There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or
+CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error.
+The format of half precision floating point is defined in IEEE 754-2008.
+
+@param src input array.
+@param dst output array.
+*/
+CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst);
+
+/** @brief Performs a look-up table transform of an array.
+
+The function LUT fills the output array with values from the look-up table. Indices of the entries
+are taken from the input array. That is, the function processes each element of src as follows:
+\f[\texttt{dst} (I) \leftarrow \texttt{lut(src(I) + d)}\f]
+where
+\f[d = \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f]
+@param src input array of 8-bit elements.
+@param lut look-up table of 256 elements; in case of multi-channel input array, the table should
+either have a single channel (in this case the same table is used for all channels) or the same
+number of channels as in the input array.
+@param dst output array of the same size and number of channels as src, and the same depth as lut.
+@sa convertScaleAbs, Mat::convertTo
+*/
+CV_EXPORTS_W void LUT(InputArray src, InputArray lut, OutputArray dst);
+
+/** @brief Calculates the sum of array elements.
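+
+For example, a quick sketch with illustrative values:
+@code{.cpp}
+    // 10x10 3-channel matrix, every element set to (1, 2, 3)
+    Mat img(10, 10, CV_8UC3, Scalar(1, 2, 3));
+    Scalar s = sum(img); // s == (100, 200, 300, 0): 100 elements per channel
+@endcode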
+
+The function cv::sum calculates and returns the sum of array elements,
+independently for each channel.
+@param src input array that must have from 1 to 4 channels.
+@sa countNonZero, mean, meanStdDev, norm, minMaxLoc, reduce
+*/
+CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src);
+
+/** @brief Counts non-zero array elements.
+
+The function returns the number of non-zero elements in src :
+\f[\sum _{I: \; \texttt{src} (I) \ne0 } 1\f]
+@param src single-channel array.
+@sa mean, meanStdDev, norm, minMaxLoc, calcCovarMatrix
+*/
+CV_EXPORTS_W int countNonZero( InputArray src );
+
+/** @brief Returns the list of locations of non-zero pixels
+
+Given a binary matrix (likely returned from an operation such
+as threshold(), compare(), >, ==, etc.), return all of
+the non-zero indices as a cv::Mat or std::vector<cv::Point> (x,y)
+For example:
+@code{.cpp}
+    cv::Mat binaryImage; // input, binary image
+    cv::Mat locations;   // output, locations of non-zero pixels
+    cv::findNonZero(binaryImage, locations);
+
+    // access pixel coordinates
+    Point pnt = locations.at<Point>(i);
+@endcode
+or
+@code{.cpp}
+    cv::Mat binaryImage; // input, binary image
+    vector<Point> locations;   // output, locations of non-zero pixels
+    cv::findNonZero(binaryImage, locations);
+
+    // access pixel coordinates
+    Point pnt = locations[i];
+@endcode
+@param src single-channel array
+@param idx the output array, type of cv::Mat or std::vector<cv::Point>, corresponding to non-zero indices in the input
+*/
+CV_EXPORTS_W void findNonZero( InputArray src, OutputArray idx );
+
+/** @brief Calculates an average (mean) of array elements.
+
+The function cv::mean calculates the mean value M of array elements,
+independently for each channel, and returns it:
+\f[\begin{array}{l} N = \sum _{I: \; \texttt{mask} (I) \ne 0} 1 \\ M_c = \left ( \sum _{I: \; \texttt{mask} (I) \ne 0}{ \texttt{mtx} (I)_c} \right )/N \end{array}\f]
+When all the mask elements are 0's, the function returns Scalar::all(0)
+@param src input array that should have from 1 to 4 channels so that the result can be stored in
+Scalar_ .
+@param mask optional operation mask.
+@sa countNonZero, meanStdDev, norm, minMaxLoc
+*/
+CV_EXPORTS_W Scalar mean(InputArray src, InputArray mask = noArray());
+
+/** @brief Calculates a mean and standard deviation of array elements.
+
+The function cv::meanStdDev calculates the mean and the standard deviation M
+of array elements independently for each channel and returns it via the
+output parameters:
+\f[\begin{array}{l} N = \sum _{I, \texttt{mask} (I) \ne 0} 1 \\ \texttt{mean} _c = \frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \texttt{src} (I)_c}{N} \\ \texttt{stddev} _c = \sqrt{\frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \left ( \texttt{src} (I)_c - \texttt{mean} _c \right )^2}{N}} \end{array}\f]
+When all the mask elements are 0's, the function returns
+mean=stddev=Scalar::all(0).
+@note The calculated standard deviation is only the diagonal of the
+complete normalized covariance matrix. If the full matrix is needed, you
+can reshape the multi-channel array M x N to the single-channel array
+M\*N x mtx.channels() (only possible when the matrix is continuous) and
+then pass the matrix to calcCovarMatrix .
+@param src input array that should have from 1 to 4 channels so that the results can be stored in
+Scalar_ 's.
+@param mean output parameter: calculated mean value.
+@param stddev output parameter: calculated standard deviation.
+@param mask optional operation mask.
+@sa countNonZero, mean, norm, minMaxLoc, calcCovarMatrix
+*/
+CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray mean, OutputArray stddev,
+                             InputArray mask=noArray());
+
+/** @brief Calculates the absolute norm of an array.
+
+This version of #norm calculates the absolute norm of src1. The type of norm to calculate is specified using #NormTypes.
+
+As an example for one array, consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\
+    \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\
+    \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\
+    \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\
+    \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5.
+\f}
+The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$.
+It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$.
+![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png)
+
+When the mask parameter is specified and it is not empty, the norm is
+calculated only over the region specified by the mask.
+
+If normType is not specified, #NORM_L2 is used.
+
+Multi-channel input arrays are treated as single-channel arrays, that is,
+the results for all channels are combined.
+
+Hamming norms can only be calculated with CV_8U depth arrays.
+
+@param src1 first input array.
+@param normType type of the norm (see #NormTypes).
+@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
+*/
+CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mask = noArray());
+
+/** @brief Calculates an absolute difference norm or a relative difference norm.
+
+This version of cv::norm calculates the absolute difference norm
+or the relative difference norm of arrays src1 and src2.
+The type of norm to calculate is specified using #NormTypes.
+
+@param src1 first input array.
+@param src2 second input array of the same size and the same type as src1.
+@param normType type of the norm (see #NormTypes).
+@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
+*/
+CV_EXPORTS_W double norm(InputArray src1, InputArray src2,
+                         int normType = NORM_L2, InputArray mask = noArray());
+/** @overload
+@param src first input array.
+@param normType type of the norm (see #NormTypes).
+*/
+CV_EXPORTS double norm( const SparseMat& src, int normType );
+
+/** @brief Computes the Peak Signal-to-Noise Ratio (PSNR) image quality metric.
+
+This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB),
+between two input arrays src1 and src2. The arrays must have the same type.
+
+The PSNR is calculated as follows:
+
+\f[
+\texttt{PSNR} = 10 \cdot \log_{10}{\left( \frac{R^2}{MSE} \right) }
+\f]
+
+where R is the maximum integer value of depth (e.g. 255 in the case of CV_8U data)
+and MSE is the mean squared error between the two arrays.
+
+@param src1 first input array.
+@param src2 second input array of the same size as src1.
+@param R the maximum pixel value (255 by default)
+
+ */
+CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2, double R=255.);
+
+/** @brief Naive nearest neighbor finder
+
+see http://en.wikipedia.org/wiki/Nearest_neighbor_search
+@todo document
+ */
+CV_EXPORTS_W void batchDistance(InputArray src1, InputArray src2,
+                                OutputArray dist, int dtype, OutputArray nidx,
+                                int normType = NORM_L2, int K = 0,
+                                InputArray mask = noArray(), int update = 0,
+                                bool crosscheck = false);
+
+/** @brief Normalizes the norm or value range of an array.
+
+The function cv::normalize scales and shifts the input array elements so that
+\f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f]
+(where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that
+\f[\min _I \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I \texttt{dst} (I)= \texttt{beta}\f]
+
+when normType=NORM_MINMAX (for dense arrays only). The optional mask specifies a sub-array to be
+normalized. This means that the norm or min-max values are calculated over the sub-array, and then this
+sub-array is modified to be normalized. If you want to only use the mask to calculate the norm or
+min-max but modify the whole array, you can use norm and Mat::convertTo.
+
+In case of sparse matrices, only the non-zero values are analyzed and transformed. Because of this,
+the range transformation for sparse matrices is not allowed since it can shift the zero level.
+
+Possible usage with some positive example data:
+@code{.cpp}
+    vector<double> positiveData = { 2.0, 8.0, 10.0 };
+    vector<double> normalizedData_l1, normalizedData_l2, normalizedData_inf, normalizedData_minmax;
+
+    // Norm to probability (total count)
+    // sum(numbers) = 20.0
+    // 2.0      0.1     (2.0/20.0)
+    // 8.0      0.4     (8.0/20.0)
+    // 10.0     0.5     (10.0/20.0)
+    normalize(positiveData, normalizedData_l1, 1.0, 0.0, NORM_L1);
+
+    // Norm to unit vector: ||positiveData|| = 1.0
+    // 2.0      0.15
+    // 8.0      0.62
+    // 10.0     0.77
+    normalize(positiveData, normalizedData_l2, 1.0, 0.0, NORM_L2);
+
+    // Norm to max element
+    // 2.0      0.2     (2.0/10.0)
+    // 8.0      0.8     (8.0/10.0)
+    // 10.0     1.0     (10.0/10.0)
+    normalize(positiveData, normalizedData_inf, 1.0, 0.0, NORM_INF);
+
+    // Norm to range [0.0;1.0]
+    // 2.0      0.0     (shift to left border)
+    // 8.0      0.75    (6.0/8.0)
+    // 10.0     1.0     (shift to right border)
+    normalize(positiveData, normalizedData_minmax, 1.0, 0.0, NORM_MINMAX);
+@endcode
+
+@param src input array.
+@param dst output array of the same size as src .
+@param alpha norm value to normalize to or the lower range boundary in case of the range
+normalization.
+@param beta upper range boundary in case of the range normalization; it is not used for the norm
+normalization.
+@param norm_type normalization type (see cv::NormTypes).
+@param dtype when negative, the output array has the same type as src; otherwise, it has the same
+number of channels as src and the depth =CV_MAT_DEPTH(dtype).
+@param mask optional operation mask.
+@sa norm, Mat::convertTo, SparseMat::convertTo
+*/
+CV_EXPORTS_W void normalize( InputArray src, InputOutputArray dst, double alpha = 1, double beta = 0,
+                             int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray());
+
+/** @overload
+@param src input array.
+@param dst output array of the same size as src .
+@param alpha norm value to normalize to or the lower range boundary in case of the range
+normalization.
+@param normType normalization type (see cv::NormTypes).
+*/
+CV_EXPORTS void normalize( const SparseMat& src, SparseMat& dst, double alpha, int normType );
+
+/** @brief Finds the global minimum and maximum in an array.
+
+The function cv::minMaxLoc finds the minimum and maximum element values and their positions. The
+extremums are searched across the whole array or, if mask is not an empty array, in the specified
+array region.
+
+The function does not work with multi-channel arrays. If you need to find minimum or maximum
+elements across all the channels, use Mat::reshape first to reinterpret the array as
+single-channel. Or you may extract the particular channel using either extractImageCOI , or
+mixChannels , or split .
+@param src input single-channel array.
+@param minVal pointer to the returned minimum value; NULL is used if not required.
+@param maxVal pointer to the returned maximum value; NULL is used if not required.
+@param minLoc pointer to the returned minimum location (in 2D case); NULL is used if not required.
+@param maxLoc pointer to the returned maximum location (in 2D case); NULL is used if not required.
+@param mask optional mask used to select a sub-array.
+@sa max, min, reduceArgMin, reduceArgMax, compare, inRange, extractImageCOI, mixChannels, split, Mat::reshape
+*/
+CV_EXPORTS_W void minMaxLoc(InputArray src, CV_OUT double* minVal,
+                            CV_OUT double* maxVal = 0, CV_OUT Point* minLoc = 0,
+                            CV_OUT Point* maxLoc = 0, InputArray mask = noArray());
+
+/**
+ * @brief Finds indices of min elements along provided axis
+ *
+ * @note
+ *      - If input or output array is not continuous, this function will create an internal copy.
+ *      - NaN handling is left unspecified, see patchNaNs().
+ *      - The returned index is always in bounds of input matrix.
+ *
+ * @param src input single-channel array.
+ * @param dst output array of type CV_32SC1 with the same dimensionality as src,
+ * except for axis being reduced - it should be set to 1.
+ * @param lastIndex whether to get the index of first or last occurrence of min.
+ * @param axis axis to reduce along.
+ * @sa reduceArgMax, minMaxLoc, min, max, compare, reduce
+ */
+CV_EXPORTS_W void reduceArgMin(InputArray src, OutputArray dst, int axis, bool lastIndex = false);
+
+/**
+ * @brief Finds indices of max elements along provided axis
+ *
+ * @note
+ *      - If input or output array is not continuous, this function will create an internal copy.
+ *      - NaN handling is left unspecified, see patchNaNs().
+ *      - The returned index is always in bounds of input matrix.
+ *
+ * @param src input single-channel array.
+ * @param dst output array of type CV_32SC1 with the same dimensionality as src,
+ * except for axis being reduced - it should be set to 1.
+ * @param lastIndex whether to get the index of first or last occurrence of max.
+ * @param axis axis to reduce along.
+ * @sa reduceArgMin, minMaxLoc, min, max, compare, reduce
+ */
+CV_EXPORTS_W void reduceArgMax(InputArray src, OutputArray dst, int axis, bool lastIndex = false);
+
+/** @brief Finds the global minimum and maximum in an array
+
+The function cv::minMaxIdx finds the minimum and maximum element values and their positions. The
+extremums are searched across the whole array or, if mask is not an empty array, in the specified
+array region. The function does not work with multi-channel arrays. If you need to find minimum or
+maximum elements across all the channels, use Mat::reshape first to reinterpret the array as
+single-channel. Or you may extract the particular channel using either extractImageCOI , or
+mixChannels , or split .
In case of a sparse matrix, the minimum is found among non-zero elements +only. +@note When minIdx is not NULL, it must have at least 2 elements (as well as maxIdx), even if src is +a single-row or single-column matrix. In OpenCV (following MATLAB) each array has at least 2 +dimensions, i.e. single-column matrix is Mx1 matrix (and therefore minIdx/maxIdx will be +(i1,0)/(i2,0)) and single-row matrix is 1xN matrix (and therefore minIdx/maxIdx will be +(0,j1)/(0,j2)). +@param src input single-channel array. +@param minVal pointer to the returned minimum value; NULL is used if not required. +@param maxVal pointer to the returned maximum value; NULL is used if not required. +@param minIdx pointer to the returned minimum location (in nD case); NULL is used if not required; +Otherwise, it must point to an array of src.dims elements, the coordinates of the minimum element +in each dimension are stored there sequentially. +@param maxIdx pointer to the returned maximum location (in nD case). NULL is used if not required. +@param mask specified array region +*/ +CV_EXPORTS void minMaxIdx(InputArray src, double* minVal, double* maxVal = 0, + int* minIdx = 0, int* maxIdx = 0, InputArray mask = noArray()); + +/** @overload +@param a input single-channel array. +@param minVal pointer to the returned minimum value; NULL is used if not required. +@param maxVal pointer to the returned maximum value; NULL is used if not required. +@param minIdx pointer to the returned minimum location (in nD case); NULL is used if not required; +Otherwise, it must point to an array of src.dims elements, the coordinates of the minimum element +in each dimension are stored there sequentially. +@param maxIdx pointer to the returned maximum location (in nD case). NULL is used if not required. +*/ +CV_EXPORTS void minMaxLoc(const SparseMat& a, double* minVal, + double* maxVal, int* minIdx = 0, int* maxIdx = 0); + +/** @brief Reduces a matrix to a vector. + +The function #reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of +1D vectors and performing the specified operation on the vectors until a single row/column is +obtained. For example, the function can be used to compute horizontal and vertical projections of a +raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one. +In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy. +And multi-channel arrays are also supported in these two reduction modes. + +The following code demonstrates its usage for a single channel matrix. +@snippet snippets/core_reduce.cpp example + +And the following code demonstrates its usage for a two-channel matrix. +@snippet snippets/core_reduce.cpp example2 + +@param src input 2D matrix. +@param dst output vector. Its size and type is defined by dim and dtype parameters. +@param dim dimension index along which the matrix is reduced. 0 means that the matrix is reduced to +a single row. 1 means that the matrix is reduced to a single column. +@param rtype reduction operation that could be one of #ReduceTypes +@param dtype when negative, the output vector will have the same type as the input matrix, +otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()). +@sa repeat, reduceArgMin, reduceArgMax +*/ +CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, int dtype = -1); + +/** @brief Creates one multi-channel array out of several single-channel ones. 
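+
+A minimal sketch (an illustrative assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat b(2, 2, CV_8UC1, Scalar(1)), g(2, 2, CV_8UC1, Scalar(2)), r(2, 2, CV_8UC1, Scalar(3));
+    std::vector<Mat> channels = { b, g, r };
+    Mat bgr;
+    merge(channels, bgr);   // bgr is 2x2 CV_8UC3, each pixel (1,2,3)
+@endcode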
+
+The function cv::merge merges several arrays to make a single multi-channel array. That is, each
+element of the output array will be a concatenation of the elements of the input arrays, where
+elements of i-th input array are treated as mv[i].channels()-element vectors.
+
+The function cv::split does the reverse operation. If you need to shuffle channels in some other
+advanced way, use cv::mixChannels.
+
+The following example shows how to merge 3 single channel matrices into a single 3-channel matrix.
+@snippet snippets/core_merge.cpp example
+
+@param mv input array of matrices to be merged; all the matrices in mv must have the same
+size and the same depth.
+@param count number of input matrices when mv is a plain C array; it must be greater than zero.
+@param dst output array of the same size and the same depth as mv[0]; the number of channels will
+be equal to the parameter count.
+@sa mixChannels, split, Mat::reshape
+*/
+CV_EXPORTS void merge(const Mat* mv, size_t count, OutputArray dst);
+
+/** @overload
+@param mv input vector of matrices to be merged; all the matrices in mv must have the same
+size and the same depth.
+@param dst output array of the same size and the same depth as mv[0]; the number of channels will
+be the total number of channels in the matrix array.
+ */
+CV_EXPORTS_W void merge(InputArrayOfArrays mv, OutputArray dst);
+
+/** @brief Divides a multi-channel array into several single-channel arrays.
+
+The function cv::split splits a multi-channel array into separate single-channel arrays:
+\f[\texttt{mv} [c](I) = \texttt{src} (I)_c\f]
+If you need to extract a single channel or do some other sophisticated channel permutation, use
+mixChannels .
+
+The following example demonstrates how to split a 3-channel matrix into 3 single channel matrices.
+@snippet snippets/core_split.cpp example
+
+@param src input multi-channel array.
+@param mvbegin output array; the number of arrays must match src.channels(); the arrays themselves are
+reallocated, if needed.
+@sa merge, mixChannels, cvtColor
+*/
+CV_EXPORTS void split(const Mat& src, Mat* mvbegin);
+
+/** @overload
+@param m input multi-channel array.
+@param mv output vector of arrays; the arrays themselves are reallocated, if needed.
+*/
+CV_EXPORTS_W void split(InputArray m, OutputArrayOfArrays mv);
+
+/** @brief Copies specified channels from input arrays to the specified channels of
+output arrays.
+
+The function cv::mixChannels provides an advanced mechanism for shuffling image channels.
+
+cv::split, cv::merge, cv::extractChannel, cv::insertChannel and some forms of cv::cvtColor are partial cases of cv::mixChannels.
+
+In the example below, the code splits a 4-channel BGRA image into a 3-channel BGR (with B and R
+channels swapped) and a separate alpha-channel image:
+@code{.cpp}
+    Mat bgra( 100, 100, CV_8UC4, Scalar(255,0,0,255) );
+    Mat bgr( bgra.rows, bgra.cols, CV_8UC3 );
+    Mat alpha( bgra.rows, bgra.cols, CV_8UC1 );
+
+    // forming an array of matrices is a quite efficient operation,
+    // because the matrix data is not copied, only the headers
+    Mat out[] = { bgr, alpha };
+    // bgra[0] -> bgr[2], bgra[1] -> bgr[1],
+    // bgra[2] -> bgr[0], bgra[3] -> alpha[0]
+    int from_to[] = { 0,2, 1,1, 2,0, 3,3 };
+    mixChannels( &bgra, 1, out, 2, from_to, 4 );
+@endcode
+@note Unlike many other new-style C++ functions in OpenCV (see the introduction section and
+Mat::create ), cv::mixChannels requires the output arrays to be pre-allocated before calling the
+function.
+@param src input array or vector of matrices; all of the matrices must have the same size and the +same depth. +@param nsrcs number of matrices in `src`. +@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and +depth must be the same as in `src[0]`. +@param ndsts number of matrices in `dst`. +@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is +a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in +dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to +src[0].channels()-1, the second input image channels are indexed from src[0].channels() to +src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image +channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is +filled with zero . +@param npairs number of index pairs in `fromTo`. +@sa split, merge, extractChannel, insertChannel, cvtColor +*/ +CV_EXPORTS void mixChannels(const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, + const int* fromTo, size_t npairs); + +/** @overload +@param src input array or vector of matrices; all of the matrices must have the same size and the +same depth. +@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and +depth must be the same as in src[0]. +@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is +a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in +dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to +src[0].channels()-1, the second input image channels are indexed from src[0].channels() to +src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image +channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is +filled with zero . +@param npairs number of index pairs in fromTo. +*/ +CV_EXPORTS void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst, + const int* fromTo, size_t npairs); + +/** @overload +@param src input array or vector of matrices; all of the matrices must have the same size and the +same depth. +@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and +depth must be the same as in src[0]. +@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is +a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in +dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to +src[0].channels()-1, the second input image channels are indexed from src[0].channels() to +src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image +channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is +filled with zero . 
+*/
+CV_EXPORTS_W void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
+                              const std::vector<int>& fromTo);
+
+/** @brief Extracts a single channel from src (coi is 0-based index)
+@param src input array
+@param dst output array
+@param coi index of channel to extract
+@sa mixChannels, split
+*/
+CV_EXPORTS_W void extractChannel(InputArray src, OutputArray dst, int coi);
+
+/** @brief Inserts a single channel to dst (coi is 0-based index)
+@param src input array
+@param dst output array
+@param coi index of channel for insertion
+@sa mixChannels, merge
+*/
+CV_EXPORTS_W void insertChannel(InputArray src, InputOutputArray dst, int coi);
+
+/** @brief Flips a 2D array around vertical, horizontal, or both axes.
+
+The function cv::flip flips the array in one of three different ways (row
+and column indices are 0-based):
+\f[\texttt{dst} _{ij} =
+\left\{
+\begin{array}{l l}
+\texttt{src} _{\texttt{src.rows}-i-1,j} & if\; \texttt{flipCode} = 0 \\
+\texttt{src} _{i, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} > 0 \\
+\texttt{src} _{ \texttt{src.rows} -i-1, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} < 0 \\
+\end{array}
+\right.\f]
+The example scenarios of using the function are the following:
+*   Vertical flipping of the image (flipCode == 0) to switch between
+    top-left and bottom-left image origin. This is a typical operation
+    in video processing on Microsoft Windows\* OS.
+*   Horizontal flipping of the image with the subsequent horizontal
+    shift and absolute difference calculation to check for a
+    vertical-axis symmetry (flipCode \> 0).
+*   Simultaneous horizontal and vertical flipping of the image with
+    the subsequent shift and absolute difference calculation to check
+    for a central symmetry (flipCode \< 0).
+*   Reversing the order of point arrays (flipCode \> 0 or
+    flipCode == 0).
+@param src input array.
+@param dst output array of the same size and type as src.
+@param flipCode a flag to specify how to flip the array; 0 means
+flipping around the x-axis and positive value (for example, 1) means
+flipping around y-axis. Negative value (for example, -1) means flipping
+around both axes.
+@sa transpose , repeat , completeSymm
+*/
+CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode);
+
+enum RotateFlags {
+    ROTATE_90_CLOCKWISE = 0, //!< Rotate 90 degrees clockwise
+    ROTATE_180 = 1, //!< Rotate 180 degrees clockwise
+    ROTATE_90_COUNTERCLOCKWISE = 2, //!< Rotate 270 degrees clockwise
+};
+
+/** @brief Rotates a 2D array in multiples of 90 degrees.
+The function cv::rotate rotates the array in one of three different ways:
+*   Rotate by 90 degrees clockwise (rotateCode = ROTATE_90_CLOCKWISE).
+*   Rotate by 180 degrees clockwise (rotateCode = ROTATE_180).
+*   Rotate by 270 degrees clockwise (rotateCode = ROTATE_90_COUNTERCLOCKWISE).
+@param src input array.
+@param dst output array of the same type as src. The size is the same with ROTATE_180,
+and the rows and cols are switched for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE.
+@param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags
+@sa transpose , repeat , completeSymm, flip, RotateFlags
+*/
+CV_EXPORTS_W void rotate(InputArray src, OutputArray dst, int rotateCode);
+
+/** @brief Fills the output array with repeated copies of the input array.
+
+The function cv::repeat duplicates the input array one or more times along each of the two axes:
+\f[\texttt{dst} _{ij}= \texttt{src} _{i\mod src.rows, \; j\mod src.cols }\f]
+The second variant of the function is more convenient to use with @ref MatrixExpressions.
+@param src input array to replicate.
+@param ny Flag to specify how many times the `src` is repeated along the
+vertical axis.
+@param nx Flag to specify how many times the `src` is repeated along the
+horizontal axis.
+@param dst output array of the same type as src.
+@sa cv::reduce
+*/
+CV_EXPORTS_W void repeat(InputArray src, int ny, int nx, OutputArray dst);
+
+/** @overload
+@param src input array to replicate.
+@param ny Flag to specify how many times the `src` is repeated along the
+vertical axis.
+@param nx Flag to specify how many times the `src` is repeated along the
+horizontal axis.
+*/
+CV_EXPORTS Mat repeat(const Mat& src, int ny, int nx);
+
+/** @brief Applies horizontal concatenation to given matrices.
+
+The function horizontally concatenates two or more cv::Mat matrices (with the same number of rows).
+@code{.cpp}
+    cv::Mat matArray[] = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)),
+                           cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)),
+                           cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::hconcat( matArray, 3, out );
+    //out:
+    //[1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3]
+@endcode
+@param src input array or vector of matrices. all of the matrices must have the same number of rows and the same depth.
+@param nsrc number of matrices in src.
+@param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src.
+*/
+CV_EXPORTS void hconcat(const Mat* src, size_t nsrc, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    cv::Mat_<float> A = (cv::Mat_<float>(3, 2) << 1, 4,
+                                                  2, 5,
+                                                  3, 6);
+    cv::Mat_<float> B = (cv::Mat_<float>(3, 2) << 7, 10,
+                                                  8, 11,
+                                                  9, 12);
+
+    cv::Mat C;
+    cv::hconcat(A, B, C);
+    //C:
+    //[1, 4, 7, 10;
+    // 2, 5, 8, 11;
+    // 3, 6, 9, 12]
+ @endcode
+ @param src1 first input array to be considered for horizontal concatenation.
+ @param src2 second input array to be considered for horizontal concatenation.
+ @param dst output array. It has the same number of rows and depth as the src1 and src2, and the sum of cols of the src1 and src2.
+ */
+CV_EXPORTS void hconcat(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    std::vector<cv::Mat> matrices = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)),
+                                      cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)),
+                                      cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::hconcat( matrices, out );
+    //out:
+    //[1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3]
+ @endcode
+ @param src input array or vector of matrices. all of the matrices must have the same number of rows and the same depth.
+ @param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src.
+ */
+CV_EXPORTS_W void hconcat(InputArrayOfArrays src, OutputArray dst);
+
+/** @brief Applies vertical concatenation to given matrices.
+
+The function vertically concatenates two or more cv::Mat matrices (with the same number of cols).
+@code{.cpp}
+    cv::Mat matArray[] = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)),
+                           cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)),
+                           cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::vconcat( matArray, 3, out );
+    //out:
+    //[1, 1, 1, 1;
+    // 2, 2, 2, 2;
+    // 3, 3, 3, 3]
+@endcode
+@param src input array or vector of matrices. all of the matrices must have the same number of cols and the same depth.
+@param nsrc number of matrices in src.
+@param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src.
+@sa cv::hconcat(const Mat*, size_t, OutputArray), cv::hconcat(InputArrayOfArrays, OutputArray) and cv::hconcat(InputArray, InputArray, OutputArray)
+*/
+CV_EXPORTS void vconcat(const Mat* src, size_t nsrc, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    cv::Mat_<float> A = (cv::Mat_<float>(3, 2) << 1, 7,
+                                                  2, 8,
+                                                  3, 9);
+    cv::Mat_<float> B = (cv::Mat_<float>(3, 2) << 4, 10,
+                                                  5, 11,
+                                                  6, 12);
+
+    cv::Mat C;
+    cv::vconcat(A, B, C);
+    //C:
+    //[1, 7;
+    // 2, 8;
+    // 3, 9;
+    // 4, 10;
+    // 5, 11;
+    // 6, 12]
+ @endcode
+ @param src1 first input array to be considered for vertical concatenation.
+ @param src2 second input array to be considered for vertical concatenation.
+ @param dst output array. It has the same number of cols and depth as the src1 and src2, and the sum of rows of the src1 and src2.
+ */
+CV_EXPORTS void vconcat(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    std::vector<cv::Mat> matrices = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)),
+                                      cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)),
+                                      cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::vconcat( matrices, out );
+    //out:
+    //[1, 1, 1, 1;
+    // 2, 2, 2, 2;
+    // 3, 3, 3, 3]
+ @endcode
+ @param src input array or vector of matrices. all of the matrices must have the same number of cols and the same depth
+ @param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src.
+ */
+CV_EXPORTS_W void vconcat(InputArrayOfArrays src, OutputArray dst);
+
+/** @brief Computes bitwise conjunction of the two arrays (dst = src1 & src2)
+Calculates the per-element bit-wise conjunction of two arrays or an
+array and a scalar.
+
+The function cv::bitwise_and calculates the per-element bit-wise logical conjunction for:
+*   Two arrays when src1 and src2 have the same size:
+    \f[\texttt{dst} (I) = \texttt{src1} (I) \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+*   An array and a scalar when src2 is constructed from Scalar or has
+    the same number of elements as `src1.channels()`:
+    \f[\texttt{dst} (I) = \texttt{src1} (I) \wedge \texttt{src2} \quad \texttt{if mask} (I) \ne0\f]
+*   A scalar and an array when src1 is constructed from Scalar or has
+    the same number of elements as `src2.channels()`:
+    \f[\texttt{dst} (I) = \texttt{src1} \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+In case of floating-point arrays, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel arrays, each channel is processed
+independently. In the second and third cases above, the scalar is first
+converted to the array type.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and type as the input +arrays. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, + OutputArray dst, InputArray mask = noArray()); + +/** @brief Calculates the per-element bit-wise disjunction of two arrays or an +array and a scalar. + +The function cv::bitwise_or calculates the per-element bit-wise logical disjunction for: +* Two arrays when src1 and src2 have the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +* An array and a scalar when src2 is constructed from Scalar or has + the same number of elements as `src1.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} (I) \vee \texttt{src2} \quad \texttt{if mask} (I) \ne0\f] +* A scalar and an array when src1 is constructed from Scalar or has + the same number of elements as `src2.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +In case of floating-point arrays, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel arrays, each channel is processed +independently. In the second and third cases above, the scalar is first +converted to the array type. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and type as the input +arrays. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, + OutputArray dst, InputArray mask = noArray()); + +/** @brief Calculates the per-element bit-wise "exclusive or" operation on two +arrays or an array and a scalar. + +The function cv::bitwise_xor calculates the per-element bit-wise logical "exclusive-or" +operation for: +* Two arrays when src1 and src2 have the same size: + \f[\texttt{dst} (I) = \texttt{src1} (I) \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +* An array and a scalar when src2 is constructed from Scalar or has + the same number of elements as `src1.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} (I) \oplus \texttt{src2} \quad \texttt{if mask} (I) \ne0\f] +* A scalar and an array when src1 is constructed from Scalar or has + the same number of elements as `src2.channels()`: + \f[\texttt{dst} (I) = \texttt{src1} \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f] +In case of floating-point arrays, their machine-specific bit +representations (usually IEEE754-compliant) are used for the operation. +In case of multi-channel arrays, each channel is processed +independently. In the 2nd and 3rd cases above, the scalar is first +converted to the array type. +@param src1 first input array or a scalar. +@param src2 second input array or a scalar. +@param dst output array that has the same size and type as the input +arrays. +@param mask optional operation mask, 8-bit single channel array, that +specifies elements of the output array to be changed. +*/ +CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, + OutputArray dst, InputArray mask = noArray()); + +/** @brief Inverts every bit of an array. 
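+
+A small illustrative sketch (an assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat m(1, 3, CV_8UC1, Scalar(0));
+    Mat inv;
+    bitwise_not(m, inv);   // every element becomes 255 (0xFF)
+@endcode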
+
+The function cv::bitwise_not calculates per-element bit-wise inversion of the input
+array:
+\f[\texttt{dst} (I) = \neg \texttt{src} (I)\f]
+In case of a floating-point input array, its machine-specific bit
+representation (usually IEEE754-compliant) is used for the operation. In
+case of multi-channel arrays, each channel is processed independently.
+@param src input array.
+@param dst output array that has the same size and type as the input
+array.
+@param mask optional operation mask, 8-bit single channel array, that
+specifies elements of the output array to be changed.
+*/
+CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst,
+                              InputArray mask = noArray());
+
+/** @brief Calculates the per-element absolute difference between two arrays or between an array and a scalar.
+
+The function cv::absdiff calculates:
+*   Absolute difference between two arrays when they have the same
+    size and type:
+    \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1}(I) - \texttt{src2}(I)|)\f]
+*   Absolute difference between an array and a scalar when the second
+    array is constructed from Scalar or has as many elements as the
+    number of channels in `src1`:
+    \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1}(I) - \texttt{src2} |)\f]
+*   Absolute difference between a scalar and an array when the first
+    array is constructed from Scalar or has as many elements as the
+    number of channels in `src2`:
+    \f[\texttt{dst}(I) = \texttt{saturate} (| \texttt{src1} - \texttt{src2}(I) |)\f]
+    where I is a multi-dimensional index of array elements. In case of
+    multi-channel arrays, each channel is processed independently.
+@note Saturation is not applied when the arrays have the depth CV_32S.
+You may even get a negative value in the case of overflow.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and type as input arrays.
+@sa cv::abs(const Mat&)
+*/
+CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst);
+
+/** @brief This is an overloaded member function, provided for convenience (python).
+Copies the matrix to another one.
+When the operation mask is specified, if the internal Mat::create call reallocates the destination matrix, the newly allocated matrix is initialized with all zeros before copying the data.
+@param src source matrix.
+@param dst Destination matrix. If it does not have a proper size or type before the operation, it is
+reallocated.
+@param mask Operation mask of the same size as src. Its non-zero elements indicate which matrix
+elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels.
+*/
+CV_EXPORTS_W void copyTo(InputArray src, OutputArray dst, InputArray mask);
+/** @brief Checks if array elements lie between the elements of two other arrays.
+
+The function checks the range as follows:
+-   For every element of a single-channel input array:
+    \f[\texttt{dst} (I)= \texttt{lowerb} (I)_0 \leq \texttt{src} (I)_0 \leq \texttt{upperb} (I)_0\f]
+-   For two-channel arrays:
+    \f[\texttt{dst} (I)= \texttt{lowerb} (I)_0 \leq \texttt{src} (I)_0 \leq \texttt{upperb} (I)_0 \land \texttt{lowerb} (I)_1 \leq \texttt{src} (I)_1 \leq \texttt{upperb} (I)_1\f]
+-   and so forth.
+
+That is, dst (I) is set to 255 (all 1 -bits) if src (I) is within the
+specified 1D, 2D, 3D, ... box and 0 otherwise.
+
+When the lower and/or upper boundary parameters are scalars, the indexes
+(I) at lowerb and upperb in the above formulas should be omitted.
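+
+A hypothetical usage sketch (the HSV bounds below are assumptions for
+illustration, not part of the original documentation):
+@code{.cpp}
+    Mat hsv;        // assumed: a CV_8UC3 image already converted to HSV
+    Mat maskGreen;
+    inRange(hsv, Scalar(35, 50, 50), Scalar(85, 255, 255), maskGreen);
+    // maskGreen is CV_8UC1: 255 where all three channels fall in range, else 0
+@endcode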
+@param src first input array.
+@param lowerb inclusive lower boundary array or a scalar.
+@param upperb inclusive upper boundary array or a scalar.
+@param dst output array of the same size as src and CV_8U type.
+*/
+CV_EXPORTS_W void inRange(InputArray src, InputArray lowerb,
+                          InputArray upperb, OutputArray dst);
+
+/** @brief Performs the per-element comparison of two arrays or an array and scalar value.
+
+The function compares:
+*   Elements of two arrays when src1 and src2 have the same size:
+    \f[\texttt{dst} (I) = \texttt{src1} (I) \,\texttt{cmpop}\, \texttt{src2} (I)\f]
+*   Elements of src1 with a scalar src2 when src2 is constructed from
+    Scalar or has a single element:
+    \f[\texttt{dst} (I) = \texttt{src1}(I) \,\texttt{cmpop}\, \texttt{src2}\f]
+*   src1 with elements of src2 when src1 is constructed from Scalar or
+    has a single element:
+    \f[\texttt{dst} (I) = \texttt{src1} \,\texttt{cmpop}\, \texttt{src2} (I)\f]
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+@code{.cpp}
+    Mat dst1 = src1 >= src2;
+    Mat dst2 = src1 < 8;
+    ...
+@endcode
+@param src1 first input array or a scalar; when it is an array, it must have a single channel.
+@param src2 second input array or a scalar; when it is an array, it must have a single channel.
+@param dst output array of type @ref CV_8U that has the same size and the same number of channels as
+    the input arrays.
+@param cmpop a flag that specifies correspondence between the arrays (cv::CmpTypes)
+@sa checkRange, min, max, threshold
+*/
+CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop);
+
+/** @brief Calculates per-element minimum of two arrays or an array and a scalar.
+
+The function cv::min calculates the per-element minimum of two arrays:
+\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))\f]
+or array and a scalar:
+\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{value} )\f]
+@param src1 first input array.
+@param src2 second input array of the same size and type as src1.
+@param dst output array of the same size and type as src1.
+@sa max, compare, inRange, minMaxLoc
+*/
+CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void min(const Mat& src1, const Mat& src2, Mat& dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst);
+
+/** @brief Calculates per-element maximum of two arrays or an array and a scalar.
+
+The function cv::max calculates the per-element maximum of two arrays:
+\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))\f]
+or array and a scalar:
+\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{value} )\f]
+@param src1 first input array.
+@param src2 second input array of the same size and type as src1 .
+@param dst output array of the same size and type as src1.
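+
+A tiny numeric sketch (an illustrative assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat a = (Mat_<int>(1, 3) << 1, 5, 3);
+    Mat b = (Mat_<int>(1, 3) << 4, 2, 6);
+    Mat m;
+    max(a, b, m);   // m == [4, 5, 6]
+@endcode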
+@sa min, compare, inRange, minMaxLoc, @ref MatrixExpressions
+*/
+CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::max(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void max(const Mat& src1, const Mat& src2, Mat& dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::max(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst);
+
+/** @brief Calculates a square root of array elements.
+
+The function cv::sqrt calculates a square root of each input array element.
+In case of multi-channel arrays, each channel is processed
+independently. The accuracy is approximately the same as of the built-in
+std::sqrt .
+@param src input floating-point array.
+@param dst output array of the same size and type as src.
+*/
+CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst);
+
+/** @brief Raises every array element to a power.
+
+The function cv::pow raises every element of the input array to power :
+\f[\texttt{dst} (I) = \fork{\texttt{src}(I)^{power}}{if \(\texttt{power}\) is integer}{|\texttt{src}(I)|^{power}}{otherwise}\f]
+
+So, for a non-integer power exponent, the absolute values of input array
+elements are used. However, it is possible to get true values for
+negative values using some extra operations. In the example below,
+computing the 5th root of array src shows:
+@code{.cpp}
+    Mat mask = src < 0;
+    pow(src, 1./5, dst);
+    subtract(Scalar::all(0), dst, dst, mask);
+@endcode
+For some values of power, such as integer values, 0.5 and -0.5,
+specialized faster algorithms are used.
+
+Special values (NaN, Inf) are not handled.
+@param src input array.
+@param power exponent of power.
+@param dst output array of the same size and type as src.
+@sa sqrt, exp, log, cartToPolar, polarToCart
+*/
+CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst);
+
+/** @brief Calculates the exponent of every array element.
+
+The function cv::exp calculates the exponent of every element of the input
+array:
+\f[\texttt{dst} [I] = e^{ src(I) }\f]
+
+The maximum relative error is about 7e-6 for single-precision input and
+less than 1e-10 for double-precision input. Currently, the function
+converts denormalized values to zeros on output. Special values (NaN,
+Inf) are not handled.
+@param src input array.
+@param dst output array of the same size and type as src.
+@sa log , cartToPolar , polarToCart , phase , pow , sqrt , magnitude
+*/
+CV_EXPORTS_W void exp(InputArray src, OutputArray dst);
+
+/** @brief Calculates the natural logarithm of every array element.
+
+The function cv::log calculates the natural logarithm of every element of the input array:
+\f[\texttt{dst} (I) = \log (\texttt{src}(I)) \f]
+
+Output on zero, negative and special (NaN, Inf) values is undefined.
+
+@param src input array.
+@param dst output array of the same size and type as src .
+@sa exp, cartToPolar, polarToCart, phase, pow, sqrt, magnitude
+*/
+CV_EXPORTS_W void log(InputArray src, OutputArray dst);
+
+/** @brief Calculates x and y coordinates of 2D vectors from their magnitude and angle.
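+
+A small numeric sketch (an illustrative assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat mag = (Mat_<float>(1, 1) << 2.f);
+    Mat ang = (Mat_<float>(1, 1) << 90.f);   // degrees
+    Mat x, y;
+    polarToCart(mag, ang, x, y, true);   // x ~ 0, y ~ 2
+@endcode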
+ +The function cv::polarToCart calculates the Cartesian coordinates of each 2D +vector represented by the corresponding elements of magnitude and angle: +\f[\begin{array}{l} \texttt{x} (I) = \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) = \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array}\f] + +The relative accuracy of the estimated coordinates is about 1e-6. +@param magnitude input floating-point array of magnitudes of 2D vectors; +it can be an empty matrix (=Mat()), in this case, the function assumes +that all the magnitudes are =1; if it is not empty, it must have the +same size and type as angle. +@param angle input floating-point array of angles of 2D vectors. +@param x output array of x-coordinates of 2D vectors; it has the same +size and type as angle. +@param y output array of y-coordinates of 2D vectors; it has the same +size and type as angle. +@param angleInDegrees when true, the input angles are measured in +degrees, otherwise, they are measured in radians. +@sa cartToPolar, magnitude, phase, exp, log, pow, sqrt +*/ +CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, + OutputArray x, OutputArray y, bool angleInDegrees = false); + +/** @brief Calculates the magnitude and angle of 2D vectors. + +The function cv::cartToPolar calculates either the magnitude, angle, or both +for every 2D vector (x(I),y(I)): +\f[\begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array}\f] + +The angles are calculated with accuracy about 0.3 degrees. For the point +(0,0), the angle is set to 0. +@param x array of x-coordinates; this must be a single-precision or +double-precision floating-point array. +@param y array of y-coordinates, that must have the same size and same type as x. +@param magnitude output array of magnitudes of the same size and type as x. +@param angle output array of angles that has the same size and type as +x; the angles are measured in radians (from 0 to 2\*Pi) or in degrees (0 to 360 degrees). +@param angleInDegrees a flag, indicating whether the angles are measured +in radians (which is by default), or in degrees. +@sa Sobel, Scharr +*/ +CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, + OutputArray magnitude, OutputArray angle, + bool angleInDegrees = false); + +/** @brief Calculates the rotation angle of 2D vectors. + +The function cv::phase calculates the rotation angle of each 2D vector that +is formed from the corresponding elements of x and y : +\f[\texttt{angle} (I) = \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))\f] + +The angle estimation accuracy is about 0.3 degrees. When x(I)=y(I)=0 , +the corresponding angle(I) is set to 0. +@param x input floating-point array of x-coordinates of 2D vectors. +@param y input array of y-coordinates of 2D vectors; it must have the +same size and the same type as x. +@param angle output array of vector angles; it has the same size and +same type as x . +@param angleInDegrees when true, the function calculates the angle in +degrees, otherwise, they are measured in radians. +*/ +CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, + bool angleInDegrees = false); + +/** @brief Calculates the magnitude of 2D vectors. 
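+
+A tiny numeric sketch (an illustrative assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat x = (Mat_<float>(1, 1) << 3.f);
+    Mat y = (Mat_<float>(1, 1) << 4.f);
+    Mat mag;
+    magnitude(x, y, mag);   // mag == [5]
+@endcode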
+
+The function cv::magnitude calculates the magnitude of 2D vectors formed
+from the corresponding elements of x and y arrays:
+\f[\texttt{dst} (I) = \sqrt{\texttt{x}(I)^2 + \texttt{y}(I)^2}\f]
+@param x floating-point array of x-coordinates of the vectors.
+@param y floating-point array of y-coordinates of the vectors; it must
+have the same size as x.
+@param magnitude output array of the same size and type as x.
+@sa cartToPolar, polarToCart, phase, sqrt
+*/
+CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude);
+
+/** @brief Checks every element of an input array for invalid values.
+
+The function cv::checkRange checks that every array element is neither NaN nor infinite. When minVal \>
+-DBL_MAX and maxVal \< DBL_MAX, the function also checks that each value is between minVal and
+maxVal. In case of multi-channel arrays, each channel is processed independently. If some values
+are out of range, position of the first outlier is stored in pos (when pos != NULL). Then, the
+function either returns false (when quiet=true) or throws an exception.
+@param a input array.
+@param quiet a flag, indicating whether the function quietly returns false when the array elements
+are out of range or throws an exception.
+@param pos optional output parameter, when not NULL, must be a pointer to array of src.dims
+elements.
+@param minVal inclusive lower boundary of valid values range.
+@param maxVal exclusive upper boundary of valid values range.
+*/
+CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0,
+                             double minVal = -DBL_MAX, double maxVal = DBL_MAX);
+
+/** @brief Converts NaNs to the given number.
+@param a input/output matrix (CV_32F type).
+@param val value to which the NaNs are converted.
+*/
+CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0);
+
+/** @brief Performs generalized matrix multiplication.
+
+The function cv::gemm performs generalized matrix multiplication similar to the
+gemm functions in BLAS level 3. For example,
+`gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`
+corresponds to
+\f[\texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T\f]
+
+In case of complex (two-channel) data, a complex matrix
+multiplication is performed.
+
+The function can be replaced with a matrix expression. For example, the
+above call can be replaced with:
+@code{.cpp}
+    dst = alpha*src1.t()*src2 + beta*src3.t();
+@endcode
+@param src1 first multiplied input matrix that could be real(CV_32FC1,
+CV_64FC1) or complex(CV_32FC2, CV_64FC2).
+@param src2 second multiplied input matrix of the same type as src1.
+@param alpha weight of the matrix product.
+@param src3 third optional delta matrix added to the matrix product; it
+should have the same type as src1 and src2.
+@param beta weight of src3.
+@param dst output matrix; it has the proper size and the same type as
+input matrices.
+@param flags operation flags (cv::GemmFlags)
+@sa mulTransposed , transform
+*/
+CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha,
+                       InputArray src3, double beta, OutputArray dst, int flags = 0);
+
+/** @brief Calculates the product of a matrix and its transposition.
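+
+A minimal sketch (an illustrative assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat src = (Mat_<float>(2, 2) << 1, 2,
+                                    3, 4);
+    Mat dst;
+    mulTransposed(src, dst, true);   // dst = src^T * src = [[10, 14], [14, 20]]
+@endcode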
+
+The function cv::mulTransposed calculates the product of src and its
+transposition:
+\f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} )^T ( \texttt{src} - \texttt{delta} )\f]
+if aTa=true , and
+\f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} ) ( \texttt{src} - \texttt{delta} )^T\f]
+otherwise. The function is used to calculate the covariance matrix. With
+zero delta, it can be used as a faster substitute for general matrix
+product A\*B when B=A'
+@param src input single-channel matrix. Note that unlike gemm, the
+function can multiply not only floating-point matrices.
+@param dst output square matrix.
+@param aTa Flag specifying the multiplication ordering. See the
+description below.
+@param delta Optional delta matrix subtracted from src before the
+multiplication. When the matrix is empty ( delta=noArray() ), it is
+assumed to be zero, that is, nothing is subtracted. If it has the same
+size as src , it is simply subtracted. Otherwise, it is "repeated" (see
+repeat ) to cover the full src and then subtracted. Type of the delta
+matrix, when it is not empty, must be the same as the type of created
+output matrix. See the dtype parameter description below.
+@param scale Optional scale factor for the matrix product.
+@param dtype Optional type of the output matrix. When it is negative,
+the output matrix will have the same type as src . Otherwise, it will be
+type=CV_MAT_DEPTH(dtype) that should be either CV_32F or CV_64F .
+@sa calcCovarMatrix, gemm, repeat, reduce
+*/
+CV_EXPORTS_W void mulTransposed( InputArray src, OutputArray dst, bool aTa,
+                                 InputArray delta = noArray(),
+                                 double scale = 1, int dtype = -1 );
+
+/** @brief Transposes a matrix.
+
+The function cv::transpose transposes the matrix src :
+\f[\texttt{dst} (i,j) = \texttt{src} (j,i)\f]
+@note No complex conjugation is done in case of a complex matrix. It
+should be done separately if needed.
+@param src input array.
+@param dst output array of the same type as src.
+*/
+CV_EXPORTS_W void transpose(InputArray src, OutputArray dst);
+
+/** @brief Transpose for n-dimensional matrices.
+ *
+ * @note Input should be continuous single-channel matrix.
+ * @param src input array.
+ * @param order a permutation of [0,1,..,N-1] where N is the number of axes of src.
+ * The i’th axis of dst will correspond to the axis numbered order[i] of the input.
+ * @param dst output array of the same type as src.
+ */
+CV_EXPORTS_W void transposeND(InputArray src, const std::vector<int>& order, OutputArray dst);
+
+/** @brief Performs the matrix transformation of every array element.
+
+The function cv::transform performs the matrix transformation of every
+element of the array src and stores the results in dst :
+\f[\texttt{dst} (I) = \texttt{m} \cdot \texttt{src} (I)\f]
+(when m.cols=src.channels() ), or
+\f[\texttt{dst} (I) = \texttt{m} \cdot [ \texttt{src} (I); 1]\f]
+(when m.cols=src.channels()+1 )
+
+Every element of the N -channel array src is interpreted as an N -element
+vector that is transformed using the M x N or M x (N+1) matrix m to an
+M-element vector - the corresponding element of the output array dst .
+
+The function may be used for geometrical transformation of
+N -dimensional points, arbitrary linear color space transformation (such
+as various kinds of RGB to YUV transforms), shuffling the image
+channels, and so forth.
+@param src input array that must have as many channels (1 to 4) as
+m.cols or m.cols-1.
+@param dst output array of the same size and depth as src; it has as
+many channels as m.rows.
+@param m transformation 2x2 or 2x3 floating-point matrix.
+@sa perspectiveTransform, getAffineTransform, estimateAffine2D, warpAffine, warpPerspective
+*/
+CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m );
+
+/** @brief Performs the perspective matrix transformation of vectors.
+
+The function cv::perspectiveTransform transforms every element of src by
+treating it as a 2D or 3D vector, in the following way:
+\f[(x, y, z) \rightarrow (x'/w, y'/w, z'/w)\f]
+where
+\f[(x', y', z', w') = \texttt{mat} \cdot \begin{bmatrix} x & y & z & 1 \end{bmatrix}\f]
+and
+\f[w = \fork{w'}{if \(w' \ne 0\)}{\infty}{otherwise}\f]
+
+Here a 3D vector transformation is shown. In case of a 2D vector
+transformation, the z component is omitted.
+
+@note The function transforms a sparse set of 2D or 3D vectors. If you
+want to transform an image using perspective transformation, use
+warpPerspective . If you have an inverse problem, that is, you want to
+compute the most probable perspective transformation out of several
+pairs of corresponding points, you can use getPerspectiveTransform or
+findHomography .
+@param src input two-channel or three-channel floating-point array; each
+element is a 2D/3D vector to be transformed.
+@param dst output array of the same size and type as src.
+@param m 3x3 or 4x4 floating-point transformation matrix.
+@sa transform, warpPerspective, getPerspectiveTransform, findHomography
+*/
+CV_EXPORTS_W void perspectiveTransform(InputArray src, OutputArray dst, InputArray m );
+
+/** @brief Copies the lower or the upper half of a square matrix to its other half.
+
+The function cv::completeSymm copies the lower or the upper half of a square matrix to
+its other half. The matrix diagonal remains unchanged:
+ - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i > j\f$ if
+    lowerToUpper=false
+ - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i < j\f$ if
+    lowerToUpper=true
+
+@param m input-output floating-point square matrix.
+@param lowerToUpper operation flag; if true, the lower half is copied to
+the upper half. Otherwise, the upper half is copied to the lower half.
+@sa flip, transpose
+*/
+CV_EXPORTS_W void completeSymm(InputOutputArray m, bool lowerToUpper = false);
+
+/** @brief Initializes a scaled identity matrix.
+
+The function cv::setIdentity initializes a scaled identity matrix:
+\f[\texttt{mtx} (i,j)= \fork{\texttt{value}}{ if \(i=j\)}{0}{otherwise}\f]
+
+The function can also be emulated using the matrix initializers and the
+matrix expressions:
+@code
+    Mat A = Mat::eye(4, 3, CV_32F)*5;
+    // A will be set to [[5, 0, 0], [0, 5, 0], [0, 0, 5], [0, 0, 0]]
+@endcode
+@param mtx matrix to initialize (not necessarily square).
+@param s value to assign to diagonal elements.
+@sa Mat::zeros, Mat::ones, Mat::setTo, Mat::operator=
+*/
+CV_EXPORTS_W void setIdentity(InputOutputArray mtx, const Scalar& s = Scalar(1));
+
+/** @brief Returns the determinant of a square floating-point matrix.
+
+The function cv::determinant calculates and returns the determinant of the
+specified matrix. For small matrices ( mtx.cols=mtx.rows\<=3 ), the
+direct method is used. For larger matrices, the function uses LU
+factorization with partial pivoting.
+
+For symmetric positive-definite matrices, it is also possible to use
+eigen decomposition to calculate the determinant.
+@param mtx input matrix that must have CV_32FC1 or CV_64FC1 type and
+square size.
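+
+A tiny numeric sketch (an illustrative assumption, not part of the original
+documentation):
+@code{.cpp}
+    Mat m = (Mat_<double>(2, 2) << 3, 1,
+                                   2, 4);
+    double d = determinant(m);   // 3*4 - 1*2 == 10
+@endcode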
+@sa trace, invert, solve, eigen, @ref MatrixExpressions
+*/
+CV_EXPORTS_W double determinant(InputArray mtx);
+
+/** @brief Returns the trace of a matrix.
+
+The function cv::trace returns the sum of the diagonal elements of the
+matrix mtx .
+\f[\mathrm{tr} ( \texttt{mtx} ) = \sum _i \texttt{mtx} (i,i)\f]
+@param mtx input matrix.
+*/
+CV_EXPORTS_W Scalar trace(InputArray mtx);
+
+/** @brief Finds the inverse or pseudo-inverse of a matrix.
+
+The function cv::invert inverts the matrix src and stores the result in dst
+. When the matrix src is singular or non-square, the function calculates
+the pseudo-inverse matrix (the dst matrix) so that norm(src\*dst - I) is
+minimal, where I is an identity matrix.
+
+In case of the #DECOMP_LU method, the function returns non-zero value if
+the inverse has been successfully calculated and 0 if src is singular.
+
+In case of the #DECOMP_SVD method, the function returns the inverse
+condition number of src (the ratio of the smallest singular value to the
+largest singular value) and 0 if src is singular. The SVD method
+calculates a pseudo-inverse matrix if src is singular.
+
+Similarly to #DECOMP_LU, the method #DECOMP_CHOLESKY works only with
+non-singular square matrices that should also be symmetric and
+positive definite. In this case, the function stores the inverted
+matrix in dst and returns non-zero. Otherwise, it returns 0.
+
+@param src input floating-point M x N matrix.
+@param dst output matrix of N x M size and the same type as src.
+@param flags inversion method (cv::DecompTypes)
+@sa solve, SVD
+*/
+CV_EXPORTS_W double invert(InputArray src, OutputArray dst, int flags = DECOMP_LU);
+
+/** @brief Solves one or more linear systems or least-squares problems.
+
+The function cv::solve solves a linear system or least-squares problem (the
+latter is possible with SVD or QR methods, or by specifying the flag
+#DECOMP_NORMAL ):
+\f[\texttt{dst} = \arg \min _X \| \texttt{src1} \cdot \texttt{X} - \texttt{src2} \|\f]
+
+If #DECOMP_LU or #DECOMP_CHOLESKY method is used, the function returns 1
+if src1 (or \f$\texttt{src1}^T\texttt{src1}\f$ ) is non-singular. Otherwise,
+it returns 0. In the latter case, dst is not valid. Other methods find a
+pseudo-solution in case of a singular left-hand side part.
+
+@note If you want to find a unity-norm solution of an under-defined
+singular system \f$\texttt{src1}\cdot\texttt{dst}=0\f$ , the function solve
+will not do the work. Use SVD::solveZ instead.
+
+@param src1 input matrix on the left-hand side of the system.
+@param src2 input matrix on the right-hand side of the system.
+@param dst output solution.
+@param flags solution (matrix inversion) method (#DecompTypes)
+@sa invert, SVD, eigen
+*/
+CV_EXPORTS_W bool solve(InputArray src1, InputArray src2,
+                        OutputArray dst, int flags = DECOMP_LU);
+
+/** @brief Sorts each row or each column of a matrix.
+
+The function cv::sort sorts each matrix row or each matrix column in
+ascending or descending order. So you should pass two operation flags to
+get desired behaviour. If you want to sort matrix rows or columns
+lexicographically, you can use STL std::sort generic function with the
+proper comparison predicate.
+
+@param src input single-channel array.
+@param dst output array of the same size and type as src.
+@param flags operation flags, a combination of #SortFlags
+@sa sortIdx, randShuffle
+*/
+CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags);
+
+/** @brief Sorts each row or each column of a matrix.
+
+The function cv::sortIdx sorts each matrix row or each matrix column in
+ascending or descending order. So you should pass two operation flags to
+get the desired behaviour. Instead of reordering the elements themselves, it
+stores the indices of sorted elements in the output array. For example:
+@code
+    Mat A = Mat::eye(3,3,CV_32F), B;
+    sortIdx(A, B, SORT_EVERY_ROW + SORT_ASCENDING);
+    // B will probably contain
+    // (because of equal elements in A some permutations are possible):
+    // [[1, 2, 0], [0, 2, 1], [0, 1, 2]]
+@endcode
+@param src input single-channel array.
+@param dst output integer array of the same size as src.
+@param flags operation flags that could be a combination of cv::SortFlags
+@sa sort, randShuffle
+*/
+CV_EXPORTS_W void sortIdx(InputArray src, OutputArray dst, int flags);
+
+/** @brief Finds the real roots of a cubic equation.
+
+The function solveCubic finds the real roots of a cubic equation:
+- if coeffs is a 4-element vector:
+\f[\texttt{coeffs} [0] x^3 + \texttt{coeffs} [1] x^2 + \texttt{coeffs} [2] x + \texttt{coeffs} [3] = 0\f]
+- if coeffs is a 3-element vector:
+\f[x^3 + \texttt{coeffs} [0] x^2 + \texttt{coeffs} [1] x + \texttt{coeffs} [2] = 0\f]
+
+The roots are stored in the roots array.
+@param coeffs equation coefficients, an array of 3 or 4 elements.
+@param roots output array of real roots that has 1 or 3 elements.
+@return number of real roots. It can be 0, 1 or 2.
+*/
+CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots);
+
+/** @brief Finds the real or complex roots of a polynomial equation.
+
+The function cv::solvePoly finds real and complex roots of a polynomial equation:
+\f[\texttt{coeffs} [n] x^{n} + \texttt{coeffs} [n-1] x^{n-1} + ... + \texttt{coeffs} [1] x + \texttt{coeffs} [0] = 0\f]
+@param coeffs array of polynomial coefficients.
+@param roots output (complex) array of roots.
+@param maxIters maximum number of iterations the algorithm does.
+*/
+CV_EXPORTS_W double solvePoly(InputArray coeffs, OutputArray roots, int maxIters = 300);
+
+/** @brief Calculates eigenvalues and eigenvectors of a symmetric matrix.
+
+The function cv::eigen calculates just eigenvalues, or eigenvalues and eigenvectors of the symmetric
+matrix src:
+@code
+    src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+@endcode
+
+@note Use cv::eigenNonSymmetric for calculation of real eigenvalues and eigenvectors of non-symmetric matrix.
+
+@param src input matrix that must have CV_32FC1 or CV_64FC1 type, square size and be symmetrical
+(src ^T^ == src).
+@param eigenvalues output vector of eigenvalues of the same type as src; the eigenvalues are stored
+in the descending order.
+@param eigenvectors output matrix of eigenvectors; it has the same size and type as src; the
+eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding
+eigenvalues.
+@sa eigenNonSymmetric, completeSymm , PCA
+*/
+CV_EXPORTS_W bool eigen(InputArray src, OutputArray eigenvalues,
+                        OutputArray eigenvectors = noArray());
+
+/** @brief Calculates eigenvalues and eigenvectors of a non-symmetric matrix (real eigenvalues only).
+
+@note Assumes real eigenvalues.
+
+The function calculates eigenvalues and eigenvectors (optional) of the square matrix src:
+@code
+    src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+@endcode
+
+@param src input matrix (CV_32FC1 or CV_64FC1 type).
+@param eigenvalues output vector of eigenvalues (type is the same type as src).
+@param eigenvectors output matrix of eigenvectors (type is the same type as src). The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues.
+@sa eigen
+*/
+CV_EXPORTS_W void eigenNonSymmetric(InputArray src, OutputArray eigenvalues,
+                                    OutputArray eigenvectors);
+
+/** @brief Calculates the covariance matrix of a set of vectors.
+
+The function cv::calcCovarMatrix calculates the covariance matrix and, optionally, the mean vector of
+the set of input vectors.
+@param samples samples stored as separate matrices
+@param nsamples number of samples
+@param covar output covariance matrix of the type ctype and square size.
+@param mean input or output (depending on the flags) array as the average value of the input vectors.
+@param flags operation flags as a combination of #CovarFlags
+@param ctype type of the matrix; it equals 'CV_64F' by default.
+@sa PCA, mulTransposed, Mahalanobis
+@todo InputArrayOfArrays
+*/
+CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, Mat& mean,
+                                 int flags, int ctype = CV_64F);
+
+/** @overload
+@note use #COVAR_ROWS or #COVAR_COLS flag
+@param samples samples stored as rows/columns of a single matrix.
+@param covar output covariance matrix of the type ctype and square size.
+@param mean input or output (depending on the flags) array as the average value of the input vectors.
+@param flags operation flags as a combination of #CovarFlags
+@param ctype type of the matrix; it equals 'CV_64F' by default.
+*/
+CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
+                                   InputOutputArray mean, int flags, int ctype = CV_64F);
+
+/** wrap PCA::operator() */
+CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
+                             OutputArray eigenvectors, int maxComponents = 0);
+
+/** wrap PCA::operator() and add eigenvalues output parameter */
+CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean,
+                                           OutputArray eigenvectors, OutputArray eigenvalues,
+                                           int maxComponents = 0);
+
+/** wrap PCA::operator() */
+CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
+                             OutputArray eigenvectors, double retainedVariance);
+
+/** wrap PCA::operator() and add eigenvalues output parameter */
+CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean,
+                                           OutputArray eigenvectors, OutputArray eigenvalues,
+                                           double retainedVariance);
+
+/** wrap PCA::project */
+CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean,
+                             InputArray eigenvectors, OutputArray result);
+
+/** wrap PCA::backProject */
+CV_EXPORTS_W void PCABackProject(InputArray data, InputArray mean,
+                                 InputArray eigenvectors, OutputArray result);
+
+/** wrap SVD::compute */
+CV_EXPORTS_W void SVDecomp( InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags = 0 );
+
+/** wrap SVD::backSubst */
+CV_EXPORTS_W void SVBackSubst( InputArray w, InputArray u, InputArray vt,
+                               InputArray rhs, OutputArray dst );
+
+/** @brief Calculates the Mahalanobis distance between two vectors.
+
+The function cv::Mahalanobis calculates and returns the weighted distance between two vectors:
+\f[d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar}(i,j)\cdot(\texttt{vec1}(i)-\texttt{vec2}(i))\cdot(\texttt{vec1}(j)-\texttt{vec2}(j))} }\f]
+The covariance matrix may be calculated using the #calcCovarMatrix function and then inverted using
+the invert function (preferably using the #DECOMP_SVD method, as the most accurate).
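+
+A minimal sketch of that pipeline (illustrative; assumes `samples` is a
+CV_64F matrix with one observation per row):
+@code
+    Mat covar, mean;
+    calcCovarMatrix(samples, covar, mean, COVAR_NORMAL | COVAR_ROWS | COVAR_SCALE);
+    Mat icovar;
+    invert(covar, icovar, DECOMP_SVD);
+    double dist = Mahalanobis(samples.row(0), samples.row(1), icovar);
+@endcode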
+@param v1 first 1D input vector.
+@param v2 second 1D input vector.
+@param icovar inverse covariance matrix.
+*/
+CV_EXPORTS_W double Mahalanobis(InputArray v1, InputArray v2, InputArray icovar);
+
+/** @brief Performs a forward or inverse Discrete Fourier transform of a 1D or 2D floating-point array.
+
+The function cv::dft performs one of the following:
+- Forward Fourier transform of a 1D vector of N elements:
+  \f[Y = F^{(N)} \cdot X,\f]
+  where \f$F^{(N)}_{jk}=\exp(-2\pi i j k/N)\f$ and \f$i=\sqrt{-1}\f$
+- Inverse Fourier transform of a 1D vector of N elements:
+  \f[\begin{array}{l} X'= \left (F^{(N)} \right )^{-1} \cdot Y = \left (F^{(N)} \right )^* \cdot Y \\ X = (1/N) \cdot X', \end{array}\f]
+  where \f$F^*=\left(\textrm{Re}(F^{(N)})-i \cdot \textrm{Im}(F^{(N)})\right)^T\f$
+- Forward 2D Fourier transform of an M x N matrix:
+  \f[Y = F^{(M)} \cdot X \cdot F^{(N)}\f]
+- Inverse 2D Fourier transform of an M x N matrix:
+  \f[\begin{array}{l} X'= \left (F^{(M)} \right )^* \cdot Y \cdot \left (F^{(N)} \right )^* \\ X = \frac{1}{M \cdot N} \cdot X' \end{array}\f]
+
+In case of real (single-channel) data, the output spectrum of the forward Fourier transform or input
+spectrum of the inverse Fourier transform can be represented in a packed format called *CCS*
+(complex-conjugate-symmetrical). It was borrowed from IPL (Intel\* Image Processing Library). Here
+is how the 2D *CCS* spectrum looks:
+\f[\begin{bmatrix} Re Y_{0,0} & Re Y_{0,1} & Im Y_{0,1} & Re Y_{0,2} & Im Y_{0,2} & \cdots & Re Y_{0,N/2-1} & Im Y_{0,N/2-1} & Re Y_{0,N/2} \\ Re Y_{1,0} & Re Y_{1,1} & Im Y_{1,1} & Re Y_{1,2} & Im Y_{1,2} & \cdots & Re Y_{1,N/2-1} & Im Y_{1,N/2-1} & Re Y_{1,N/2} \\ Im Y_{1,0} & Re Y_{2,1} & Im Y_{2,1} & Re Y_{2,2} & Im Y_{2,2} & \cdots & Re Y_{2,N/2-1} & Im Y_{2,N/2-1} & Im Y_{1,N/2} \\ \hdotsfor{9} \\ Re Y_{M/2-1,0} & Re Y_{M-3,1} & Im Y_{M-3,1} & \hdotsfor{3} & Re Y_{M-3,N/2-1} & Im Y_{M-3,N/2-1}& Re Y_{M/2-1,N/2} \\ Im Y_{M/2-1,0} & Re Y_{M-2,1} & Im Y_{M-2,1} & \hdotsfor{3} & Re Y_{M-2,N/2-1} & Im Y_{M-2,N/2-1}& Im Y_{M/2-1,N/2} \\ Re Y_{M/2,0} & Re Y_{M-1,1} & Im Y_{M-1,1} & \hdotsfor{3} & Re Y_{M-1,N/2-1} & Im Y_{M-1,N/2-1}& Re Y_{M/2,N/2} \end{bmatrix}\f]
+
+In case of 1D transform of a real vector, the output looks like the first row of the matrix above.
+
+So, the function chooses an operation mode depending on the flags and size of the input array:
+- If #DFT_ROWS is set or the input array has a single row or single column, the function
+  performs a 1D forward or inverse transform of each row of the matrix; otherwise, it
+  performs a 2D transform.
+- If the input array is real and #DFT_INVERSE is not set, the function performs a forward 1D or
+  2D transform:
+    - When #DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as
+      input.
+    - When #DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as
+      input. In case of 2D transform, it uses the packed format as shown above. In case of a
+      single 1D transform, it looks like the first row of the matrix above. In case of
+      multiple 1D transforms (when using the #DFT_ROWS flag), each row of the output matrix
+      looks like the first row of the matrix above.
+- If the input array is complex and neither #DFT_INVERSE nor #DFT_REAL_OUTPUT is set, the
+  output is a complex array of the same size as input.
The function performs a forward or + inverse 1D or 2D transform of the whole input array or each row of the input array + independently, depending on the flags DFT_INVERSE and DFT_ROWS. +- When #DFT_INVERSE is set and the input array is real, or it is complex but #DFT_REAL_OUTPUT + is set, the output is a real array of the same size as input. The function performs a 1D or 2D + inverse transformation of the whole input array or each individual row, depending on the flags + #DFT_INVERSE and #DFT_ROWS. + +If #DFT_SCALE is set, the scaling is done after the transformation. + +Unlike dct , the function supports arrays of arbitrary size. But only those arrays are processed +efficiently, whose sizes can be factorized in a product of small prime numbers (2, 3, and 5 in the +current implementation). Such an efficient DFT size can be calculated using the getOptimalDFTSize +method. + +The sample below illustrates how to calculate a DFT-based convolution of two 2D real arrays: +@code + void convolveDFT(InputArray A, InputArray B, OutputArray C) + { + // reallocate the output array if needed + C.create(abs(A.rows - B.rows)+1, abs(A.cols - B.cols)+1, A.type()); + Size dftSize; + // calculate the size of DFT transform + dftSize.width = getOptimalDFTSize(A.cols + B.cols - 1); + dftSize.height = getOptimalDFTSize(A.rows + B.rows - 1); + + // allocate temporary buffers and initialize them with 0's + Mat tempA(dftSize, A.type(), Scalar::all(0)); + Mat tempB(dftSize, B.type(), Scalar::all(0)); + + // copy A and B to the top-left corners of tempA and tempB, respectively + Mat roiA(tempA, Rect(0,0,A.cols,A.rows)); + A.copyTo(roiA); + Mat roiB(tempB, Rect(0,0,B.cols,B.rows)); + B.copyTo(roiB); + + // now transform the padded A & B in-place; + // use "nonzeroRows" hint for faster processing + dft(tempA, tempA, 0, A.rows); + dft(tempB, tempB, 0, B.rows); + + // multiply the spectrums; + // the function handles packed spectrum representations well + mulSpectrums(tempA, tempB, tempA); + + // transform the product back from the frequency domain. + // Even though all the result rows will be non-zero, + // you need only the first C.rows of them, and thus you + // pass nonzeroRows == C.rows + dft(tempA, tempA, DFT_INVERSE + DFT_SCALE, C.rows); + + // now copy the result back to C. + tempA(Rect(0, 0, C.cols, C.rows)).copyTo(C); + + // all the temporary buffers will be deallocated automatically + } +@endcode +To optimize this sample, consider the following approaches: +- Since nonzeroRows != 0 is passed to the forward transform calls and since A and B are copied to + the top-left corners of tempA and tempB, respectively, it is not necessary to clear the whole + tempA and tempB. It is only necessary to clear the tempA.cols - A.cols ( tempB.cols - B.cols) + rightmost columns of the matrices. +- This DFT-based convolution does not have to be applied to the whole big arrays, especially if B + is significantly smaller than A or vice versa. Instead, you can calculate convolution by parts. + To do this, you need to split the output array C into multiple tiles. For each tile, estimate + which parts of A and B are required to calculate convolution in this tile. If the tiles in C are + too small, the speed will decrease a lot because of repeated work. In the ultimate case, when + each tile in C is a single pixel, the algorithm becomes equivalent to the naive convolution + algorithm. If the tiles are too big, the temporary arrays tempA and tempB become too big and + there is also a slowdown because of bad cache locality. 
So, there is an optimal tile size
+  somewhere in the middle.
+- If different tiles in C can be calculated in parallel and, thus, the convolution is done by
+  parts, the loop can be threaded.
+
+All of the above improvements have been implemented in #matchTemplate and #filter2D . Therefore, by
+using them, you can get even better performance than with the above theoretically optimal
+implementation. Though, those two functions actually calculate cross-correlation, not convolution,
+so you need to "flip" the second convolution operand B vertically and horizontally using flip .
+@note
+- An example using the discrete Fourier transform can be found at
+  opencv_source_code/samples/cpp/dft.cpp
+- (Python) An example using the dft functionality to perform Wiener deconvolution can be found
+  at opencv_source/samples/python/deconvolution.py
+- (Python) An example rearranging the quadrants of a Fourier image can be found at
+  opencv_source/samples/python/dft.py
+@param src input array that could be real or complex.
+@param dst output array whose size and type depends on the flags .
+@param flags transformation flags, representing a combination of the #DftFlags
+@param nonzeroRows when the parameter is not zero, the function assumes that only the first
+nonzeroRows rows of the input array (#DFT_INVERSE is not set) or only the first nonzeroRows of the
+output array (#DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the
+rows more efficiently and save some time; this technique is very useful for calculating array
+cross-correlation or convolution using DFT.
+@sa dct , getOptimalDFTSize , mulSpectrums, filter2D , matchTemplate , flip , cartToPolar ,
+magnitude , phase
+*/
+CV_EXPORTS_W void dft(InputArray src, OutputArray dst, int flags = 0, int nonzeroRows = 0);
+
+/** @brief Calculates the inverse Discrete Fourier Transform of a 1D or 2D array.
+
+idft(src, dst, flags) is equivalent to dft(src, dst, flags | #DFT_INVERSE) .
+@note Neither dft nor idft scales the result by default. So, you should pass #DFT_SCALE to one of
+dft or idft explicitly to make these transforms mutually inverse.
+@sa dft, dct, idct, mulSpectrums, getOptimalDFTSize
+@param src input floating-point real or complex array.
+@param dst output array whose size and type depend on the flags.
+@param flags operation flags (see dft and #DftFlags).
+@param nonzeroRows number of dst rows to process; the rest of the rows have undefined content (see
+the convolution sample in the dft description).
+*/
+CV_EXPORTS_W void idft(InputArray src, OutputArray dst, int flags = 0, int nonzeroRows = 0);
+
+/** @brief Performs a forward or inverse discrete Cosine transform of a 1D or 2D array.
+
+The function cv::dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D
+floating-point array:
+- Forward Cosine transform of a 1D vector of N elements:
+  \f[Y = C^{(N)} \cdot X\f]
+  where
+  \f[C^{(N)}_{jk}= \sqrt{\alpha_j/N} \cos \left ( \frac{\pi(2k+1)j}{2N} \right )\f]
+  and
+  \f$\alpha_0=1\f$, \f$\alpha_j=2\f$ for *j \> 0*.
+- Inverse Cosine transform of a 1D vector of N elements:
+  \f[X = \left (C^{(N)} \right )^{-1} \cdot Y = \left (C^{(N)} \right )^T \cdot Y\f]
+  (since \f$C^{(N)}\f$ is an orthogonal matrix, \f$C^{(N)} \cdot \left(C^{(N)}\right)^T = I\f$ )
+- Forward 2D Cosine transform of an M x N matrix:
+  \f[Y = C^{(M)} \cdot X \cdot \left (C^{(N)} \right )^T\f]
+- Inverse 2D Cosine transform of an M x N matrix:
+  \f[X = \left (C^{(M)} \right )^T \cdot Y \cdot C^{(N)}\f]
+
+The function chooses the mode of operation by looking at the flags and size of the input array:
+- If (flags & #DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it
+  is an inverse 1D or 2D transform.
+- If (flags & #DCT_ROWS) != 0 , the function performs a 1D transform of each row.
+- If the array is a single column or a single row, the function performs a 1D transform.
+- If none of the above is true, the function performs a 2D transform.
+
+@note Currently dct supports even-size arrays (2, 4, 6 ...). For data analysis and approximation, you
+can pad the array when necessary.
+Also, the function performance depends very much, and not monotonically, on the array size (see
+getOptimalDFTSize ). In the current implementation DCT of a vector of size N is calculated via DFT
+of a vector of size N/2 . Thus, the optimal DCT size N1 \>= N can be calculated as:
+@code
+    size_t getOptimalDCTSize(size_t N) { return 2*getOptimalDFTSize((N+1)/2); }
+    N1 = getOptimalDCTSize(N);
+@endcode
+@param src input floating-point array.
+@param dst output array of the same size and type as src .
+@param flags transformation flags as a combination of cv::DftFlags (DCT_*)
+@sa dft , getOptimalDFTSize , idct
+*/
+CV_EXPORTS_W void dct(InputArray src, OutputArray dst, int flags = 0);
+
+/** @brief Calculates the inverse Discrete Cosine Transform of a 1D or 2D array.
+
+idct(src, dst, flags) is equivalent to dct(src, dst, flags | DCT_INVERSE).
+@param src input floating-point single-channel array.
+@param dst output array of the same size and type as src.
+@param flags operation flags.
+@sa dct, dft, idft, getOptimalDFTSize
+*/
+CV_EXPORTS_W void idct(InputArray src, OutputArray dst, int flags = 0);
+
+/** @brief Performs the per-element multiplication of two Fourier spectrums.
+
+The function cv::mulSpectrums performs the per-element multiplication of the two CCS-packed or complex
+matrices that are results of a real or complex Fourier transform.
+
+The function, together with dft and idft , may be used to calculate convolution (pass conjB=false )
+or correlation (pass conjB=true ) of two arrays rapidly. When the arrays are complex, they are
+simply multiplied (per element) with an optional conjugation of the second-array elements. When the
+arrays are real, they are assumed to be CCS-packed (see dft for details).
+@param a first input array.
+@param b second input array of the same size and type as a .
+@param c output array of the same size and type as a .
+@param flags operation flags; currently, the only supported flag is cv::DFT_ROWS, which indicates that
+each row of a and b is an independent 1D Fourier spectrum. If you do not want to use this flag, simply pass `0` as the value.
+@param conjB optional flag that conjugates the second input array before the multiplication (true)
+or not (false).
+*/
+CV_EXPORTS_W void mulSpectrums(InputArray a, InputArray b, OutputArray c,
+                               int flags, bool conjB = false);
+
+/** @brief Returns the optimal DFT size for a given vector size.
+ +DFT performance is not a monotonic function of a vector size. Therefore, when you calculate +convolution of two arrays or perform the spectral analysis of an array, it usually makes sense to +pad the input data with zeros to get a bit larger array that can be transformed much faster than the +original one. Arrays whose size is a power-of-two (2, 4, 8, 16, 32, ...) are the fastest to process. +Though, the arrays whose size is a product of 2's, 3's, and 5's (for example, 300 = 5\*5\*3\*2\*2) +are also processed quite efficiently. + +The function cv::getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize +so that the DFT of a vector of size N can be processed efficiently. In the current implementation N += 2 ^p^ \* 3 ^q^ \* 5 ^r^ for some integer p, q, r. + +The function returns a negative number if vecsize is too large (very close to INT_MAX ). + +While the function cannot be used directly to estimate the optimal vector size for DCT transform +(since the current DCT implementation supports only even-size vectors), it can be easily processed +as getOptimalDFTSize((vecsize+1)/2)\*2. +@param vecsize vector size. +@sa dft , dct , idft , idct , mulSpectrums +*/ +CV_EXPORTS_W int getOptimalDFTSize(int vecsize); + +/** @brief Returns the default random number generator. + +The function cv::theRNG returns the default random number generator. For each thread, there is a +separate random number generator, so you can use the function safely in multi-thread environments. +If you just need to get a single random number using this generator or initialize an array, you can +use randu or randn instead. But if you are going to generate many random numbers inside a loop, it +is much faster to use this function to retrieve the generator and then use RNG::operator _Tp() . +@sa RNG, randu, randn +*/ +CV_EXPORTS RNG& theRNG(); + +/** @brief Sets state of default random number generator. + +The function cv::setRNGSeed sets state of default random number generator to custom value. +@param seed new state for default random number generator +@sa RNG, randu, randn +*/ +CV_EXPORTS_W void setRNGSeed(int seed); + +/** @brief Generates a single uniformly-distributed random number or an array of random numbers. + +Non-template variant of the function fills the matrix dst with uniformly-distributed +random numbers from the specified range: +\f[\texttt{low} _c \leq \texttt{dst} (I)_c < \texttt{high} _c\f] +@param dst output array of random numbers; the array must be pre-allocated. +@param low inclusive lower boundary of the generated random numbers. +@param high exclusive upper boundary of the generated random numbers. +@sa RNG, randn, theRNG +*/ +CV_EXPORTS_W void randu(InputOutputArray dst, InputArray low, InputArray high); + +/** @brief Fills the array with normally distributed random numbers. + +The function cv::randn fills the matrix dst with normally distributed random numbers with the specified +mean vector and the standard deviation matrix. The generated random numbers are clipped to fit the +value range of the output array data type. +@param dst output array of random numbers; the array must be pre-allocated and have 1 to 4 channels. +@param mean mean value (expectation) of the generated random numbers. +@param stddev standard deviation of the generated random numbers; it can be either a vector (in +which case a diagonal standard deviation matrix is assumed) or a square matrix. 
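+
+A minimal usage sketch (illustrative):
+@code
+    Mat noise(480, 640, CV_8UC1);
+    randn(noise, Scalar::all(128), Scalar::all(20)); // mean 128, stddev 20;
+    // the generated values are clipped to the [0,255] range of CV_8U
+@endcode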
+@sa RNG, randu
+*/
+CV_EXPORTS_W void randn(InputOutputArray dst, InputArray mean, InputArray stddev);
+
+/** @brief Shuffles the array elements randomly.
+
+The function cv::randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and
+swapping them. The number of such swap operations will be dst.rows\*dst.cols\*iterFactor .
+@param dst input/output numerical 1D array.
+@param iterFactor scale factor that determines the number of random swap operations (see the details
+above).
+@param rng optional random number generator used for shuffling; if it is zero, theRNG () is used
+instead.
+@sa RNG, sort
+*/
+CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor = 1., RNG* rng = 0);
+
+/** @brief Principal Component Analysis
+
+The class is used to calculate a special basis for a set of vectors. The
+basis will consist of eigenvectors of the covariance matrix calculated
+from the input set of vectors. The class %PCA can also transform
+vectors to/from the new coordinate space defined by the basis. Usually,
+in this new coordinate system, each vector from the original set (and
+any linear combination of such vectors) can be quite accurately
+approximated by taking its first few components, corresponding to the
+eigenvectors of the largest eigenvalues of the covariance matrix.
+Geometrically it means that you calculate a projection of the vector to
+a subspace formed by a few eigenvectors corresponding to the dominant
+eigenvalues of the covariance matrix. And usually such a projection is
+very close to the original vector. So, you can represent the original
+vector from a high-dimensional space with a much shorter vector
+consisting of the projected vector's coordinates in the subspace. Such a
+transformation is also known as Karhunen-Loeve Transform, or KLT.
+See http://en.wikipedia.org/wiki/Principal_component_analysis
+
+The sample below is a function that takes two matrices. The first
+matrix stores a set of vectors (a row per vector) that is used to
+calculate PCA. The second matrix stores another "test" set of vectors
+(a row per vector). First, these vectors are compressed with PCA, then
+reconstructed back, and then the reconstruction error norm is computed
+and printed for each vector. :
+
+@code{.cpp}
+using namespace cv;
+
+PCA compressPCA(const Mat& pcaset, int maxComponents,
+                const Mat& testset, Mat& compressed)
+{
+    PCA pca(pcaset, // pass the data
+            Mat(), // we do not have a pre-computed mean vector,
+                   // so let the PCA engine compute it
+            PCA::DATA_AS_ROW, // indicate that the vectors
+                              // are stored as matrix rows
+                              // (use PCA::DATA_AS_COL if the vectors are
+                              // the matrix columns)
+            maxComponents // specify how many principal components to retain
+            );
+    // if there is no test data, just return the computed basis, ready-to-use
+    if( !testset.data )
+        return pca;
+    CV_Assert( testset.cols == pcaset.cols );
+
+    compressed.create(testset.rows, maxComponents, testset.type());
+
+    Mat reconstructed;
+    for( int i = 0; i < testset.rows; i++ )
+    {
+        Mat vec = testset.row(i), coeffs = compressed.row(i), reconstructed;
+        // compress the vector, the result will be stored
+        // in the i-th row of the output matrix
+        pca.project(vec, coeffs);
+        // and then reconstruct it
+        pca.backProject(coeffs, reconstructed);
+        // and measure the error
+        printf("%d. diff = %g\n", i, norm(vec, reconstructed, NORM_L2));
+    }
+    return pca;
+}
+@endcode
+@sa calcCovarMatrix, mulTransposed, SVD, dft, dct
+*/
+class CV_EXPORTS PCA
+{
+public:
+    enum Flags { DATA_AS_ROW = 0, //!< indicates that the input samples are stored as matrix rows
+                 DATA_AS_COL = 1, //!< indicates that the input samples are stored as matrix columns
+                 USE_AVG     = 2  //!
+               };
+
+    /** @brief default constructor
+
+    The default constructor initializes an empty %PCA structure. The other
+    constructors initialize the structure and call PCA::operator()().
+    */
+    PCA();
+
+    /** @overload
+    @param data input samples stored as matrix rows or matrix columns.
+    @param mean optional mean value; if the matrix is empty (@c noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout (PCA::Flags)
+    @param maxComponents maximum number of components that %PCA should
+    retain; by default, all the components are retained.
+    */
+    PCA(InputArray data, InputArray mean, int flags, int maxComponents = 0);
+
+    /** @overload
+    @param data input samples stored as matrix rows or matrix columns.
+    @param mean optional mean value; if the matrix is empty (noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout (PCA::Flags)
+    @param retainedVariance Percentage of variance that PCA should retain.
+    Using this parameter will let the PCA decide how many components to
+    retain, but it will always keep at least 2.
+    */
+    PCA(InputArray data, InputArray mean, int flags, double retainedVariance);
+
+    /** @brief performs %PCA
+
+    The operator performs %PCA of the supplied dataset. It is safe to reuse
+    the same PCA structure for multiple datasets. That is, if the structure
+    has been previously used with another dataset, the existing internal
+    data is reclaimed and the new @ref eigenvalues, @ref eigenvectors and @ref
+    mean are allocated and computed.
+
+    The computed @ref eigenvalues are sorted from the largest to the smallest and
+    the corresponding @ref eigenvectors are stored as eigenvectors rows.
+
+    @param data input samples stored as the matrix rows or as the matrix
+    columns.
+    @param mean optional mean value; if the matrix is empty (noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout. (Flags)
+    @param maxComponents maximum number of components that PCA should
+    retain; by default, all the components are retained.
+    */
+    PCA& operator()(InputArray data, InputArray mean, int flags, int maxComponents = 0);
+
+    /** @overload
+    @param data input samples stored as the matrix rows or as the matrix
+    columns.
+    @param mean optional mean value; if the matrix is empty (noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout. (PCA::Flags)
+    @param retainedVariance Percentage of variance that %PCA should retain.
+    Using this parameter will let the %PCA decide how many components to
+    retain, but it will always keep at least 2.
+    */
+    PCA& operator()(InputArray data, InputArray mean, int flags, double retainedVariance);
+
+    /** @brief Projects vector(s) to the principal component subspace.
+
+    The methods project one or more vectors to the principal component
+    subspace, where each vector projection is represented by coefficients in
+    the principal component basis.
The first form of the method returns the + matrix that the second form writes to the result. So the first form can + be used as a part of expression while the second form can be more + efficient in a processing loop. + @param vec input vector(s); must have the same dimensionality and the + same layout as the input data used at %PCA phase, that is, if + DATA_AS_ROW are specified, then `vec.cols==data.cols` + (vector dimensionality) and `vec.rows` is the number of vectors to + project, and the same is true for the PCA::DATA_AS_COL case. + */ + Mat project(InputArray vec) const; + + /** @overload + @param vec input vector(s); must have the same dimensionality and the + same layout as the input data used at PCA phase, that is, if + DATA_AS_ROW are specified, then `vec.cols==data.cols` + (vector dimensionality) and `vec.rows` is the number of vectors to + project, and the same is true for the PCA::DATA_AS_COL case. + @param result output vectors; in case of PCA::DATA_AS_COL, the + output matrix has as many columns as the number of input vectors, this + means that `result.cols==vec.cols` and the number of rows match the + number of principal components (for example, `maxComponents` parameter + passed to the constructor). + */ + void project(InputArray vec, OutputArray result) const; + + /** @brief Reconstructs vectors from their PC projections. + + The methods are inverse operations to PCA::project. They take PC + coordinates of projected vectors and reconstruct the original vectors. + Unless all the principal components have been retained, the + reconstructed vectors are different from the originals. But typically, + the difference is small if the number of components is large enough (but + still much smaller than the original vector dimensionality). As a + result, PCA is used. + @param vec coordinates of the vectors in the principal component + subspace, the layout and size are the same as of PCA::project output + vectors. + */ + Mat backProject(InputArray vec) const; + + /** @overload + @param vec coordinates of the vectors in the principal component + subspace, the layout and size are the same as of PCA::project output + vectors. + @param result reconstructed vectors; the layout and size are the same as + of PCA::project input vectors. + */ + void backProject(InputArray vec, OutputArray result) const; + + /** @brief write PCA objects + + Writes @ref eigenvalues @ref eigenvectors and @ref mean to specified FileStorage + */ + void write(FileStorage& fs) const; + + /** @brief load PCA objects + + Loads @ref eigenvalues @ref eigenvectors and @ref mean from specified FileNode + */ + void read(const FileNode& fn); + + Mat eigenvectors; //!< eigenvectors of the covariation matrix + Mat eigenvalues; //!< eigenvalues of the covariation matrix + Mat mean; //!< mean value subtracted before the projection and added after the back projection +}; + +/** @example samples/cpp/pca.cpp +An example using %PCA for dimensionality reduction while maintaining an amount of variance +*/ + +/** @example samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp +Check @ref tutorial_introduction_to_pca "the corresponding tutorial" for more details +*/ + +/** +@brief Linear Discriminant Analysis +@todo document this class +*/ +class CV_EXPORTS LDA +{ +public: + /** @brief constructor + Initializes a LDA with num_components (default 0). 
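+
+     A minimal usage sketch (illustrative; assumes `samples` is a floating-point
+     matrix with one observation per row and `labels` holds one integer class
+     label per row):
+     @code
+     LDA lda(2);                    // keep at most two discriminant components
+     lda.compute(samples, labels);  // learn the projection from the data
+     Mat projected = lda.project(samples);
+     @endcode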
+ */ + explicit LDA(int num_components = 0); + + /** Initializes and performs a Discriminant Analysis with Fisher's + Optimization Criterion on given data in src and corresponding labels + in labels. If 0 (or less) number of components are given, they are + automatically determined for given data in computation. + */ + LDA(InputArrayOfArrays src, InputArray labels, int num_components = 0); + + /** Serializes this object to a given filename. + */ + void save(const String& filename) const; + + /** Deserializes this object from a given filename. + */ + void load(const String& filename); + + /** Serializes this object to a given cv::FileStorage. + */ + void save(FileStorage& fs) const; + + /** Deserializes this object from a given cv::FileStorage. + */ + void load(const FileStorage& node); + + /** destructor + */ + ~LDA(); + + /** Compute the discriminants for data in src (row aligned) and labels. + */ + void compute(InputArrayOfArrays src, InputArray labels); + + /** Projects samples into the LDA subspace. + src may be one or more row aligned samples. + */ + Mat project(InputArray src); + + /** Reconstructs projections from the LDA subspace. + src may be one or more row aligned projections. + */ + Mat reconstruct(InputArray src); + + /** Returns the eigenvectors of this LDA. + */ + Mat eigenvectors() const { return _eigenvectors; } + + /** Returns the eigenvalues of this LDA. + */ + Mat eigenvalues() const { return _eigenvalues; } + + static Mat subspaceProject(InputArray W, InputArray mean, InputArray src); + static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src); + +protected: + int _num_components; + Mat _eigenvectors; + Mat _eigenvalues; + void lda(InputArrayOfArrays src, InputArray labels); +}; + +/** @brief Singular Value Decomposition + +Class for computing Singular Value Decomposition of a floating-point +matrix. The Singular Value Decomposition is used to solve least-square +problems, under-determined linear systems, invert matrices, compute +condition numbers, and so on. + +If you want to compute a condition number of a matrix or an absolute value of +its determinant, you do not need `u` and `vt`. You can pass +flags=SVD::NO_UV|... . Another flag SVD::FULL_UV indicates that full-size u +and vt must be computed, which is not necessary most of the time. + +@sa invert, solve, eigen, determinant +*/ +class CV_EXPORTS SVD +{ +public: + enum Flags { + /** allow the algorithm to modify the decomposed matrix; it can save space and speed up + processing. currently ignored. */ + MODIFY_A = 1, + /** indicates that only a vector of singular values `w` is to be processed, while u and vt + will be set to empty matrices */ + NO_UV = 2, + /** when the matrix is not square, by default the algorithm produces u and vt matrices of + sufficiently large size for the further A reconstruction; if, however, FULL_UV flag is + specified, u and vt will be full-size square orthogonal matrices.*/ + FULL_UV = 4 + }; + + /** @brief the default constructor + + initializes an empty SVD structure + */ + SVD(); + + /** @overload + initializes an empty SVD structure and then calls SVD::operator() + @param src decomposed matrix. The depth has to be CV_32F or CV_64F. + @param flags operation flags (SVD::Flags) + */ + SVD( InputArray src, int flags = 0 ); + + /** @brief the operator that performs SVD. The previously allocated u, w and vt are released. + + The operator performs the singular value decomposition of the supplied + matrix. 
The u, `vt`, and the vector of singular values w are stored in
+    the structure. The same SVD structure can be reused many times with
+    different matrices. Each time, if needed, the previous u, `vt`, and w
+    are reclaimed and the new matrices are created, which is all handled by
+    Mat::create.
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param flags operation flags (SVD::Flags)
+    */
+    SVD& operator ()( InputArray src, int flags = 0 );
+
+    /** @brief decomposes matrix and stores the results to user-provided matrices
+
+    The methods/functions perform SVD of a matrix. Unlike SVD::SVD constructor
+    and SVD::operator(), they store the results to the user-provided
+    matrices:
+
+    @code{.cpp}
+    Mat A, w, u, vt;
+    SVD::compute(A, w, u, vt);
+    @endcode
+
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param w calculated singular values
+    @param u calculated left singular vectors
+    @param vt transposed matrix of right singular vectors
+    @param flags operation flags - see SVD::Flags.
+    */
+    static void compute( InputArray src, OutputArray w,
+                         OutputArray u, OutputArray vt, int flags = 0 );
+
+    /** @overload
+    computes singular values of a matrix
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param w calculated singular values
+    @param flags operation flags - see SVD::Flags.
+    */
+    static void compute( InputArray src, OutputArray w, int flags = 0 );
+
+    /** @brief performs back substitution
+    */
+    static void backSubst( InputArray w, InputArray u,
+                           InputArray vt, InputArray rhs,
+                           OutputArray dst );
+
+    /** @brief solves an under-determined singular linear system
+
+    The method finds a unit-length solution x of a singular linear system
+    A\*x = 0. Depending on the rank of A, there can be no solutions, a
+    single solution or an infinite number of solutions. In general, the
+    algorithm solves the following problem:
+    \f[dst = \arg \min _{x: \| x \| =1} \| src \cdot x \|\f]
+    @param src left-hand-side matrix.
+    @param dst found solution.
+    */
+    static void solveZ( InputArray src, OutputArray dst );
+
+    /** @brief performs a singular value back substitution.
+
+    The method calculates a back substitution for the specified right-hand
+    side:
+
+    \f[\texttt{x} = \texttt{vt} ^T \cdot diag( \texttt{w} )^{-1} \cdot \texttt{u} ^T \cdot \texttt{rhs} \sim \texttt{A} ^{-1} \cdot \texttt{rhs}\f]
+
+    Using this technique you can either get a very accurate solution of a
+    consistent linear system, or the best (in the least-squares terms)
+    pseudo-solution of an overdetermined linear system.
+
+    @param rhs right-hand side of a linear system (u\*w\*v')\*dst = rhs to
+    be solved, where A has been previously decomposed.
+
+    @param dst found solution of the system.
+
+    @note Explicit SVD with the further back substitution only makes sense
+    if you need to solve many linear systems with the same left-hand side
+    (for example, src ). If all you need is to solve a single system
+    (possibly with multiple rhs immediately available), simply call solve
+    and pass #DECOMP_SVD there. It does absolutely the same thing.
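+
+    A minimal sketch of the decompose-once / solve-many pattern (illustrative;
+    `A`, `rhs1` and `rhs2` are assumed to be CV_64F matrices of compatible sizes):
+    @code
+    SVD svd(A);               // factorize the left-hand side once
+    Mat x1, x2;
+    svd.backSubst(rhs1, x1);  // then cheaply solve for several
+    svd.backSubst(rhs2, x2);  // right-hand sides
+    @endcode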
+    */
+    void backSubst( InputArray rhs, OutputArray dst ) const;
+
+    /** @todo document */
+    template<typename _Tp, int m, int n, int nm> static
+    void compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt );
+
+    /** @todo document */
+    template<typename _Tp, int m, int n, int nm> static
+    void compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w );
+
+    /** @todo document */
+    template<typename _Tp, int m, int n, int nm, int nb> static
+    void backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u, const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs, Matx<_Tp, n, nb>& dst );
+
+    Mat u, w, vt;
+};
+
+/** @brief Random Number Generator
+
+Random number generator. It encapsulates the state (currently, a 64-bit
+integer) and has methods to return scalar random values and to fill
+arrays with random values. Currently it supports uniform and Gaussian
+(normal) distributions. The generator uses Multiply-With-Carry
+algorithm, introduced by G. Marsaglia
+(<http://en.wikipedia.org/wiki/Multiply-with-carry>).
+Gaussian-distribution random numbers are generated using the Ziggurat
+algorithm (<http://en.wikipedia.org/wiki/Ziggurat_algorithm>),
+introduced by G. Marsaglia and W. W. Tsang.
+*/
+class CV_EXPORTS RNG
+{
+public:
+    enum { UNIFORM = 0,
+           NORMAL  = 1
+         };
+
+    /** @brief constructor
+
+    These are the RNG constructors. The first form sets the state to some
+    pre-defined value, equal to 2\*\*32-1 in the current implementation. The
+    second form sets the state to the specified value. If you passed state=0
+    , the constructor uses the above default value instead to avoid the
+    singular random number sequence, consisting of all zeros.
+    */
+    RNG();
+    /** @overload
+    @param state 64-bit value used to initialize the RNG.
+    */
+    RNG(uint64 state);
+    /**The method updates the state using the MWC algorithm and returns the
+    next 32-bit random number.*/
+    unsigned next();
+
+    /**Each of the methods updates the state using the MWC algorithm and
+    returns the next random number of the specified type. In case of integer
+    types, the returned number is from the available value range for the
+    specified type. In case of floating-point types, the returned value is
+    from [0,1) range.
+    */
+    operator uchar();
+    /** @overload */
+    operator schar();
+    /** @overload */
+    operator ushort();
+    /** @overload */
+    operator short();
+    /** @overload */
+    operator unsigned();
+    /** @overload */
+    operator int();
+    /** @overload */
+    operator float();
+    /** @overload */
+    operator double();
+
+    /** @brief returns a random integer sampled uniformly from [0, N).
+
+    The methods transform the state using the MWC algorithm and return the
+    next random number. The first form is equivalent to RNG::next . The
+    second form returns the random number modulo N , which means that the
+    result is in the range [0, N) .
+    */
+    unsigned operator ()();
+    /** @overload
+    @param N upper non-inclusive boundary of the returned random number.
+    */
+    unsigned operator ()(unsigned N);
+
+    /** @brief returns uniformly distributed integer random number from [a,b) range
+
+    The methods transform the state using the MWC algorithm and return the
+    next uniformly-distributed random number of the specified type, deduced
+    from the input parameter type, from the range [a, b) .
There is a nuance + illustrated by the following sample: + + @code{.cpp} + RNG rng; + + // always produces 0 + double a = rng.uniform(0, 1); + + // produces double from [0, 1) + double a1 = rng.uniform((double)0, (double)1); + + // produces float from [0, 1) + float b = rng.uniform(0.f, 1.f); + + // produces double from [0, 1) + double c = rng.uniform(0., 1.); + + // may cause compiler error because of ambiguity: + // RNG::uniform(0, (int)0.999999)? or RNG::uniform((double)0, 0.99999)? + double d = rng.uniform(0, 0.999999); + @endcode + + The compiler does not take into account the type of the variable to + which you assign the result of RNG::uniform . The only thing that + matters to the compiler is the type of a and b parameters. So, if you + want a floating-point random number, but the range boundaries are + integer numbers, either put dots in the end, if they are constants, or + use explicit type cast operators, as in the a1 initialization above. + @param a lower inclusive boundary of the returned random number. + @param b upper non-inclusive boundary of the returned random number. + */ + int uniform(int a, int b); + /** @overload */ + float uniform(float a, float b); + /** @overload */ + double uniform(double a, double b); + + /** @brief Fills arrays with random numbers. + + @param mat 2D or N-dimensional matrix; currently matrices with more than + 4 channels are not supported by the methods, use Mat::reshape as a + possible workaround. + @param distType distribution type, RNG::UNIFORM or RNG::NORMAL. + @param a first distribution parameter; in case of the uniform + distribution, this is an inclusive lower boundary, in case of the normal + distribution, this is a mean value. + @param b second distribution parameter; in case of the uniform + distribution, this is a non-inclusive upper boundary, in case of the + normal distribution, this is a standard deviation (diagonal of the + standard deviation matrix or the full standard deviation matrix). + @param saturateRange pre-saturation flag; for uniform distribution only; + if true, the method will first convert a and b to the acceptable value + range (according to the mat datatype) and then will generate uniformly + distributed random numbers within the range [saturate(a), saturate(b)), + if saturateRange=false, the method will generate uniformly distributed + random numbers in the original range [a, b) and then will saturate them, + it means, for example, that + theRNG().fill(mat_8u, RNG::UNIFORM, -DBL_MAX, DBL_MAX) will likely + produce array mostly filled with 0's and 255's, since the range (0, 255) + is significantly smaller than [-DBL_MAX, DBL_MAX). + + Each of the methods fills the matrix with the random values from the + specified distribution. As the new numbers are generated, the RNG state + is updated accordingly. In case of multiple-channel images, every + channel is filled independently, which means that RNG cannot generate + samples from the multi-dimensional Gaussian distribution with + non-diagonal covariance matrix directly. To do that, the method + generates samples from multi-dimensional standard Gaussian distribution + with zero mean and identity covariation matrix, and then transforms them + using transform to get samples from the specified Gaussian distribution. + */ + void fill( InputOutputArray mat, int distType, InputArray a, InputArray b, bool saturateRange = false ); + + /** @brief Returns the next random number sampled from the Gaussian distribution + @param sigma standard deviation of the distribution. 
+
+    The method transforms the state using the MWC algorithm and returns the
+    next random number from the Gaussian distribution N(0,sigma) . That is,
+    the mean value of the returned random numbers is zero and the standard
+    deviation is the specified sigma .
+    */
+    double gaussian(double sigma);
+
+    uint64 state;
+
+    bool operator ==(const RNG& other) const;
+};
+
+/** @brief Mersenne Twister random number generator
+
+Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
+@todo document
+*/
+class CV_EXPORTS RNG_MT19937
+{
+public:
+    RNG_MT19937();
+    RNG_MT19937(unsigned s);
+    void seed(unsigned s);
+
+    unsigned next();
+
+    operator int();
+    operator unsigned();
+    operator float();
+    operator double();
+
+    unsigned operator ()(unsigned N);
+    unsigned operator ()();
+
+    /** @brief returns uniformly distributed integer random number from [a,b) range*/
+    int uniform(int a, int b);
+    /** @brief returns uniformly distributed floating-point random number from [a,b) range*/
+    float uniform(float a, float b);
+    /** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range*/
+    double uniform(double a, double b);
+
+private:
+    enum PeriodParameters {N = 624, M = 397};
+    unsigned state[N];
+    int mti;
+};
+
+//! @} core_array
+
+//! @addtogroup core_cluster
+//! @{
+
+/** @example samples/cpp/kmeans.cpp
+An example on K-means clustering
+*/
+
+/** @brief Finds centers of clusters and groups input samples around the clusters.
+
+The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters
+and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ contains a
+0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix.
+
+@note
+- (Python) An example on K-means clustering can be found at
+  opencv_source_code/samples/python/kmeans.py
+@param data Data for clustering. An array of N-dimensional points with float coordinates is needed.
+Examples of this array can be:
+- Mat points(count, 2, CV_32F);
+- Mat points(count, 1, CV_32FC2);
+- Mat points(1, count, CV_32FC2);
+- std::vector\<Point2f\> points(sampleCount);
+@param K Number of clusters to split the set by.
+@param bestLabels Input/output integer array that stores the cluster indices for every sample.
+@param criteria The algorithm termination criteria, that is, the maximum number of iterations and/or
+the desired accuracy. The accuracy is specified as criteria.epsilon. As soon as each of the cluster
+centers moves by less than criteria.epsilon on some iteration, the algorithm stops.
+@param attempts Flag to specify the number of times the algorithm is executed using different
+initial labellings. The algorithm returns the labels that yield the best compactness (see the last
+function parameter).
+@param flags Flag that can take values of cv::KmeansFlags
+@param centers Output matrix of the cluster centers, one row per each cluster center.
+@return The function returns the compactness measure that is computed as
+\f[\sum _i \| \texttt{samples} _i - \texttt{centers} _{ \texttt{labels} _i} \| ^2\f]
+after every attempt. The best (minimum) value is chosen and the corresponding labels and the
+compactness value are returned by the function. Basically, you can use only the core of the
+function, set the number of attempts to 1, initialize labels each time using a custom algorithm,
+pass them with the ( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best
+(most-compact) clustering.
+*/
+CV_EXPORTS_W double kmeans( InputArray data, int K, InputOutputArray bestLabels,
+                            TermCriteria criteria, int attempts,
+                            int flags, OutputArray centers = noArray() );
+
+//! @} core_cluster
+
+//! @addtogroup core_basic
+//! @{
+
+/////////////////////////////// Formatted output of cv::Mat ///////////////////////////
+
+/** @todo document */
+class CV_EXPORTS Formatted
+{
+public:
+    virtual const char* next() = 0;
+    virtual void reset() = 0;
+    virtual ~Formatted();
+};
+
+/** @todo document */
+class CV_EXPORTS Formatter
+{
+public:
+    enum FormatType {
+           FMT_DEFAULT = 0,
+           FMT_MATLAB  = 1,
+           FMT_CSV     = 2,
+           FMT_PYTHON  = 3,
+           FMT_NUMPY   = 4,
+           FMT_C       = 5
+         };
+
+    virtual ~Formatter();
+
+    virtual Ptr<Formatted> format(const Mat& mtx) const = 0;
+
+    virtual void set16fPrecision(int p = 4) = 0;
+    virtual void set32fPrecision(int p = 8) = 0;
+    virtual void set64fPrecision(int p = 16) = 0;
+    virtual void setMultiline(bool ml = true) = 0;
+
+    static Ptr<Formatter> get(Formatter::FormatType fmt = FMT_DEFAULT);
+
+};
+
+static inline
+String& operator << (String& out, Ptr<Formatted> fmtd)
+{
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        out += cv::String(str);
+    return out;
+}
+
+static inline
+String& operator << (String& out, const Mat& mtx)
+{
+    return out << Formatter::get()->format(mtx);
+}
+
+//////////////////////////////////////// Algorithm ////////////////////////////////////
+
+class CV_EXPORTS Algorithm;
+
+template<typename _Tp, typename _EnumTp = void> struct ParamType {};
+
+
+/** @brief This is a base class for all more or less complex algorithms in OpenCV
+
+especially for classes of algorithms, for which there can be multiple implementations. The examples
+are stereo correspondence (for which there are algorithms like block matching, semi-global block
+matching, graph-cut etc.), background subtraction (which can be done using mixture-of-gaussians
+models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck
+etc.).
+
+Here is an example of SimpleBlobDetector usage in your application via the Algorithm interface:
+@snippet snippets/core_various.cpp Algorithm
+*/
+class CV_EXPORTS_W Algorithm
+{
+public:
+    Algorithm();
+    virtual ~Algorithm();
+
+    /** @brief Clears the algorithm state
+    */
+    CV_WRAP virtual void clear() {}
+
+    /** @brief Stores algorithm parameters in a file storage
+    */
+    virtual void write(FileStorage& fs) const { CV_UNUSED(fs); }
+
+    /** @brief simplified API for language bindings
+     * @overload
+     */
+    CV_WRAP void write(const Ptr<FileStorage>& fs, const String& name = String()) const;
+
+    /** @brief Reads algorithm parameters from a file storage
+    */
+    CV_WRAP virtual void read(const FileNode& fn) { CV_UNUSED(fn); }
+
+    /** @brief Returns true if the Algorithm is empty (e.g. in the very beginning or after unsuccessful read)
+    */
+    CV_WRAP virtual bool empty() const { return false; }
+
+    /** @brief Reads algorithm from the file node
+
+    This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+    @code
+    cv::FileStorage fsRead("example.xml", FileStorage::READ);
+    Ptr<SVM> svm = Algorithm::read<SVM>(fsRead.root());
+    @endcode
+    In order to make this method work, the derived class must override Algorithm::read(const
+    FileNode& fn) and also have a static create() method without parameters
+    (or with all the optional parameters)
+    */
+    template<typename _Tp> static Ptr<_Tp> read(const FileNode& fn)
+    {
+        Ptr<_Tp> obj = _Tp::create();
+        obj->read(fn);
+        return !obj->empty() ? obj : Ptr<_Tp>();
+    }
+
+    /** @brief Loads algorithm from the file
+
+    @param filename Name of the file to read.
+    @param objname The optional name of the node to read (if empty, the first top-level node will be used)
+
+    This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+    @code
+    Ptr<SVM> svm = Algorithm::load<SVM>("my_svm_model.xml");
+    @endcode
+    In order to make this method work, the derived class must override Algorithm::read(const
+    FileNode& fn).
+    */
+    template<typename _Tp> static Ptr<_Tp> load(const String& filename, const String& objname=String())
+    {
+        FileStorage fs(filename, FileStorage::READ);
+        CV_Assert(fs.isOpened());
+        FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname];
+        if (fn.empty()) return Ptr<_Tp>();
+        Ptr<_Tp> obj = _Tp::create();
+        obj->read(fn);
+        return !obj->empty() ? obj : Ptr<_Tp>();
+    }
+
+    /** @brief Loads algorithm from a String
+
+    @param strModel The string variable containing the model you want to load.
+    @param objname The optional name of the node to read (if empty, the first top-level node will be used)
+
+    This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+    @code
+    Ptr<SVM> svm = Algorithm::loadFromString<SVM>(myStringModel);
+    @endcode
+    */
+    template<typename _Tp> static Ptr<_Tp> loadFromString(const String& strModel, const String& objname=String())
+    {
+        FileStorage fs(strModel, FileStorage::READ + FileStorage::MEMORY);
+        FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname];
+        Ptr<_Tp> obj = _Tp::create();
+        obj->read(fn);
+        return !obj->empty() ? obj : Ptr<_Tp>();
+    }
+
+    /** Saves the algorithm to a file.
+    In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
+    CV_WRAP virtual void save(const String& filename) const;
+
+    /** Returns the algorithm string identifier.
+    This string is used as the top level xml/yml node tag when the object is saved to a file or string.
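+
+    For example (illustrative; assumes a derived algorithm such as cv::ml::SVM):
+    @code
+    Ptr<SVM> svm = SVM::create();
+    svm->save("svm.yml"); // the top-level node tag is svm->getDefaultName()
+    @endcode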
+    */
+    CV_WRAP virtual String getDefaultName() const;
+
+protected:
+    void writeFormat(FileStorage& fs) const;
+};
+
+enum struct Param {
+    INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
+    UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12
+};
+
+
+
+template<> struct ParamType<bool>
+{
+    typedef bool const_param_type;
+    typedef bool member_type;
+
+    static const Param type = Param::BOOLEAN;
+};
+
+template<> struct ParamType<int>
+{
+    typedef int const_param_type;
+    typedef int member_type;
+
+    static const Param type = Param::INT;
+};
+
+template<> struct ParamType<double>
+{
+    typedef double const_param_type;
+    typedef double member_type;
+
+    static const Param type = Param::REAL;
+};
+
+template<> struct ParamType<String>
+{
+    typedef const String& const_param_type;
+    typedef String member_type;
+
+    static const Param type = Param::STRING;
+};
+
+template<> struct ParamType<Mat>
+{
+    typedef const Mat& const_param_type;
+    typedef Mat member_type;
+
+    static const Param type = Param::MAT;
+};
+
+template<> struct ParamType<std::vector<Mat> >
+{
+    typedef const std::vector<Mat>& const_param_type;
+    typedef std::vector<Mat> member_type;
+
+    static const Param type = Param::MAT_VECTOR;
+};
+
+template<> struct ParamType<Algorithm>
+{
+    typedef const Ptr<Algorithm>& const_param_type;
+    typedef Ptr<Algorithm> member_type;
+
+    static const Param type = Param::ALGORITHM;
+};
+
+template<> struct ParamType<float>
+{
+    typedef float const_param_type;
+    typedef float member_type;
+
+    static const Param type = Param::FLOAT;
+};
+
+template<> struct ParamType<unsigned>
+{
+    typedef unsigned const_param_type;
+    typedef unsigned member_type;
+
+    static const Param type = Param::UNSIGNED_INT;
+};
+
+template<> struct ParamType<uint64>
+{
+    typedef uint64 const_param_type;
+    typedef uint64 member_type;
+
+    static const Param type = Param::UINT64;
+};
+
+template<> struct ParamType<uchar>
+{
+    typedef uchar const_param_type;
+    typedef uchar member_type;
+
+    static const Param type = Param::UCHAR;
+};
+
+template<> struct ParamType<Scalar>
+{
+    typedef const Scalar& const_param_type;
+    typedef Scalar member_type;
+
+    static const Param type = Param::SCALAR;
+};
+
+template<typename _Tp>
+struct ParamType<_Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type>
+{
+    typedef typename std::underlying_type<_Tp>::type const_param_type;
+    typedef typename std::underlying_type<_Tp>::type member_type;
+
+    static const Param type = Param::INT;
+};
+
+//! @} core_basic
+
+} //namespace cv
+
+#include "opencv2/core/operations.hpp"
+#include "opencv2/core/cvstd.inl.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/optim.hpp"
+#include "opencv2/core/ovx.hpp"
+
+#endif /*OPENCV_CORE_HPP*/
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/affine.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/affine.hpp
new file mode 100644
index 0000000..1806382
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/affine.hpp
@@ -0,0 +1,678 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_AFFINE3_HPP +#define OPENCV_CORE_AFFINE3_HPP + +#ifdef __cplusplus + +#include + +namespace cv +{ + +//! @addtogroup core +//! @{ + + /** @brief Affine transform + * + * It represents a 4x4 homogeneous transformation matrix \f$T\f$ + * + * \f[T = + * \begin{bmatrix} + * R & t\\ + * 0 & 1\\ + * \end{bmatrix} + * \f] + * + * where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector. + * + * You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector, + * which is converted to a 3x3 rotation matrix by the Rodrigues formula. + * + * To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation + * angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use + * + * @code + * cv::Vec3f r, t; + * cv::Affine3f T(r, t); + * @endcode + * + * If you already have the rotation matrix \f$R\f$, then you can use + * + * @code + * cv::Matx33f R; + * cv::Affine3f T(R, t); + * @endcode + * + * To extract the rotation matrix \f$R\f$ from \f$T\f$, use + * + * @code + * cv::Matx33f R = T.rotation(); + * @endcode + * + * To extract the translation vector \f$t\f$ from \f$T\f$, use + * + * @code + * cv::Vec3f t = T.translation(); + * @endcode + * + * To extract the rotation vector \f$r\f$ from \f$T\f$, use + * + * @code + * cv::Vec3f r = T.rvec(); + * @endcode + * + * Note that since the mapping from rotation vectors to rotation matrices + * is many to one. The returned rotation vector is not necessarily the one + * you used before to set the matrix. 
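+     *
+     * For instance (editor's sketch, not part of the upstream docs), rotations
+     * by \f$\pi\f$ and by \f$-\pi\f$ about the same axis produce the same matrix:
+     * @code
+     * cv::Affine3f A(cv::Vec3f(0.f, 0.f,  (float)CV_PI));
+     * cv::Affine3f B(cv::Vec3f(0.f, 0.f, -(float)CV_PI));
+     * // A.rotation() == B.rotation(), yet A.rvec() can match only one of the two inputs
+     * @endcode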
+     *
+     * If you have two transformations \f$T = T_1 * T_2\f$, use
+     *
+     * @code
+     * cv::Affine3f T, T1, T2;
+     * T = T2.concatenate(T1);
+     * @endcode
+     *
+     * To get the inverse transform of \f$T\f$, use
+     *
+     * @code
+     * cv::Affine3f T, T_inv;
+     * T_inv = T.inv();
+     * @endcode
+     *
+     */
+    template<typename T>
+    class Affine3
+    {
+    public:
+        typedef T float_type;
+        typedef Matx<float_type, 3, 3> Mat3;
+        typedef Matx<float_type, 4, 4> Mat4;
+        typedef Vec<float_type, 3> Vec3;
+
+        //! Default constructor. It represents a 4x4 identity matrix.
+        Affine3();
+
+        //! Augmented affine matrix
+        Affine3(const Mat4& affine);
+
+        /**
+         * The resulting 4x4 matrix is
+         *
+         * \f[
+         * \begin{bmatrix}
+         *  R & t\\
+         *  0 & 1\\
+         * \end{bmatrix}
+         * \f]
+         *
+         * @param R 3x3 rotation matrix.
+         * @param t 3x1 translation vector.
+         */
+        Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
+
+        /**
+         * Rodrigues vector.
+         *
+         * The last row of the current matrix is set to [0,0,0,1].
+         *
+         * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
+         *             indicates the rotation angle in radian (using right hand rule).
+         * @param t 3x1 translation vector.
+         */
+        Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
+
+        /**
+         * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
+         *
+         * @param data 1-channel matrix.
+         *             When it is 4x4, it is copied to the current matrix and t is not used.
+         *             When it is 3x4, it is copied to the upper 3x4 part of the current matrix and t is not used.
+         *             When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
+         *             When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
+         *             to compute a 3x3 rotation matrix.
+         * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
+         */
+        explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
+
+        //! From 16-element array
+        explicit Affine3(const float_type* vals);
+
+        //! Create a 4x4 identity transform
+        static Affine3 Identity();
+
+        /**
+         * Rotation matrix.
+         *
+         * Copy the rotation matrix to the upper left 3x3 part of the current matrix.
+         * The remaining elements of the current matrix are not changed.
+         *
+         * @param R 3x3 rotation matrix.
+         *
+         */
+        void rotation(const Mat3& R);
+
+        /**
+         * Rodrigues vector.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
+         *             its length indicates the rotation angle in radian (using the right thumb convention).
+         */
+        void rotation(const Vec3& rvec);
+
+        /**
+         * Combines the rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param data 1-channel matrix.
+         *             When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
+         *             When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
+         *             is used to compute the rotation matrix and set the upper left 3x3 part of the current matrix.
+         */
+        void rotation(const Mat& data);
+
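+        // Usage sketch (editor's illustration) for the rotation() overloads above:
+        //
+        //     cv::Affine3d T;
+        //     T.rotation(cv::Vec3d(0, CV_PI, 0));    // Rodrigues vector
+        //     T.rotation(cv::Matx33d::eye());        // explicit 3x3 matrix
+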
+        /**
+         * Copy the 3x3 matrix L to the upper left part of the current matrix
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param L 3x3 matrix.
+         */
+        void linear(const Mat3& L);
+
+        /**
+         * Copy t to the first three elements of the last column of the current matrix
+         *
+         * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
+         *
+         * @param t 3x1 translation vector.
+         */
+        void translation(const Vec3& t);
+
+        //! @return the upper left 3x3 part
+        Mat3 rotation() const;
+
+        //! @return the upper left 3x3 part
+        Mat3 linear() const;
+
+        //! @return the upper right 3x1 part
+        Vec3 translation() const;
+
+        //! Rodrigues vector.
+        //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
+        //! @warning Since the mapping between rotation vectors and rotation matrices is many to one,
+        //!          this function returns only one rotation vector that represents the current rotation matrix,
+        //!          which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
+        Vec3 rvec() const;
+
+        //! @return the inverse of the current matrix.
+        Affine3 inv(int method = cv::DECOMP_SVD) const;
+
+        //! a.rotate(R) is equivalent to Affine(R, 0) * a;
+        Affine3 rotate(const Mat3& R) const;
+
+        //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
+        Affine3 rotate(const Vec3& rvec) const;
+
+        //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
+        Affine3 translate(const Vec3& t) const;
+
+        //! a.concatenate(affine) is equivalent to affine * a;
+        Affine3 concatenate(const Affine3& affine) const;
+
+        template <typename Y> operator Affine3<Y>() const;
+
+        template <typename Y> Affine3<Y> cast() const;
+
+        Mat4 matrix;
+
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::StorageOptions)(Eigen::RowMajor)>& affine);
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
+        operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::StorageOptions)(Eigen::RowMajor)>() const;
+        operator Eigen::Transform<T, 3, Eigen::Affine>() const;
+#endif
+    };
+
+    template<typename T> static
+    Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
+
+    //! V is a 3-element vector with member fields x, y and z
+    template<typename T, typename V> static
+    V operator*(const Affine3<T>& affine, const V& vector);
+
+    typedef Affine3<float> Affine3f;
+    typedef Affine3<double> Affine3d;
+
+    static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
+    static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
+
+    template<typename _Tp> class DataType< Affine3<_Tp> >
+    {
+    public:
+        typedef Affine3<_Tp>                               value_type;
+        typedef Affine3<typename DataType<_Tp>::work_type> work_type;
+        typedef _Tp                                        channel_type;
+
+        enum { generic_type = 0,
+               channels     = 16,
+               fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+               ,depth        = DataType<channel_type>::depth
+               ,type         = CV_MAKETYPE(depth, channels)
+#endif
+             };
+
+        typedef Vec<channel_type, channels> vec_type;
+    };
+
+    namespace traits {
+    template<typename _Tp>
+    struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
+    template<typename _Tp>
+    struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
+    } // namespace
+
+//! @} core
+
+}
+
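+// A minimal usage sketch (editor's illustration, not part of the upstream header):
+//
+//     cv::Vec3d rvec(0, 0, CV_PI/2), t(1, 0, 0);   // 90 deg about Z, then translate
+//     cv::Affine3d T(rvec, t);
+//     cv::Vec3d p = T * cv::Vec3d(1, 0, 0);        // -> approximately (1, 1, 0)
+//     cv::Affine3d T_inv = T.inv();
+
+//!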
@cond IGNORED + +/////////////////////////////////////////////////////////////////////////////////// +// Implementation + +template inline +cv::Affine3::Affine3() + : matrix(Mat4::eye()) +{} + +template inline +cv::Affine3::Affine3(const Mat4& affine) + : matrix(affine) +{} + +template inline +cv::Affine3::Affine3(const Mat3& R, const Vec3& t) +{ + rotation(R); + translation(t); + matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; + matrix.val[15] = 1; +} + +template inline +cv::Affine3::Affine3(const Vec3& _rvec, const Vec3& t) +{ + rotation(_rvec); + translation(t); + matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; + matrix.val[15] = 1; +} + +template inline +cv::Affine3::Affine3(const cv::Mat& data, const Vec3& t) +{ + CV_Assert(data.type() == cv::traits::Type::value); + CV_Assert(data.channels() == 1); + + if (data.cols == 4 && data.rows == 4) + { + data.copyTo(matrix); + return; + } + else if (data.cols == 4 && data.rows == 3) + { + rotation(data(Rect(0, 0, 3, 3))); + translation(data(Rect(3, 0, 1, 3))); + } + else + { + rotation(data); + translation(t); + } + + matrix.val[12] = matrix.val[13] = matrix.val[14] = 0; + matrix.val[15] = 1; +} + +template inline +cv::Affine3::Affine3(const float_type* vals) : matrix(vals) +{} + +template inline +cv::Affine3 cv::Affine3::Identity() +{ + return Affine3(cv::Affine3::Mat4::eye()); +} + +template inline +void cv::Affine3::rotation(const Mat3& R) +{ + linear(R); +} + +template inline +void cv::Affine3::rotation(const Vec3& _rvec) +{ + double theta = norm(_rvec); + + if (theta < DBL_EPSILON) + rotation(Mat3::eye()); + else + { + double c = std::cos(theta); + double s = std::sin(theta); + double c1 = 1. - c; + double itheta = (theta != 0) ? 1./theta : 0.; + + Point3_ r = _rvec*itheta; + + Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z ); + Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 ); + + // R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x] + // where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0] + Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x; + + rotation(R); + } +} + +//Combines rotation methods above. 
Supports 3x3, 1x3, 3x1 sizes of data matrix; +template inline +void cv::Affine3::rotation(const cv::Mat& data) +{ + CV_Assert(data.type() == cv::traits::Type::value); + CV_Assert(data.channels() == 1); + + if (data.cols == 3 && data.rows == 3) + { + Mat3 R; + data.copyTo(R); + rotation(R); + } + else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3)) + { + Vec3 _rvec; + data.reshape(1, 3).copyTo(_rvec); + rotation(_rvec); + } + else + CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1"); +} + +template inline +void cv::Affine3::linear(const Mat3& L) +{ + matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2]; + matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5]; + matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8]; +} + +template inline +void cv::Affine3::translation(const Vec3& t) +{ + matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2]; +} + +template inline +typename cv::Affine3::Mat3 cv::Affine3::rotation() const +{ + return linear(); +} + +template inline +typename cv::Affine3::Mat3 cv::Affine3::linear() const +{ + typename cv::Affine3::Mat3 R; + R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2]; + R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6]; + R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10]; + return R; +} + +template inline +typename cv::Affine3::Vec3 cv::Affine3::translation() const +{ + return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]); +} + +template inline +typename cv::Affine3::Vec3 cv::Affine3::rvec() const +{ + cv::Vec3d w; + cv::Matx33d u, vt, R = rotation(); + cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A); + R = u * vt; + + double rx = R.val[7] - R.val[5]; + double ry = R.val[2] - R.val[6]; + double rz = R.val[3] - R.val[1]; + + double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25); + double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5; + c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c; + double theta = std::acos(c); + + if( s < 1e-5 ) + { + if( c > 0 ) + rx = ry = rz = 0; + else + { + double t; + t = (R.val[0] + 1) * 0.5; + rx = std::sqrt(std::max(t, 0.0)); + t = (R.val[4] + 1) * 0.5; + ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0); + t = (R.val[8] + 1) * 0.5; + rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? 
-1.0 : 1.0); + + if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) ) + rz = -rz; + theta /= std::sqrt(rx*rx + ry*ry + rz*rz); + rx *= theta; + ry *= theta; + rz *= theta; + } + } + else + { + double vth = 1/(2*s); + vth *= theta; + rx *= vth; ry *= vth; rz *= vth; + } + + return cv::Vec3d(rx, ry, rz); +} + +template inline +cv::Affine3 cv::Affine3::inv(int method) const +{ + return matrix.inv(method); +} + +template inline +cv::Affine3 cv::Affine3::rotate(const Mat3& R) const +{ + Mat3 Lc = linear(); + Vec3 tc = translation(); + Mat4 result; + result.val[12] = result.val[13] = result.val[14] = 0; + result.val[15] = 1; + + for(int j = 0; j < 3; ++j) + { + for(int i = 0; i < 3; ++i) + { + float_type value = 0; + for(int k = 0; k < 3; ++k) + value += R(j, k) * Lc(k, i); + result(j, i) = value; + } + + result(j, 3) = R.row(j).dot(tc.t()); + } + return result; +} + +template inline +cv::Affine3 cv::Affine3::rotate(const Vec3& _rvec) const +{ + return rotate(Affine3f(_rvec).rotation()); +} + +template inline +cv::Affine3 cv::Affine3::translate(const Vec3& t) const +{ + Mat4 m = matrix; + m.val[ 3] += t[0]; + m.val[ 7] += t[1]; + m.val[11] += t[2]; + return m; +} + +template inline +cv::Affine3 cv::Affine3::concatenate(const Affine3& affine) const +{ + return (*this).rotate(affine.rotation()).translate(affine.translation()); +} + +template template inline +cv::Affine3::operator Affine3() const +{ + return Affine3(matrix); +} + +template template inline +cv::Affine3 cv::Affine3::cast() const +{ + return Affine3(matrix); +} + +template inline +cv::Affine3 cv::operator*(const cv::Affine3& affine1, const cv::Affine3& affine2) +{ + return affine2.concatenate(affine1); +} + +template inline +V cv::operator*(const cv::Affine3& affine, const V& v) +{ + const typename Affine3::Mat4& m = affine.matrix; + + V r; + r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3]; + r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7]; + r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11]; + return r; +} + +static inline +cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v) +{ + const cv::Matx44f& m = affine.matrix; + cv::Vec3f r; + r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3]; + r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7]; + r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11]; + return r; +} + +static inline +cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v) +{ + const cv::Matx44d& m = affine.matrix; + cv::Vec3d r; + r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3]; + r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7]; + r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11]; + return r; +} + + + +#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H + +template inline +cv::Affine3::Affine3(const Eigen::Transform& affine) +{ + cv::Mat(4, 4, cv::traits::Type::value, affine.matrix().data()).copyTo(matrix); +} + +template inline +cv::Affine3::Affine3(const Eigen::Transform& affine) +{ + Eigen::Transform a = affine; + cv::Mat(4, 4, cv::traits::Type::value, a.matrix().data()).copyTo(matrix); +} + +template inline +cv::Affine3::operator Eigen::Transform() const +{ + Eigen::Transform r; + cv::Mat hdr(4, 4, cv::traits::Type::value, r.matrix().data()); + cv::Mat(matrix, false).copyTo(hdr); + return r; +} + +template inline 
+cv::Affine3::operator Eigen::Transform() const +{ + return this->operator Eigen::Transform(); +} + +#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */ + +//! @endcond + +#endif /* __cplusplus */ + +#endif /* OPENCV_CORE_AFFINE3_HPP */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/async.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/async.hpp new file mode 100644 index 0000000..d1d55e9 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/async.hpp @@ -0,0 +1,105 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ASYNC_HPP +#define OPENCV_CORE_ASYNC_HPP + +#include + +#ifdef CV_CXX11 +//#include +#include +#endif + +namespace cv { + +/** @addtogroup core_async + +@{ +*/ + + +/** @brief Returns result of asynchronous operations + +Result_Object has attached asynchronous state. +Assignment operator doesn't clone asynchronous state (it is shared between all instances). + +Result can be fetched via get() method only once. + +*/ +class CV_EXPORTS_W AsyncArray +{ +public: + ~AsyncArray() CV_NOEXCEPT; + CV_WRAP AsyncArray() CV_NOEXCEPT; + AsyncArray(const AsyncArray& o) CV_NOEXCEPT; + AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT; + CV_WRAP void release() CV_NOEXCEPT; + + /** Fetch the result. + @param[out] dst destination array + + Waits for result until container has valid result. + Throws exception if exception was stored as a result. + + Throws exception on invalid container state. + + @note Result or stored exception can be fetched only once. + */ + CV_WRAP void get(OutputArray dst) const; + + /** Retrieving the result with timeout + @param[out] dst destination array + @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait + + @returns true if result is ready, false if the timeout has expired + + @note Result or stored exception can be fetched only once. + */ + bool get(OutputArray dst, int64 timeoutNs) const; + + CV_WRAP inline + bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); } + + bool wait_for(int64 timeoutNs) const; + + CV_WRAP inline + bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); } + + CV_WRAP bool valid() const CV_NOEXCEPT; + +#ifdef CV_CXX11 + inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; } + inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; } + + template + inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout) + { + return get(dst, (int64)(std::chrono::nanoseconds(timeout).count())); + } + + template + inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout) + { + return wait_for((int64)(std::chrono::nanoseconds(timeout).count())); + } + +#if 0 + std::future getFutureMat() const; + std::future getFutureUMat() const; +#endif +#endif + + + // PImpl + struct Impl; friend struct Impl; + inline void* _getImpl() const CV_NOEXCEPT { return p; } +protected: + Impl* p; +}; + + +//! 
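+// A minimal usage sketch (editor's illustration; `net` is a hypothetical object
+// whose API returns AsyncArray, e.g. cv::dnn::Net::forwardAsync in builds where
+// that backend is available):
+//
+//     cv::AsyncArray a = net.forwardAsync();
+//     cv::Mat result;
+//     if (a.get(result, (int64)1e9))   // wait up to 1 second (timeout in ns)
+//         use(result);                 // result/exception can be fetched only once
+
+//!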
@} +} // namespace +#endif // OPENCV_CORE_ASYNC_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/base.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/base.hpp new file mode 100644 index 0000000..21a61a4 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/base.hpp @@ -0,0 +1,664 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2014, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_BASE_HPP +#define OPENCV_CORE_BASE_HPP + +#ifndef __cplusplus +# error base.hpp header must be compiled as C++ +#endif + +#include "opencv2/opencv_modules.hpp" + +#include +#include + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" + +namespace cv +{ + +//! @addtogroup core_utils +//! @{ + +namespace Error { +//! 
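+// Editor's note, a usage sketch: these codes surface as cv::Exception::code
+// when an OpenCV call fails, e.g.
+//
+//     try { CV_Error(cv::Error::StsBadArg, "negative size"); }
+//     catch (const cv::Exception& e) { /* e.code == cv::Error::StsBadArg */ }
+
+//!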
error codes +enum Code { + StsOk= 0, //!< everything is ok + StsBackTrace= -1, //!< pseudo error for back trace + StsError= -2, //!< unknown /unspecified error + StsInternal= -3, //!< internal error (bad state) + StsNoMem= -4, //!< insufficient memory + StsBadArg= -5, //!< function arg/param is bad + StsBadFunc= -6, //!< unsupported function + StsNoConv= -7, //!< iteration didn't converge + StsAutoTrace= -8, //!< tracing + HeaderIsNull= -9, //!< image header is NULL + BadImageSize= -10, //!< image size is invalid + BadOffset= -11, //!< offset is invalid + BadDataPtr= -12, //!< + BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix. + BadModelOrChSeq= -14, //!< + BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices. + BadNumChannel1U= -16, //!< + BadDepth= -17, //!< input image depth is not supported by the function + BadAlphaChannel= -18, //!< + BadOrder= -19, //!< number of dimensions is out of range + BadOrigin= -20, //!< incorrect input origin + BadAlign= -21, //!< incorrect input align + BadCallBack= -22, //!< + BadTileSize= -23, //!< + BadCOI= -24, //!< input COI is not supported + BadROISize= -25, //!< incorrect input roi + MaskIsTiled= -26, //!< + StsNullPtr= -27, //!< null pointer + StsVecLengthErr= -28, //!< incorrect vector length + StsFilterStructContentErr= -29, //!< incorrect filter structure content + StsKernelStructContentErr= -30, //!< incorrect transform kernel content + StsFilterOffsetErr= -31, //!< incorrect filter offset value + StsBadSize= -201, //!< the input/output structure size is incorrect + StsDivByZero= -202, //!< division by zero + StsInplaceNotSupported= -203, //!< in-place operation is not supported + StsObjectNotFound= -204, //!< request can't be completed + StsUnmatchedFormats= -205, //!< formats of input/output arrays differ + StsBadFlag= -206, //!< flag is wrong or not supported + StsBadPoint= -207, //!< bad CvPoint + StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1) + StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match + StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function + StsOutOfRange= -211, //!< some of parameters are out of range + StsParseError= -212, //!< invalid syntax/structure of the parsed file + StsNotImplemented= -213, //!< the requested function/feature is not implemented + StsBadMemBlock= -214, //!< an allocated block has been corrupted + StsAssert= -215, //!< assertion failed + GpuNotSupported= -216, //!< no CUDA support + GpuApiCallError= -217, //!< GPU API call error + OpenGlNotSupported= -218, //!< no OpenGL support + OpenGlApiCallError= -219, //!< OpenGL API call error + OpenCLApiCallError= -220, //!< OpenCL API call error + OpenCLDoubleNotSupported= -221, + OpenCLInitError= -222, //!< OpenCL initialization error + OpenCLNoAMDBlasFft= -223 +}; +} //Error + +//! @} core_utils + +//! @addtogroup core_array +//! @{ + +//! matrix decomposition types +enum DecompTypes { + /** Gaussian elimination with the optimal pivot element chosen. 
*/ + DECOMP_LU = 0, + /** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix + src1 can be singular */ + DECOMP_SVD = 1, + /** eigenvalue decomposition; the matrix src1 must be symmetrical */ + DECOMP_EIG = 2, + /** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively + defined */ + DECOMP_CHOLESKY = 3, + /** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */ + DECOMP_QR = 4, + /** while all the previous flags are mutually exclusive, this flag can be used together with + any of the previous; it means that the normal equations + \f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are + solved instead of the original system + \f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */ + DECOMP_NORMAL = 16 +}; + +/** norm types + +src1 and src2 denote input arrays. +*/ + +enum NormTypes { + /** + \f[ + norm = \forkthree + {\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } + {\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) } + {\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) } + \f] + */ + NORM_INF = 1, + /** + \f[ + norm = \forkthree + {\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)} + { \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) } + \f]*/ + NORM_L1 = 2, + /** + \f[ + norm = \forkthree + { \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) } + { \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) } + { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) } + \f] + */ + NORM_L2 = 4, + /** + \f[ + norm = \forkthree + { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)} + { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) } + { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) } + \f] + */ + NORM_L2SQR = 5, + /** + In the case of one input array, calculates the Hamming distance of the array from zero, + In the case of two input arrays, calculates the Hamming distance between the arrays. + */ + NORM_HAMMING = 6, + /** + Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will + be added and treated as a single bit to be used in the same calculation as NORM_HAMMING. + */ + NORM_HAMMING2 = 7, + NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags + NORM_RELATIVE = 8, //!< flag + NORM_MINMAX = 32 //!< flag + }; + +//! comparison types +enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2. + CMP_GT = 1, //!< src1 is greater than src2. 
+ CMP_GE = 2, //!< src1 is greater than or equal to src2. + CMP_LT = 3, //!< src1 is less than src2. + CMP_LE = 4, //!< src1 is less than or equal to src2. + CMP_NE = 5 //!< src1 is unequal to src2. + }; + +//! generalized matrix multiplication flags +enum GemmFlags { GEMM_1_T = 1, //!< transposes src1 + GEMM_2_T = 2, //!< transposes src2 + GEMM_3_T = 4 //!< transposes src3 + }; + +enum DftFlags { + /** performs an inverse 1D or 2D transform instead of the default forward + transform. */ + DFT_INVERSE = 1, + /** scales the result: divide it by the number of array elements. Normally, it is + combined with DFT_INVERSE. */ + DFT_SCALE = 2, + /** performs a forward or inverse transform of every individual row of the input + matrix; this flag enables you to transform multiple vectors simultaneously and can be used to + decrease the overhead (which is sometimes several times larger than the processing itself) to + perform 3D and higher-dimensional transformations and so forth.*/ + DFT_ROWS = 4, + /** performs a forward transformation of 1D or 2D real array; the result, + though being a complex array, has complex-conjugate symmetry (*CCS*, see the function + description below for details), and such an array can be packed into a real array of the same + size as input, which is the fastest option and which is what the function does by default; + however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) - + pass the flag to enable the function to produce a full-size complex output array. */ + DFT_COMPLEX_OUTPUT = 16, + /** performs an inverse transformation of a 1D or 2D complex array; the + result is normally a complex array of the same size, however, if the input array has + conjugate-complex symmetry (for example, it is a result of forward transformation with + DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not + check whether the input is symmetrical or not, you can pass the flag and then the function + will assume the symmetry and produce the real output array (note that when the input is packed + into a real array and inverse transformation is executed, the function treats the input as a + packed complex-conjugate symmetrical array, and the output will also be a real array). */ + DFT_REAL_OUTPUT = 32, + /** specifies that input is complex input. If this flag is set, the input must have 2 channels. + On the other hand, for backwards compatibility reason, if input has 2 channels, input is + already considered complex. */ + DFT_COMPLEX_INPUT = 64, + /** performs an inverse 1D or 2D transform instead of the default forward transform. */ + DCT_INVERSE = DFT_INVERSE, + /** performs a forward or inverse transform of every individual row of the input + matrix. This flag enables you to transform multiple vectors simultaneously and can be used to + decrease the overhead (which is sometimes several times larger than the processing itself) to + perform 3D and higher-dimensional transforms and so forth.*/ + DCT_ROWS = DFT_ROWS +}; + +//! Various border types, image boundaries are denoted with `|` +//! 
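+//! For example (editor's sketch), padding a 1x4 row by two pixels on each side:
+//! @code
+//!     cv::Mat src = (cv::Mat_<uchar>(1, 4) << 1, 2, 3, 4);
+//!     cv::Mat dst;
+//!     cv::copyMakeBorder(src, dst, 0, 0, 2, 2, cv::BORDER_REFLECT_101);
+//!     // dst row: 3 2 | 1 2 3 4 | 3 2
+//! @endcode
+//!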
@see borderInterpolate, copyMakeBorder +enum BorderTypes { + BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i` + BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh` + BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb` + BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg` + BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba` + BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno` + + BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 + BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101 + BORDER_ISOLATED = 16 //!< do not look outside of ROI +}; + +//! @} core_array + +//! @addtogroup core_utils +//! @{ + +/*! @brief Signals an error and raises the exception. + +By default the function prints information about the error to stderr, +then it either stops if setBreakOnError() had been called before or raises the exception. +It is possible to alternate error processing by using redirectError(). +@param _code - error code (Error::Code) +@param _err - error description +@param _func - function name. Available only when the compiler supports getting it +@param _file - source file name where the error has occurred +@param _line - line number in the source file where the error has occurred +@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert + */ +CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line); + +#ifdef CV_STATIC_ANALYSIS + +// In practice, some macro are not processed correctly (noreturn is not detected). +// We need to use simplified definition for them. +#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0) +#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0) +#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0) + +#else // CV_STATIC_ANALYSIS + +/** @brief Call the error handler. + +Currently, the error handler prints the error code and the error message to the standard +error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that +the execution stack and all the parameters can be analyzed by the debugger. In the Release +configuration, the exception is thrown. + +@param code one of Error::Code +@param msg error message +*/ +#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ ) + +/** @brief Call the error handler. + +This macro can be used to construct an error message on-fly to include some dynamic information, +for example: +@code + // note the extra parentheses around the formatted text message + CV_Error_(Error::StsOutOfRange, + ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue)); +@endcode +@param code one of Error::Code +@param args printf-like formatted error message in parentheses +*/ +#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ ) + +/** @brief Checks a condition at runtime and throws exception if it fails + +The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros +raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release +configurations while CV_DbgAssert is only retained in the Debug configuration. +*/ +#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0) + +#endif // CV_STATIC_ANALYSIS + +//! 
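+// Usage sketch (editor's illustration) for the assertion/error macros above:
+//
+//     CV_Assert(img.type() == CV_8UC3);    // checked in both Debug and Release
+//     CV_DbgAssert(idx < n);               // checked in Debug builds only
+//     CV_Error_(cv::Error::StsOutOfRange,
+//               ("value %g at (%d, %d) is out of range", v, x, y));
+
+//!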
@cond IGNORED +#if !defined(__OPENCV_BUILD) // TODO: backward compatibility only +#ifndef CV_ErrorNoReturn +#define CV_ErrorNoReturn CV_Error +#endif +#ifndef CV_ErrorNoReturn_ +#define CV_ErrorNoReturn_ CV_Error_ +#endif +#endif + +#define CV_Assert_1 CV_Assert +#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ )) +#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ )) +#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ )) +#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ )) +#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ )) +#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ )) +#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ )) +#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ )) +#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ )) + +#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0) + +//! @endcond + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS +# define CV_DbgAssert(expr) CV_Assert(expr) +#else +/** replaced with CV_Assert(expr) in Debug configuration */ +# define CV_DbgAssert(expr) +#endif + +/* + * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor + * bit count of A exclusive XOR'ed with B + */ +struct CV_EXPORTS Hamming +{ + static const NormTypes normType = NORM_HAMMING; + typedef unsigned char ValueType; + typedef int ResultType; + + /** this will count the bits in a ^ b + */ + ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const; +}; + +typedef Hamming HammingLUT; + +/////////////////////////////////// inline norms //////////////////////////////////// + +template inline _Tp cv_abs(_Tp x) { return std::abs(x); } +inline int cv_abs(uchar x) { return x; } +inline int cv_abs(schar x) { return std::abs(x); } +inline int cv_abs(ushort x) { return x; } +inline int cv_abs(short x) { return std::abs(x); } + +template static inline +_AccTp normL2Sqr(const _Tp* a, int n) +{ + _AccTp s = 0; + int i=0; +#if CV_ENABLE_UNROLLED + for( ; i <= n - 4; i += 4 ) + { + _AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3]; + s += v0*v0 + v1*v1 + v2*v2 + v3*v3; + } +#endif + for( ; i < n; i++ ) + { + _AccTp v = a[i]; + s += v*v; + } + return s; +} + +template static inline +_AccTp normL1(const _Tp* a, int n) +{ + _AccTp s = 0; + int i = 0; +#if CV_ENABLE_UNROLLED + for(; i <= n - 4; i += 4 ) + { + s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) + + (_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]); + } +#endif + for( ; i < n; i++ ) + s += cv_abs(a[i]); + return s; +} + +template static inline +_AccTp normInf(const _Tp* a, int n) +{ + _AccTp s = 0; + for( int i = 0; i < n; i++ ) + s = std::max(s, (_AccTp)cv_abs(a[i])); + return s; +} + +template static inline +_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n) +{ + _AccTp s = 0; + int i= 0; +#if CV_ENABLE_UNROLLED + for(; i <= n - 4; i += 4 ) + { + _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]); + s += v0*v0 + v1*v1 + v2*v2 + v3*v3; + } +#endif + for( ; i < n; i++ ) + { + _AccTp v = _AccTp(a[i] - b[i]); + s += v*v; + } + return s; +} + +static inline float 
normL2Sqr(const float* a, const float* b, int n) +{ + float s = 0.f; + for( int i = 0; i < n; i++ ) + { + float v = a[i] - b[i]; + s += v*v; + } + return s; +} + +template static inline +_AccTp normL1(const _Tp* a, const _Tp* b, int n) +{ + _AccTp s = 0; + int i= 0; +#if CV_ENABLE_UNROLLED + for(; i <= n - 4; i += 4 ) + { + _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]); + s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3); + } +#endif + for( ; i < n; i++ ) + { + _AccTp v = _AccTp(a[i] - b[i]); + s += std::abs(v); + } + return s; +} + +inline float normL1(const float* a, const float* b, int n) +{ + float s = 0.f; + for( int i = 0; i < n; i++ ) + { + s += std::abs(a[i] - b[i]); + } + return s; +} + +inline int normL1(const uchar* a, const uchar* b, int n) +{ + int s = 0; + for( int i = 0; i < n; i++ ) + { + s += std::abs(a[i] - b[i]); + } + return s; +} + +template static inline +_AccTp normInf(const _Tp* a, const _Tp* b, int n) +{ + _AccTp s = 0; + for( int i = 0; i < n; i++ ) + { + _AccTp v0 = a[i] - b[i]; + s = std::max(s, std::abs(v0)); + } + return s; +} + +/** @brief Computes the cube root of an argument. + + The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly. + NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for + single-precision data. + @param val A function argument. + */ +CV_EXPORTS_W float cubeRoot(float val); + +/** @overload + +cubeRoot with argument of `double` type calls `std::cbrt(double)` +*/ +static inline +double cubeRoot(double val) +{ + return std::cbrt(val); +} + +/** @brief Calculates the angle of a 2D vector in degrees. + + The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured + in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees. + @param x x-coordinate of the vector. + @param y y-coordinate of the vector. + */ +CV_EXPORTS_W float fastAtan2(float y, float x); + +/** proxy for hal::LU */ +CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); +/** proxy for hal::LU */ +CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); +/** proxy for hal::Cholesky */ +CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); +/** proxy for hal::Cholesky */ +CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); + +////////////////// forward declarations for important OpenCV types ////////////////// + +//! 
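+// Usage sketch (editor's illustration) for the inline norm helpers above:
+//
+//     float a[4] = {1.f, 2.f, 3.f, 4.f};
+//     float b[4] = {1.f, 2.f, 3.f, 5.f};
+//     double l1 = cv::normL1<float, double>(a, b, 4);      // = 1.0
+//     double l2 = cv::normL2Sqr<float, double>(a, b, 4);   // = 1.0
+
+//!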
@cond IGNORED + +template class Vec; +template class Matx; + +template class Complex; +template class Point_; +template class Point3_; +template class Size_; +template class Rect_; +template class Scalar_; + +class CV_EXPORTS RotatedRect; +class CV_EXPORTS Range; +class CV_EXPORTS TermCriteria; +class CV_EXPORTS KeyPoint; +class CV_EXPORTS DMatch; +class CV_EXPORTS RNG; + +class CV_EXPORTS Mat; +class CV_EXPORTS MatExpr; + +class CV_EXPORTS UMat; + +class CV_EXPORTS SparseMat; +typedef Mat MatND; + +template class Mat_; +template class SparseMat_; + +class CV_EXPORTS MatConstIterator; +class CV_EXPORTS SparseMatIterator; +class CV_EXPORTS SparseMatConstIterator; +template class MatIterator_; +template class MatConstIterator_; +template class SparseMatIterator_; +template class SparseMatConstIterator_; + +namespace ogl +{ + class CV_EXPORTS Buffer; + class CV_EXPORTS Texture2D; + class CV_EXPORTS Arrays; +} + +namespace cuda +{ + class CV_EXPORTS GpuMat; + class CV_EXPORTS HostMem; + class CV_EXPORTS Stream; + class CV_EXPORTS Event; +} + +namespace cudev +{ + template class GpuMat_; +} + +namespace ipp +{ +CV_EXPORTS unsigned long long getIppFeatures(); +CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL, + int line = 0); +CV_EXPORTS int getIppStatus(); +CV_EXPORTS String getIppErrorLocation(); +CV_EXPORTS_W bool useIPP(); +CV_EXPORTS_W void setUseIPP(bool flag); +CV_EXPORTS_W String getIppVersion(); + +// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results +// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests. +CV_EXPORTS_W bool useIPP_NotExact(); +CV_EXPORTS_W void setUseIPP_NotExact(bool flag); +#ifndef DISABLE_OPENCV_3_COMPATIBILITY +static inline bool useIPP_NE() { return useIPP_NotExact(); } +static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); } +#endif + +} // ipp + +//! @endcond + +//! @} core_utils + + + + +} // cv + +#include "opencv2/core/neon_utils.hpp" +#include "opencv2/core/vsx_utils.hpp" +#include "opencv2/core/check.hpp" + +#endif //OPENCV_CORE_BASE_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/bindings_utils.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/bindings_utils.hpp new file mode 100644 index 0000000..1c14334 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/bindings_utils.hpp @@ -0,0 +1,287 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP +#define OPENCV_CORE_BINDINGS_UTILS_HPP + +#include +#include +#include + +#include + +namespace cv { namespace utils { +//! @addtogroup core_utils +//! @{ + +CV_EXPORTS_W String dumpInputArray(InputArray argument); + +CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument); + +CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument); + +CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument); + +CV_WRAP static inline +String dumpBool(bool argument) +{ + return (argument) ? 
String("Bool: True") : String("Bool: False"); +} + +CV_WRAP static inline +String dumpInt(int argument) +{ + return cv::format("Int: %d", argument); +} + +CV_WRAP static inline +String dumpSizeT(size_t argument) +{ + std::ostringstream oss("size_t: ", std::ios::ate); + oss << argument; + return oss.str(); +} + +CV_WRAP static inline +String dumpFloat(float argument) +{ + return cv::format("Float: %.2f", argument); +} + +CV_WRAP static inline +String dumpDouble(double argument) +{ + return cv::format("Double: %.2f", argument); +} + +CV_WRAP static inline +String dumpCString(const char* argument) +{ + return cv::format("String: %s", argument); +} + +CV_WRAP static inline +String dumpString(const String& argument) +{ + return cv::format("String: %s", argument.c_str()); +} + +CV_WRAP static inline +String testOverloadResolution(int value, const Point& point = Point(42, 24)) +{ + return format("overload (int=%d, point=(x=%d, y=%d))", value, point.x, + point.y); +} + +CV_WRAP static inline +String testOverloadResolution(const Rect& rect) +{ + return format("overload (rect=(x=%d, y=%d, w=%d, h=%d))", rect.x, rect.y, + rect.width, rect.height); +} + +CV_WRAP static inline +String dumpRect(const Rect& argument) +{ + return format("rect: (x=%d, y=%d, w=%d, h=%d)", argument.x, argument.y, + argument.width, argument.height); +} + +CV_WRAP static inline +String dumpTermCriteria(const TermCriteria& argument) +{ + return format("term_criteria: (type=%d, max_count=%d, epsilon=%lf", + argument.type, argument.maxCount, argument.epsilon); +} + +CV_WRAP static inline +String dumpRotatedRect(const RotatedRect& argument) +{ + return format("rotated_rect: (c_x=%f, c_y=%f, w=%f, h=%f, a=%f)", + argument.center.x, argument.center.y, argument.size.width, + argument.size.height, argument.angle); +} + +CV_WRAP static inline +RotatedRect testRotatedRect(float x, float y, float w, float h, float angle) +{ + return RotatedRect(Point2f(x, y), Size2f(w, h), angle); +} + +CV_WRAP static inline +std::vector testRotatedRectVector(float x, float y, float w, float h, float angle) +{ + std::vector result; + for (int i = 0; i < 10; i++) + result.push_back(RotatedRect(Point2f(x + i, y + 2 * i), Size2f(w, h), angle + 10 * i)); + return result; +} + +CV_WRAP static inline +String dumpRange(const Range& argument) +{ + if (argument == Range::all()) + { + return "range: all"; + } + else + { + return format("range: (s=%d, e=%d)", argument.start, argument.end); + } +} + +CV_WRAP static inline +int testOverwriteNativeMethod(int argument) +{ + return argument; +} + +CV_WRAP static inline +String testReservedKeywordConversion(int positional_argument, int lambda = 2, int from = 3) +{ + return format("arg=%d, lambda=%d, from=%d", positional_argument, lambda, from); +} + +CV_EXPORTS_W String dumpVectorOfInt(const std::vector& vec); + +CV_EXPORTS_W String dumpVectorOfDouble(const std::vector& vec); + +CV_EXPORTS_W String dumpVectorOfRect(const std::vector& vec); + +CV_WRAP static inline +void generateVectorOfRect(size_t len, CV_OUT std::vector& vec) +{ + vec.resize(len); + if (len > 0) + { + RNG rng(12345); + Mat tmp(static_cast(len), 1, CV_32SC4); + rng.fill(tmp, RNG::UNIFORM, 10, 20); + tmp.copyTo(vec); + } +} + +CV_WRAP static inline +void generateVectorOfInt(size_t len, CV_OUT std::vector& vec) +{ + vec.resize(len); + if (len > 0) + { + RNG rng(554433); + Mat tmp(static_cast(len), 1, CV_32SC1); + rng.fill(tmp, RNG::UNIFORM, -10, 10); + tmp.copyTo(vec); + } +} + +CV_WRAP static inline +void generateVectorOfMat(size_t len, int rows, int 
cols, int dtype, CV_OUT std::vector& vec) +{ + vec.resize(len); + if (len > 0) + { + RNG rng(65431); + for (size_t i = 0; i < len; ++i) + { + vec[i].create(rows, cols, dtype); + rng.fill(vec[i], RNG::UNIFORM, 0, 10); + } + } +} + +CV_WRAP static inline +AsyncArray testAsyncArray(InputArray argument) +{ + AsyncPromise p; + p.setValue(argument); + return p.getArrayResult(); +} + +CV_WRAP static inline +AsyncArray testAsyncException() +{ + AsyncPromise p; + return p.getArrayResult(); +} + +namespace nested { +CV_WRAP static inline bool testEchoBooleanFunction(bool flag) { + return flag; +} + +class CV_EXPORTS_W CV_WRAP_AS(ExportClassName) OriginalClassName +{ +public: + struct CV_EXPORTS_W_SIMPLE Params + { + CV_PROP_RW int int_value; + CV_PROP_RW float float_value; + + CV_WRAP explicit Params(int int_param = 123, float float_param = 3.5f) + { + int_value = int_param; + float_value = float_param; + } + }; + + explicit OriginalClassName(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + params_ = params; + } + + CV_WRAP int getIntParam() const + { + return params_.int_value; + } + + CV_WRAP float getFloatParam() const + { + return params_.float_value; + } + + CV_WRAP static std::string originalName() + { + return "OriginalClassName"; + } + + CV_WRAP static Ptr + create(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + return makePtr(params); + } + +private: + OriginalClassName::Params params_; +}; + +typedef OriginalClassName::Params OriginalClassName_Params; +} // namespace nested + +namespace fs { + CV_EXPORTS_W cv::String getCacheDirectoryForDownloads(); +} // namespace fs + +//! @} // core_utils +} // namespace cv::utils + +//! @cond IGNORED + +CV_WRAP static inline +int setLogLevel(int level) +{ + // NB: Binding generators doesn't work with enums properly yet, so we define separate overload here + return cv::utils::logging::setLogLevel((cv::utils::logging::LogLevel)level); +} + +CV_WRAP static inline +int getLogLevel() +{ + return cv::utils::logging::getLogLevel(); +} + +//! @endcond IGNORED + +} // namespaces cv / utils + +#endif // OPENCV_CORE_BINDINGS_UTILS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/bufferpool.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/bufferpool.hpp new file mode 100644 index 0000000..4698e5d --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/bufferpool.hpp @@ -0,0 +1,40 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. + +#ifndef OPENCV_CORE_BUFFER_POOL_HPP +#define OPENCV_CORE_BUFFER_POOL_HPP + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4265) +#endif + +namespace cv +{ + +//! @addtogroup core +//! @{ + +class BufferPoolController +{ +protected: + ~BufferPoolController() { } +public: + virtual size_t getReservedSize() const = 0; + virtual size_t getMaxReservedSize() const = 0; + virtual void setMaxReservedSize(size_t size) = 0; + virtual void freeAllReservedBuffers() = 0; +}; + +//! 
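+// Usage sketch (editor's illustration): a controller is obtained from an
+// allocator, e.g. via cv::MatAllocator::getBufferPoolController():
+//
+//     cv::BufferPoolController* c =
+//         cv::Mat::getDefaultAllocator()->getBufferPoolController();
+//     if (c) c->setMaxReservedSize(64 * 1024 * 1024);   // cap the pool at 64 MiB
+
+//!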
@} + +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // OPENCV_CORE_BUFFER_POOL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/check.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/check.hpp new file mode 100644 index 0000000..a32b811 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/check.hpp @@ -0,0 +1,160 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_CHECK_HPP +#define OPENCV_CORE_CHECK_HPP + +#include + +namespace cv { + +/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "" */ +CV_EXPORTS const char* depthToString(int depth); + +/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "" */ +CV_EXPORTS String typeToString(int type); + + +//! @cond IGNORED +namespace detail { + +/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */ +CV_EXPORTS const char* depthToString_(int depth); + +/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */ +CV_EXPORTS cv::String typeToString_(int type); + +enum TestOp { + TEST_CUSTOM = 0, + TEST_EQ = 1, + TEST_NE = 2, + TEST_LE = 3, + TEST_LT = 4, + TEST_GE = 5, + TEST_GT = 6, + CV__LAST_TEST_OP +}; + +struct CheckContext { + const char* func; + const char* file; + int line; + enum TestOp testOp; + const char* message; + const char* p1_str; + const char* p2_str; +}; + +#ifndef CV__CHECK_FILENAME +# define CV__CHECK_FILENAME __FILE__ +#endif + +#ifndef CV__CHECK_FUNCTION +# if defined _MSC_VER +# define CV__CHECK_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__CHECK_FUNCTION "" +# endif +#endif + +#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__) +#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \ + static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \ + { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, "" message, "" p1_str, "" p2_str } + +CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_ v1, const Size_ v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx); + +CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const 
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx); +CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx); + + +#define CV__TEST_EQ(v1, v2) ((v1) == (v2)) +#define CV__TEST_NE(v1, v2) ((v1) != (v2)) +#define CV__TEST_LE(v1, v2) ((v1) <= (v2)) +#define CV__TEST_LT(v1, v2) ((v1) < (v2)) +#define CV__TEST_GE(v1, v2) ((v1) >= (v2)) +#define CV__TEST_GT(v1, v2) ((v1) > (v2)) + +#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \ + if(CV__TEST_##op((v1), (v2))) ; else { \ + CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \ + cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \ + } \ +} while (0) + +#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \ + if(!!(test_expr)) ; else { \ + CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \ + cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \ + } \ +} while (0) + +} // namespace +//! @endcond + + +/// Supported values of these types: int, float, double +#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) +#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) + +/// Check with additional "decoding" of type values in error message +#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg) +/// Check with additional "decoding" of depth values in error message +#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg) + +#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg) + +/// Example: type == CV_8UC1 || type == CV_8UC3 +#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg) + +/// Example: depth == CV_32F || depth == CV_64F +#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg) + +/// Example: v == A || v == B +#define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) + +/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1") +// TODO define pretty-printers + +#ifndef NDEBUG +#define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg) +#define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg) +#define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg) +#else 
+#define CV_DbgCheck(v, test_expr, msg) do { } while (0) +#define CV_DbgCheckEQ(v1, v2, msg) do { } while (0) +#define CV_DbgCheckNE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckLE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckLT(v1, v2, msg) do { } while (0) +#define CV_DbgCheckGE(v1, v2, msg) do { } while (0) +#define CV_DbgCheckGT(v1, v2, msg) do { } while (0) +#endif + +} // namespace + +#endif // OPENCV_CORE_CHECK_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/core.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/core.hpp new file mode 100644 index 0000000..4389183 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/core.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/core.hpp" diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/core_c.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/core_c.h new file mode 100644 index 0000000..7b686b8 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/core_c.h @@ -0,0 +1,3128 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#ifndef OPENCV_CORE_C_H +#define OPENCV_CORE_C_H + +#include "opencv2/core/types_c.h" + +#ifdef __cplusplus +/* disable MSVC warning C4190 / clang-cl -Wreturn-type-c-linkage: + 'function' has C-linkage specified, but returns UDT 'typename' + which is incompatible with C + + It is OK to disable it because we only extend few plain structures with + C++ constructors for simpler interoperability with C++ API of the library +*/ +# if defined(__clang__) + // handle clang on Linux and clang-cl (i. e. 
clang on Windows) first +# pragma GCC diagnostic ignored "-Wreturn-type-c-linkage" +# elif defined(_MSC_VER) + // then handle MSVC +# pragma warning(disable:4190) +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup core_c + @{ +*/ + +/****************************************************************************************\ +* Array allocation, deallocation, initialization and access to elements * +\****************************************************************************************/ + +/** `malloc` wrapper. + If there is not enough memory, the function + (as well as other OpenCV functions that call cvAlloc) + raises an error. */ +CVAPI(void*) cvAlloc( size_t size ); + +/** `free` wrapper. + Here and further all the memory releasing functions + (that all call cvFree) take double pointer in order + to clear pointer to the data after releasing it. + Passing pointer to NULL pointer is Ok: nothing happens in this case +*/ +CVAPI(void) cvFree_( void* ptr ); +#define cvFree(ptr) (cvFree_(*(ptr)), *(ptr)=0) + +/** @brief Creates an image header but does not allocate the image data. + +@param size Image width and height +@param depth Image depth (see cvCreateImage ) +@param channels Number of channels (see cvCreateImage ) + */ +CVAPI(IplImage*) cvCreateImageHeader( CvSize size, int depth, int channels ); + +/** @brief Initializes an image header that was previously allocated. + +The returned IplImage\* points to the initialized header. +@param image Image header to initialize +@param size Image width and height +@param depth Image depth (see cvCreateImage ) +@param channels Number of channels (see cvCreateImage ) +@param origin Top-left IPL_ORIGIN_TL or bottom-left IPL_ORIGIN_BL +@param align Alignment for image rows, typically 4 or 8 bytes + */ +CVAPI(IplImage*) cvInitImageHeader( IplImage* image, CvSize size, int depth, + int channels, int origin CV_DEFAULT(0), + int align CV_DEFAULT(4)); + +/** @brief Creates an image header and allocates the image data. + +This function call is equivalent to the following code: +@code + header = cvCreateImageHeader(size, depth, channels); + cvCreateData(header); +@endcode +@param size Image width and height +@param depth Bit depth of image elements. See IplImage for valid depths. +@param channels Number of channels per pixel. See IplImage for details. This function only creates +images with interleaved channels. + */ +CVAPI(IplImage*) cvCreateImage( CvSize size, int depth, int channels ); + +/** @brief Deallocates an image header. + +This call is an analogue of : +@code + if(image ) + { + iplDeallocate(*image, IPL_IMAGE_HEADER | IPL_IMAGE_ROI); + *image = 0; + } +@endcode +but it does not use IPL functions by default (see the CV_TURN_ON_IPL_COMPATIBILITY macro). +@param image Double pointer to the image header + */ +CVAPI(void) cvReleaseImageHeader( IplImage** image ); + +/** @brief Deallocates the image header and the image data. + +This call is a shortened form of : +@code + if(*image ) + { + cvReleaseData(*image); + cvReleaseImageHeader(image); + } +@endcode +@param image Double pointer to the image header +*/ +CVAPI(void) cvReleaseImage( IplImage** image ); + +/** Creates a copy of IPL image (widthStep may differ) */ +CVAPI(IplImage*) cvCloneImage( const IplImage* image ); 
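+
+/** Usage sketch (illustrative; not a declaration of this header): the usual
+pairing of the allocation and release functions declared above:
+@code
+    IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3);
+    IplImage* copy = cvCloneImage(img);   // deep copy; widthStep may differ
+    cvReleaseImage(&img);                 // also resets the pointer to NULL
+    cvReleaseImage(&copy);
+@endcode
+*/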
+/** @brief Sets the channel of interest in an IplImage. + +If the ROI is set to NULL and the coi is *not* 0, the ROI is allocated. Most OpenCV functions do +*not* support the COI setting, so to process an individual image/matrix channel one may copy (via +cvCopy or cvSplit) the channel to a separate image/matrix, process it and then copy the result +back (via cvCopy or cvMerge) if needed. +@param image A pointer to the image header +@param coi The channel of interest. 0 - all channels are selected, 1 - first channel is selected, +etc. Note that the channel indices become 1-based. + */ +CVAPI(void) cvSetImageCOI( IplImage* image, int coi ); + +/** @brief Returns the index of the channel of interest. + +Returns the channel of interest of an IplImage. Returned values correspond to the coi in +cvSetImageCOI. +@param image A pointer to the image header + */ +CVAPI(int) cvGetImageCOI( const IplImage* image ); + +/** @brief Sets an image Region Of Interest (ROI) for a given rectangle. + +If the original image ROI was NULL and the rect is not the whole image, the ROI structure is +allocated. + +Most OpenCV functions support the use of ROI and treat the image rectangle as a separate image. For +example, all of the pixel coordinates are counted from the top-left (or bottom-left) corner of the +ROI, not the original image. +@param image A pointer to the image header +@param rect The ROI rectangle + */ +CVAPI(void) cvSetImageROI( IplImage* image, CvRect rect ); + +/** @brief Resets the image ROI to include the entire image and releases the ROI structure. + +This produces a similar result to the following, but in addition it releases the ROI structure. : +@code + cvSetImageROI(image, cvRect(0, 0, image->width, image->height )); + cvSetImageCOI(image, 0); +@endcode +@param image A pointer to the image header + */ +CVAPI(void) cvResetImageROI( IplImage* image ); + +/** @brief Returns the image ROI. + +If there is no ROI set, cvRect(0,0,image-\>width,image-\>height) is returned. +@param image A pointer to the image header + */ +CVAPI(CvRect) cvGetImageROI( const IplImage* image ); + +/** @brief Creates a matrix header but does not allocate the matrix data. + +The function allocates a new matrix header and returns a pointer to it. The matrix data can then be +allocated using cvCreateData or set explicitly to user-allocated data via cvSetData. +@param rows Number of rows in the matrix +@param cols Number of columns in the matrix +@param type Type of the matrix elements, see cvCreateMat + */ +CVAPI(CvMat*) cvCreateMatHeader( int rows, int cols, int type ); + +#define CV_AUTOSTEP 0x7fffffff + +/** @brief Initializes a pre-allocated matrix header. + +This function is often used to process raw data with OpenCV matrix functions. For example, the +following code computes the matrix product of two matrices, stored as ordinary arrays: +@code + double a[] = { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12 }; + + double b[] = { 1, 5, 9, + 2, 6, 10, + 3, 7, 11, + 4, 8, 12 }; + + double c[9]; + CvMat Ma, Mb, Mc ; + + cvInitMatHeader(&Ma, 3, 4, CV_64FC1, a); + cvInitMatHeader(&Mb, 4, 3, CV_64FC1, b); + cvInitMatHeader(&Mc, 3, 3, CV_64FC1, c); + + cvMatMulAdd(&Ma, &Mb, 0, &Mc); + // the c array now contains the product of a (3x4) and b (4x3) +@endcode +@param mat A pointer to the matrix header to be initialized +@param rows Number of rows in the matrix +@param cols Number of columns in the matrix +@param type Type of the matrix elements, see cvCreateMat . +@param data Optional: data pointer assigned to the matrix header +@param step Optional: full row width in bytes of the assigned data. 
By default, the minimal +possible step is used which assumes there are no gaps between subsequent rows of the matrix. + */ +CVAPI(CvMat*) cvInitMatHeader( CvMat* mat, int rows, int cols, + int type, void* data CV_DEFAULT(NULL), + int step CV_DEFAULT(CV_AUTOSTEP) ); + +/** @brief Creates a matrix header and allocates the matrix data. + +The function call is equivalent to the following code: +@code + CvMat* mat = cvCreateMatHeader(rows, cols, type); + cvCreateData(mat); +@endcode +@param rows Number of rows in the matrix +@param cols Number of columns in the matrix +@param type The type of the matrix elements in the form +CV_\<bit depth\>\<S|U|F\>C\<number of channels\> , where S=signed, U=unsigned, F=float. For +example, CV _ 8UC1 means the elements are 8-bit unsigned and there is 1 channel, and CV _ +32SC2 means the elements are 32-bit signed and there are 2 channels. + */ +CVAPI(CvMat*) cvCreateMat( int rows, int cols, int type ); + +/** @brief Deallocates a matrix. + +The function decrements the matrix data reference counter and deallocates matrix header. If the data +reference counter is 0, it also deallocates the data. : +@code + if(*mat ) + cvDecRefData(*mat); + cvFree((void**)mat); +@endcode +@param mat Double pointer to the matrix + */ +CVAPI(void) cvReleaseMat( CvMat** mat ); + +/** @brief Decrements an array data reference counter. + +The function decrements the data reference counter in a CvMat or CvMatND if the reference counter + +pointer is not NULL. If the counter reaches zero, the data is deallocated. In the current +implementation the reference counter is not NULL only if the data was allocated using the +cvCreateData function. The counter will be NULL in other cases such as: external data was assigned +to the header using cvSetData, header is part of a larger matrix or image, or the header was +converted from an image or n-dimensional matrix header. +@param arr Pointer to an array header + */ +CV_INLINE void cvDecRefData( CvArr* arr ) +{ + if( CV_IS_MAT( arr )) + { + CvMat* mat = (CvMat*)arr; + mat->data.ptr = NULL; + if( mat->refcount != NULL && --*mat->refcount == 0 ) + cvFree( &mat->refcount ); + mat->refcount = NULL; + } + else if( CV_IS_MATND( arr )) + { + CvMatND* mat = (CvMatND*)arr; + mat->data.ptr = NULL; + if( mat->refcount != NULL && --*mat->refcount == 0 ) + cvFree( &mat->refcount ); + mat->refcount = NULL; + } +} + +/** @brief Increments array data reference counter. + +The function increments CvMat or CvMatND data reference counter and returns the new counter value if +the reference counter pointer is not NULL, otherwise it returns zero. +@param arr Array header + */ +CV_INLINE int cvIncRefData( CvArr* arr ) +{ + int refcount = 0; + if( CV_IS_MAT( arr )) + { + CvMat* mat = (CvMat*)arr; + if( mat->refcount != NULL ) + refcount = ++*mat->refcount; + } + else if( CV_IS_MATND( arr )) + { + CvMatND* mat = (CvMatND*)arr; + if( mat->refcount != NULL ) + refcount = ++*mat->refcount; + } + return refcount; +} + + +/** Creates an exact copy of the input matrix (except, may be, step value) */ +CVAPI(CvMat*) cvCloneMat( const CvMat* mat ); + + +/** @brief Returns matrix header corresponding to the rectangular sub-array of input image or matrix. + +The function returns header, corresponding to a specified rectangle of the input array. In other + +words, it allows the user to treat a rectangular part of input array as a stand-alone array. ROI is +taken into account by the function so the sub-array of ROI is actually extracted. 
+@param arr Input array +@param submat Pointer to the resultant sub-array header +@param rect Zero-based coordinates of the rectangle of interest + */ +CVAPI(CvMat*) cvGetSubRect( const CvArr* arr, CvMat* submat, CvRect rect ); +#define cvGetSubArr cvGetSubRect + +/** @brief Returns array row or row span. + +The function returns the header, corresponding to a specified row/row span of the input array. +cvGetRow(arr, submat, row) is a shortcut for cvGetRows(arr, submat, row, row+1). +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param start_row Zero-based index of the starting row (inclusive) of the span +@param end_row Zero-based index of the ending row (exclusive) of the span +@param delta_row Index step in the row span. That is, the function extracts every delta_row -th +row from start_row and up to (but not including) end_row . + */ +CVAPI(CvMat*) cvGetRows( const CvArr* arr, CvMat* submat, + int start_row, int end_row, + int delta_row CV_DEFAULT(1)); + +/** @overload +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param row Zero-based index of the selected row +*/ +CV_INLINE CvMat* cvGetRow( const CvArr* arr, CvMat* submat, int row ) +{ + return cvGetRows( arr, submat, row, row + 1, 1 ); +} + + +/** @brief Returns one or more array columns. + +The function returns the header, corresponding to a specified column span of the input array. That + +is, no data is copied. Therefore, any modifications of the submatrix will affect the original array. +If you need to copy the columns, use cvCloneMat. cvGetCol(arr, submat, col) is a shortcut for +cvGetCols(arr, submat, col, col+1). +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param start_col Zero-based index of the starting column (inclusive) of the span +@param end_col Zero-based index of the ending column (exclusive) of the span + */ +CVAPI(CvMat*) cvGetCols( const CvArr* arr, CvMat* submat, + int start_col, int end_col ); + +/** @overload +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param col Zero-based index of the selected column +*/ +CV_INLINE CvMat* cvGetCol( const CvArr* arr, CvMat* submat, int col ) +{ + return cvGetCols( arr, submat, col, col + 1 ); +} + +/** @brief Returns one of array diagonals. + +The function returns the header, corresponding to a specified diagonal of the input array. +@param arr Input array +@param submat Pointer to the resulting sub-array header +@param diag Index of the array diagonal. Zero value corresponds to the main diagonal, -1 +corresponds to the diagonal above the main, 1 corresponds to the diagonal below the main, and so +forth. + */ +CVAPI(CvMat*) cvGetDiag( const CvArr* arr, CvMat* submat, + int diag CV_DEFAULT(0)); + +/** low-level scalar <-> raw data conversion functions */ +CVAPI(void) cvScalarToRawData( const CvScalar* scalar, void* data, int type, + int extend_to_12 CV_DEFAULT(0) ); + +CVAPI(void) cvRawDataToScalar( const void* data, int type, CvScalar* scalar ); 
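+
+/** Usage sketch (illustrative): the cvGetSubRect/cvGetRows/cvGetCols family returns
+headers into the parent data, so writing through the sub-header modifies the parent:
+@code
+    CvMat* mat = cvCreateMat(10, 10, CV_32FC1);
+    cvSetZero(mat);
+    CvMat sub;
+    cvGetSubRect(mat, &sub, cvRect(2, 2, 4, 4)); // header only, no data copied
+    cvSet(&sub, cvRealScalar(1.), NULL);         // writes into mat itself
+    cvReleaseMat(&mat);
+@endcode
+*/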
+/** @brief Creates a new matrix header but does not allocate the matrix data. + +The function allocates a header for a multi-dimensional dense array. The array data can further be +allocated using cvCreateData or set explicitly to user-allocated data via cvSetData. +@param dims Number of array dimensions +@param sizes Array of dimension sizes +@param type Type of array elements, see cvCreateMat + */ +CVAPI(CvMatND*) cvCreateMatNDHeader( int dims, const int* sizes, int type ); + +/** @brief Creates the header and allocates the data for a multi-dimensional dense array. + +This function call is equivalent to the following code: +@code + CvMatND* mat = cvCreateMatNDHeader(dims, sizes, type); + cvCreateData(mat); +@endcode +@param dims Number of array dimensions. This must not exceed CV_MAX_DIM (32 by default, but can be +changed at build time). +@param sizes Array of dimension sizes. +@param type Type of array elements, see cvCreateMat . + */ +CVAPI(CvMatND*) cvCreateMatND( int dims, const int* sizes, int type ); + +/** @brief Initializes a pre-allocated multi-dimensional array header. + +@param mat A pointer to the array header to be initialized +@param dims The number of array dimensions +@param sizes An array of dimension sizes +@param type Type of array elements, see cvCreateMat +@param data Optional data pointer assigned to the matrix header + */ +CVAPI(CvMatND*) cvInitMatNDHeader( CvMatND* mat, int dims, const int* sizes, + int type, void* data CV_DEFAULT(NULL) ); + +/** @brief Deallocates a multi-dimensional array. + +The function decrements the array data reference counter and releases the array header. If the +reference counter reaches 0, it also deallocates the data. : +@code + if(*mat ) + cvDecRefData(*mat); + cvFree((void**)mat); +@endcode +@param mat Double pointer to the array + */ +CV_INLINE void cvReleaseMatND( CvMatND** mat ) +{ + cvReleaseMat( (CvMat**)mat ); +} + +/** Creates a copy of CvMatND (except, may be, steps) */ +CVAPI(CvMatND*) cvCloneMatND( const CvMatND* mat ); + +/** @brief Creates sparse array. + +The function allocates a multi-dimensional sparse array. Initially the array contains no elements, +that is PtrND and other related functions will return 0 for every index. +@param dims Number of array dimensions. In contrast to the dense matrix, the number of dimensions is +practically unlimited (up to \f$2^{16}\f$ ). +@param sizes Array of dimension sizes +@param type Type of array elements. The same as for CvMat + */ +CVAPI(CvSparseMat*) cvCreateSparseMat( int dims, const int* sizes, int type ); + +/** @brief Deallocates sparse array. + +The function releases the sparse array and clears the array pointer upon exit. +@param mat Double pointer to the array + */ +CVAPI(void) cvReleaseSparseMat( CvSparseMat** mat ); + +/** Creates a copy of CvSparseMat (except, may be, zero items) */ +CVAPI(CvSparseMat*) cvCloneSparseMat( const CvSparseMat* mat ); + +/** @brief Initializes sparse array elements iterator. + +The function initializes iterator of sparse array elements and returns pointer to the first element, +or NULL if the array is empty. +@param mat Input array +@param mat_iterator Initialized iterator + */ +CVAPI(CvSparseNode*) cvInitSparseMatIterator( const CvSparseMat* mat, + CvSparseMatIterator* mat_iterator ); + +/** @brief Returns the next sparse matrix element + +The function moves iterator to the next sparse matrix element and returns pointer to it. In the +current version there is no particular order of the elements, because they are stored in the +hash table. 
The sample below demonstrates how to iterate through the sparse matrix: +@code + // print all the non-zero sparse matrix elements and compute their sum + double sum = 0; + int i, dims = cvGetDims(sparsemat); + CvSparseMatIterator it; + CvSparseNode* node = cvInitSparseMatIterator(sparsemat, &it); + + for(; node != 0; node = cvGetNextSparseNode(&it)) + { + int* idx = CV_NODE_IDX(sparsemat, node); + float val = *(float*)CV_NODE_VAL(sparsemat, node); + printf("M"); + for(i = 0; i < dims; i++ ) + printf("[%d]", idx[i]); + printf("=%g\n", val); + + sum += val; + } + + printf("\nTotal sum = %g\n", sum); +@endcode +@param mat_iterator Sparse array iterator + */ +CV_INLINE CvSparseNode* cvGetNextSparseNode( CvSparseMatIterator* mat_iterator ) +{ + if( mat_iterator->node->next ) + return mat_iterator->node = mat_iterator->node->next; + else + { + int idx; + for( idx = ++mat_iterator->curidx; idx < mat_iterator->mat->hashsize; idx++ ) + { + CvSparseNode* node = (CvSparseNode*)mat_iterator->mat->hashtable[idx]; + if( node ) + { + mat_iterator->curidx = idx; + return mat_iterator->node = node; + } + } + return NULL; + } +} + + +#define CV_MAX_ARR 10 + +/** matrix iterator: used for n-ary operations on dense arrays */ +typedef struct CvNArrayIterator +{ + int count; /**< number of arrays */ + int dims; /**< number of dimensions to iterate */ + CvSize size; /**< maximal common linear size: { width = size, height = 1 } */ + uchar* ptr[CV_MAX_ARR]; /**< pointers to the array slices */ + int stack[CV_MAX_DIM]; /**< for internal use */ + CvMatND* hdr[CV_MAX_ARR]; /**< pointers to the headers of the + matrices that are processed */ +} +CvNArrayIterator; + +#define CV_NO_DEPTH_CHECK 1 +#define CV_NO_CN_CHECK 2 +#define CV_NO_SIZE_CHECK 4 + +/** initializes iterator that traverses through several arrays simultaneously + (the function together with cvNextNArraySlice is used for + N-ary element-wise operations) */ +CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs, + const CvArr* mask, CvMatND* stubs, + CvNArrayIterator* array_iterator, + int flags CV_DEFAULT(0) ); + +/** returns zero value if iteration is finished, non-zero (slice length) otherwise */ +CVAPI(int) cvNextNArraySlice( CvNArrayIterator* array_iterator ); + + +/** @brief Returns type of array elements. + +The function returns type of the array elements. In the case of IplImage the type is converted to +CvMat-like representation. For example, if the image has been created as: +@code + IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3); +@endcode +The code cvGetElemType(img) will return CV_8UC3. +@param arr Input array + */ +CVAPI(int) cvGetElemType( const CvArr* arr ); + +/** @brief Return number of array dimensions + +The function returns the array dimensionality and the array of dimension sizes. In the case of +IplImage or CvMat it always returns 2 regardless of number of image/matrix rows. For example, the +following code calculates total number of array elements: +@code + int sizes[CV_MAX_DIM]; + int i, total = 1; + int dims = cvGetDims(arr, sizes); + for(i = 0; i < dims; i++ ) + total *= sizes[i]; +@endcode +@param arr Input array +@param sizes Optional output vector of the array dimension sizes. For 2d arrays the number of rows +(height) goes first, number of columns (width) next. + */ +CVAPI(int) cvGetDims( const CvArr* arr, int* sizes CV_DEFAULT(NULL) ); + + +/** @brief Returns array size along the specified dimension. 
+ +@param arr Input array +@param index Zero-based dimension index (for matrices 0 means number of rows, 1 means number of +columns; for images 0 means height, 1 means width) + */ +CVAPI(int) cvGetDimSize( const CvArr* arr, int index ); + + +/** @brief Return pointer to a particular array element. + +The functions return a pointer to a specific array element. The number of array dimensions should match +the number of indices passed to the function, except for the cvPtr1D function, which can be used for +sequential access to 1D, 2D or nD dense arrays. + +The functions can be used for sparse arrays as well - if the requested node does not exist they +create it and set it to zero. + +All these as well as other functions accessing array elements ( cvGetND , cvGetRealND , cvSet +, cvSetND , cvSetRealND ) raise an error if the element index is out of range. +@param arr Input array +@param idx0 The first zero-based component of the element index +@param type Optional output parameter: type of matrix elements + */ +CVAPI(uchar*) cvPtr1D( const CvArr* arr, int idx0, int* type CV_DEFAULT(NULL)); +/** @overload */ +CVAPI(uchar*) cvPtr2D( const CvArr* arr, int idx0, int idx1, int* type CV_DEFAULT(NULL) ); +/** @overload */ +CVAPI(uchar*) cvPtr3D( const CvArr* arr, int idx0, int idx1, int idx2, + int* type CV_DEFAULT(NULL)); +/** @overload +@param arr Input array +@param idx Array of the element indices +@param type Optional output parameter: type of matrix elements +@param create_node Optional input parameter for sparse matrices. Non-zero value of the parameter +means that the requested element is created if it does not exist already. +@param precalc_hashval Optional input parameter for sparse matrices. If the pointer is not NULL, +the function does not recalculate the node hash value, but takes it from the specified location. +It is useful for speeding up pair-wise operations (TODO: provide an example) +*/ +CVAPI(uchar*) cvPtrND( const CvArr* arr, const int* idx, int* type CV_DEFAULT(NULL), + int create_node CV_DEFAULT(1), + unsigned* precalc_hashval CV_DEFAULT(NULL)); + +/** @brief Return a specific array element. + +The functions return a specific array element. In the case of a sparse array the functions return 0 +if the requested node does not exist (no new node is created by the functions). +@param arr Input array +@param idx0 The first zero-based component of the element index + */ +CVAPI(CvScalar) cvGet1D( const CvArr* arr, int idx0 ); +/** @overload */ +CVAPI(CvScalar) cvGet2D( const CvArr* arr, int idx0, int idx1 ); +/** @overload */ +CVAPI(CvScalar) cvGet3D( const CvArr* arr, int idx0, int idx1, int idx2 ); +/** @overload +@param arr Input array +@param idx Array of the element indices +*/ +CVAPI(CvScalar) cvGetND( const CvArr* arr, const int* idx ); 
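+
+/** Usage sketch (illustrative), using the Get/Set accessors declared here and just
+below:
+@code
+    CvMat* m = cvCreateMat(3, 3, CV_64FC1);
+    cvSetZero(m);
+    cvSetReal2D(m, 0, 1, 3.5);        // m(0,1) = 3.5
+    double v = cvGetReal2D(m, 0, 1);  // v == 3.5
+    CvScalar s = cvGet2D(m, 0, 1);    // s.val[0] == 3.5
+    cvReleaseMat(&m);
+@endcode
+*/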
+/** @brief Return a specific element of single-channel 1D, 2D, 3D or nD array. + +Returns a specific element of a single-channel array. If the array has multiple channels, a runtime +error is raised. Note that Get?D functions can be used safely for both single-channel and +multiple-channel arrays though they are a bit slower. + +In the case of a sparse array the functions return 0 if the requested node does not exist (no new +node is created by the functions). +@param arr Input array. Must have a single channel. +@param idx0 The first zero-based component of the element index + */ +CVAPI(double) cvGetReal1D( const CvArr* arr, int idx0 ); +/** @overload */ +CVAPI(double) cvGetReal2D( const CvArr* arr, int idx0, int idx1 ); +/** @overload */ +CVAPI(double) cvGetReal3D( const CvArr* arr, int idx0, int idx1, int idx2 ); +/** @overload +@param arr Input array. Must have a single channel. +@param idx Array of the element indices +*/ +CVAPI(double) cvGetRealND( const CvArr* arr, const int* idx ); + +/** @brief Change a particular array element. + +The functions assign the new value to a particular array element. In the case of a sparse array the +functions create the node if it does not exist yet. +@param arr Input array +@param idx0 The first zero-based component of the element index +@param value The assigned value + */ +CVAPI(void) cvSet1D( CvArr* arr, int idx0, CvScalar value ); +/** @overload */ +CVAPI(void) cvSet2D( CvArr* arr, int idx0, int idx1, CvScalar value ); +/** @overload */ +CVAPI(void) cvSet3D( CvArr* arr, int idx0, int idx1, int idx2, CvScalar value ); +/** @overload +@param arr Input array +@param idx Array of the element indices +@param value The assigned value +*/ +CVAPI(void) cvSetND( CvArr* arr, const int* idx, CvScalar value ); + +/** @brief Change a specific array element. + +The functions assign a new value to a specific element of a single-channel array. If the array has +multiple channels, a runtime error is raised. Note that the Set\*D function can be used safely for +both single-channel and multiple-channel arrays, though they are a bit slower. + +In the case of a sparse array the functions create the node if it does not yet exist. +@param arr Input array +@param idx0 The first zero-based component of the element index +@param value The assigned value + */ +CVAPI(void) cvSetReal1D( CvArr* arr, int idx0, double value ); +/** @overload */ +CVAPI(void) cvSetReal2D( CvArr* arr, int idx0, int idx1, double value ); +/** @overload */ +CVAPI(void) cvSetReal3D( CvArr* arr, int idx0, + int idx1, int idx2, double value ); +/** @overload +@param arr Input array +@param idx Array of the element indices +@param value The assigned value +*/ +CVAPI(void) cvSetRealND( CvArr* arr, const int* idx, double value ); + +/** clears element of ND dense array, + in case of sparse arrays it deletes the specified node */ +CVAPI(void) cvClearND( CvArr* arr, const int* idx ); + +/** @brief Returns matrix header for arbitrary array. + +The function returns a matrix header for the input array that can be a matrix - CvMat, an image - +IplImage, or a multi-dimensional dense array - CvMatND (the third option is allowed only if +allowND != 0) . In the case of matrix the function simply returns the input pointer. In the case of +IplImage\* or CvMatND it initializes the header structure with parameters of the current image ROI +and returns &header. Because COI is not supported by CvMat, it is returned separately. + +The function provides an easy way to handle both types of arrays - IplImage and CvMat using the same +code. Input array must have non-zero data pointer, otherwise the function will report an error. + +@note If the input array is IplImage with planar data layout and COI set, the function returns the +pointer to the selected plane and COI == 0. This feature allows the user to process IplImage structures +with planar data layout, even though OpenCV does not support such images. 
+@param arr Input array +@param header Pointer to CvMat structure used as a temporary buffer +@param coi Optional output parameter for storing COI +@param allowND If non-zero, the function accepts multi-dimensional dense arrays (CvMatND\*) and +returns 2D matrix (if CvMatND has two dimensions) or 1D matrix (when CvMatND has 1 dimension or +more than 2 dimensions). The CvMatND array must be continuous. +@sa cvGetImage, cvarrToMat. + */ +CVAPI(CvMat*) cvGetMat( const CvArr* arr, CvMat* header, + int* coi CV_DEFAULT(NULL), + int allowND CV_DEFAULT(0)); + +/** @brief Returns image header for arbitrary array. + +The function returns the image header for the input array that can be a matrix (CvMat) or image +(IplImage). In the case of an image the function simply returns the input pointer. In the case of +CvMat it initializes an image_header structure with the parameters of the input matrix. Note that +if we transform IplImage to CvMat using cvGetMat and then transform CvMat back to IplImage using +this function, we will get different headers if the ROI is set in the original image. +@param arr Input array +@param image_header Pointer to IplImage structure used as a temporary buffer + */ +CVAPI(IplImage*) cvGetImage( const CvArr* arr, IplImage* image_header ); + + +/** @brief Changes the shape of a multi-dimensional array without copying the data. + +The function is an advanced version of cvReshape that can work with multi-dimensional arrays as +well (though it can work with ordinary images and matrices) and change the number of dimensions. + +Below are the two samples from the cvReshape description rewritten using cvReshapeMatND: +@code + IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3); + IplImage gray_img_hdr, *gray_img; + gray_img = (IplImage*)cvReshapeMatND(color_img, sizeof(gray_img_hdr), &gray_img_hdr, 1, 0, 0); + ... + int size[] = { 2, 2, 2 }; + CvMatND* mat = cvCreateMatND(3, size, CV_32F); + CvMat row_header, *row; + row = (CvMat*)cvReshapeMatND(mat, sizeof(row_header), &row_header, 0, 1, 0); +@endcode +In C, the header file for this function includes a convenient macro cvReshapeND that does away with +the sizeof_header parameter. So, the lines containing the call to cvReshapeMatND in the examples +may be replaced as follows: +@code + gray_img = (IplImage*)cvReshapeND(color_img, &gray_img_hdr, 1, 0, 0); + ... + row = (CvMat*)cvReshapeND(mat, &row_header, 0, 1, 0); +@endcode +@param arr Input array +@param sizeof_header Size of output header to distinguish between IplImage, CvMat and CvMatND +output headers +@param header Output header to be filled +@param new_cn New number of channels. new_cn = 0 means that the number of channels remains +unchanged. +@param new_dims New number of dimensions. new_dims = 0 means that the number of dimensions +remains the same. +@param new_sizes Array of new dimension sizes. Only new_dims-1 values are used, because the +total number of elements must remain the same. Thus, if new_dims = 1, new_sizes array is not +used. + */ +CVAPI(CvArr*) cvReshapeMatND( const CvArr* arr, + int sizeof_header, CvArr* header, + int new_cn, int new_dims, int* new_sizes ); + +#define cvReshapeND( arr, header, new_cn, new_dims, new_sizes ) \ + cvReshapeMatND( (arr), sizeof(*(header)), (header), \ + (new_cn), (new_dims), (new_sizes)) + +/** @brief Changes shape of matrix/image without copying data. 
+ +The function initializes the CvMat header so that it points to the same data as the original array +but has a different shape - different number of channels, different number of rows, or both. + +The following example code creates one image buffer and two image headers, the first is for a +320x240x3 image and the second is for a 960x240x1 image: +@code + IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3); + CvMat gray_mat_hdr; + IplImage gray_img_hdr, *gray_img; + cvReshape(color_img, &gray_mat_hdr, 1); + gray_img = cvGetImage(&gray_mat_hdr, &gray_img_hdr); +@endcode +And the next example converts a 3x3 matrix to a single 1x9 vector: +@code + CvMat* mat = cvCreateMat(3, 3, CV_32F); + CvMat row_header, *row; + row = cvReshape(mat, &row_header, 0, 1); +@endcode +@param arr Input array +@param header Output header to be filled +@param new_cn New number of channels. 'new_cn = 0' means that the number of channels remains +unchanged. +@param new_rows New number of rows. 'new_rows = 0' means that the number of rows remains +unchanged unless it needs to be changed according to new_cn value. +*/ +CVAPI(CvMat*) cvReshape( const CvArr* arr, CvMat* header, + int new_cn, int new_rows CV_DEFAULT(0) ); + +/** Repeats source 2d array several times in both horizontal and + vertical direction to fill destination array */ +CVAPI(void) cvRepeat( const CvArr* src, CvArr* dst ); + +/** @brief Allocates array data + +The function allocates image, matrix or multi-dimensional dense array data. Note that in the case of +matrix types OpenCV allocation functions are used. In the case of IplImage they are used unless +CV_TURN_ON_IPL_COMPATIBILITY() has been called before. In the latter case IPL functions are used +to allocate the data. +@param arr Array header + */ +CVAPI(void) cvCreateData( CvArr* arr ); + +/** @brief Releases array data. + +The function releases the array data. In the case of CvMat or CvMatND it simply calls +cvDecRefData(), that is the function can not deallocate external data. See also the note to +cvCreateData . +@param arr Array header + */ +CVAPI(void) cvReleaseData( CvArr* arr ); + +/** @brief Assigns user data to the array header. + +The function assigns user data to the array header. Header should be initialized before using +cvCreateMatHeader, cvCreateImageHeader, cvCreateMatNDHeader, cvInitMatHeader, +cvInitImageHeader or cvInitMatNDHeader. +@param arr Array header +@param data User data +@param step Full row length in bytes + */ +CVAPI(void) cvSetData( CvArr* arr, void* data, int step ); + +/** @brief Retrieves low-level information about the array. + +The function fills output variables with low-level information about the array data. All output + +parameters are optional, so some of the pointers may be set to NULL. If the array is IplImage with +ROI set, the parameters of ROI are returned. + +The following example shows how to get access to array elements. 
It computes absolute values of the +array elements : +@code + float* data; + int step; + CvSize size; + + cvGetRawData(array, (uchar**)&data, &step, &size); + step /= sizeof(data[0]); + + for(int y = 0; y < size.height; y++, data += step ) + for(int x = 0; x < size.width; x++ ) + data[x] = (float)fabs(data[x]); +@endcode +@param arr Array header +@param data Output pointer to the whole image origin or ROI origin if ROI is set +@param step Output full row length in bytes +@param roi_size Output ROI size + */ +CVAPI(void) cvGetRawData( const CvArr* arr, uchar** data, + int* step CV_DEFAULT(NULL), + CvSize* roi_size CV_DEFAULT(NULL)); + +/** @brief Returns size of matrix or image ROI. + +The function returns number of rows (CvSize::height) and number of columns (CvSize::width) of the +input matrix or image. In the case of image the size of ROI is returned. +@param arr array header + */ +CVAPI(CvSize) cvGetSize( const CvArr* arr ); + +/** @brief Copies one array to another. + +The function copies selected elements from an input array to an output array: + +\f[\texttt{dst} (I)= \texttt{src} (I) \quad \text{if} \quad \texttt{mask} (I) \ne 0.\f] + +If any of the passed arrays is of IplImage type, then its ROI and COI fields are used. Both arrays +must have the same type, the same number of dimensions, and the same size. The function can also +copy sparse arrays (mask is not supported in this case). +@param src The source array +@param dst The destination array +@param mask Operation mask, 8-bit single channel array; specifies elements of the destination array +to be changed + */ +CVAPI(void) cvCopy( const CvArr* src, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Sets every element of an array to a given value. + +The function copies the scalar value to every selected element of the destination array: +\f[\texttt{arr} (I)= \texttt{value} \quad \text{if} \quad \texttt{mask} (I) \ne 0\f] +If array arr is of IplImage type, then its ROI is used, but COI must not be set. +@param arr The destination array +@param value Fill value +@param mask Operation mask, 8-bit single channel array; specifies elements of the destination +array to be changed + */ +CVAPI(void) cvSet( CvArr* arr, CvScalar value, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Clears the array. + +The function clears the array. In the case of dense arrays (CvMat, CvMatND or IplImage), +cvZero(array) is equivalent to cvSet(array,cvScalarAll(0),0). In the case of sparse arrays all the +elements are removed. +@param arr Array to be cleared + */ +CVAPI(void) cvSetZero( CvArr* arr ); +#define cvZero cvSetZero + + +/** Splits a multi-channel array into the set of single-channel arrays or + extracts particular [color] plane */ +CVAPI(void) cvSplit( const CvArr* src, CvArr* dst0, CvArr* dst1, + CvArr* dst2, CvArr* dst3 ); + +/** Merges a set of single-channel arrays into the single multi-channel array + or inserts one particular [color] plane to the array */ +CVAPI(void) cvMerge( const CvArr* src0, const CvArr* src1, + const CvArr* src2, const CvArr* src3, + CvArr* dst ); + +/** Copies several channels from input arrays to + certain channels of output arrays */ +CVAPI(void) cvMixChannels( const CvArr** src, int src_count, + CvArr** dst, int dst_count, + const int* from_to, int pair_count ); 
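+
+/** Usage sketch (illustrative): cvSplit and cvMerge are symmetric; unused plane
+slots are passed as NULL:
+@code
+    IplImage* bgr = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U, 3);
+    IplImage* b = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U, 1);
+    IplImage* g = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U, 1);
+    IplImage* r = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U, 1);
+    cvSplit(bgr, b, g, r, NULL);      // 3 channels -> 3 single-channel planes
+    cvMerge(b, g, r, NULL, bgr);      // and back
+@endcode
+*/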
+/** @brief Converts one array to another with optional linear transformation. + +The function has several different purposes, and thus has several different names. It copies one +array to another with optional scaling, which is performed first, and/or optional type conversion, +performed after: + +\f[\texttt{dst} (I) = \texttt{scale} \texttt{src} (I) + ( \texttt{shift} _0, \texttt{shift} _1,...)\f] + +All the channels of multi-channel arrays are processed independently. + +The type of conversion is done with rounding and saturation, that is if the result of scaling + +conversion can not be represented exactly by a value of the destination array element type, it is +set to the nearest representable value on the real axis. +@param src Source array +@param dst Destination array +@param scale Scale factor +@param shift Value added to the scaled source array elements + */ +CVAPI(void) cvConvertScale( const CvArr* src, CvArr* dst, + double scale CV_DEFAULT(1), + double shift CV_DEFAULT(0) ); +#define cvCvtScale cvConvertScale +#define cvScale cvConvertScale +#define cvConvert( src, dst ) cvConvertScale( (src), (dst), 1, 0 ) + + +/** Performs linear transformation on every source array element, + stores absolute value of the result: + dst(x,y,c) = abs(scale*src(x,y,c)+shift). + destination array must have 8u type. + In other cases one may use cvConvertScale + cvAbsDiffS */ +CVAPI(void) cvConvertScaleAbs( const CvArr* src, CvArr* dst, + double scale CV_DEFAULT(1), + double shift CV_DEFAULT(0) ); +#define cvCvtScaleAbs cvConvertScaleAbs + + +/** checks termination criteria validity and + sets eps to default_eps (if it is not set), + max_iter to default_max_iters (if it is not set) +*/ +CVAPI(CvTermCriteria) cvCheckTermCriteria( CvTermCriteria criteria, + double default_eps, + int default_max_iters ); + +/****************************************************************************************\ +* Arithmetic, logic and comparison operations * +\****************************************************************************************/ + +/** dst(mask) = src1(mask) + src2(mask) */ +CVAPI(void) cvAdd( const CvArr* src1, const CvArr* src2, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(mask) = src(mask) + value */ +CVAPI(void) cvAddS( const CvArr* src, CvScalar value, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(mask) = src1(mask) - src2(mask) */ +CVAPI(void) cvSub( const CvArr* src1, const CvArr* src2, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(mask) = src(mask) - value = src(mask) + (-value) */ +CV_INLINE void cvSubS( const CvArr* src, CvScalar value, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)) +{ + cvAddS( src, cvScalar( -value.val[0], -value.val[1], -value.val[2], -value.val[3]), + dst, mask ); +} + +/** dst(mask) = value - src(mask) */ +CVAPI(void) cvSubRS( const CvArr* src, CvScalar value, CvArr* dst, + const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src1(idx) * src2(idx) * scale + (scaled element-wise multiplication of 2 arrays) */ +CVAPI(void) cvMul( const CvArr* src1, const CvArr* src2, + CvArr* dst, double scale CV_DEFAULT(1) ); + +/** element-wise division/inversion with scaling: + dst(idx) = src1(idx) * scale / src2(idx) + or dst(idx) = scale / src2(idx) if src1 == 0 */ +CVAPI(void) cvDiv( const CvArr* src1, const CvArr* src2, + CvArr* dst, double scale CV_DEFAULT(1)); + +/** dst = src1 * scale + src2 */ +CVAPI(void) cvScaleAdd( const CvArr* src1, CvScalar scale, + const CvArr* src2, CvArr* dst ); +#define cvAXPY( A, real_scalar, B, C ) cvScaleAdd(A, cvRealScalar(real_scalar), B, C) 
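+
+/** Usage sketch (illustrative): a common cvConvertScale use, mapping 8-bit pixels
+into floats in [0, 1]:
+@code
+    IplImage* src8  = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U,  1);
+    IplImage* dst32 = cvCreateImage(cvSize(320, 240), IPL_DEPTH_32F, 1);
+    cvConvertScale(src8, dst32, 1. / 255, 0); // dst = src/255, with saturation
+@endcode
+*/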
+/** dst = src1 * alpha + src2 * beta + gamma */ +CVAPI(void) cvAddWeighted( const CvArr* src1, double alpha, + const CvArr* src2, double beta, + double gamma, CvArr* dst ); + +/** @brief Calculates the dot product of two arrays in Euclidean metrics. + +The function calculates and returns the Euclidean dot product of two arrays. + +\f[src1 \bullet src2 = \sum _I ( \texttt{src1} (I) \texttt{src2} (I))\f] + +In the case of multiple channel arrays, the results for all channels are accumulated. In particular, +cvDotProduct(a,a) where a is a complex vector, will return \f$||\texttt{a}||^2\f$. The function can +process multi-dimensional arrays, row by row, layer by layer, and so on. +@param src1 The first source array +@param src2 The second source array + */ +CVAPI(double) cvDotProduct( const CvArr* src1, const CvArr* src2 ); + +/** dst(idx) = src1(idx) & src2(idx) */ +CVAPI(void) cvAnd( const CvArr* src1, const CvArr* src2, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src(idx) & value */ +CVAPI(void) cvAndS( const CvArr* src, CvScalar value, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src1(idx) | src2(idx) */ +CVAPI(void) cvOr( const CvArr* src1, const CvArr* src2, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src(idx) | value */ +CVAPI(void) cvOrS( const CvArr* src, CvScalar value, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src1(idx) ^ src2(idx) */ +CVAPI(void) cvXor( const CvArr* src1, const CvArr* src2, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = src(idx) ^ value */ +CVAPI(void) cvXorS( const CvArr* src, CvScalar value, + CvArr* dst, const CvArr* mask CV_DEFAULT(NULL)); + +/** dst(idx) = ~src(idx) */ +CVAPI(void) cvNot( const CvArr* src, CvArr* dst ); + +/** dst(idx) = lower(idx) <= src(idx) < upper(idx) */ +CVAPI(void) cvInRange( const CvArr* src, const CvArr* lower, + const CvArr* upper, CvArr* dst ); + +/** dst(idx) = lower <= src(idx) < upper */ +CVAPI(void) cvInRangeS( const CvArr* src, CvScalar lower, + CvScalar upper, CvArr* dst ); + +#define CV_CMP_EQ 0 +#define CV_CMP_GT 1 +#define CV_CMP_GE 2 +#define CV_CMP_LT 3 +#define CV_CMP_LE 4 +#define CV_CMP_NE 5 + +/** The comparison operations support single-channel arrays only. + Destination image should be 8uC1 or 8sC1 */ + +/** dst(idx) = src1(idx) _cmp_op_ src2(idx) */ +CVAPI(void) cvCmp( const CvArr* src1, const CvArr* src2, CvArr* dst, int cmp_op ); + +/** dst(idx) = src1(idx) _cmp_op_ value */ +CVAPI(void) cvCmpS( const CvArr* src, double value, CvArr* dst, int cmp_op ); + +/** dst(idx) = min(src1(idx),src2(idx)) */ +CVAPI(void) cvMin( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** dst(idx) = max(src1(idx),src2(idx)) */ +CVAPI(void) cvMax( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** dst(idx) = min(src(idx),value) */ +CVAPI(void) cvMinS( const CvArr* src, double value, CvArr* dst ); + +/** dst(idx) = max(src(idx),value) */ +CVAPI(void) cvMaxS( const CvArr* src, double value, CvArr* dst ); + +/** dst(x,y,c) = abs(src1(x,y,c) - src2(x,y,c)) */ +CVAPI(void) cvAbsDiff( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** dst(x,y,c) = abs(src(x,y,c) - value(c)) */ +CVAPI(void) cvAbsDiffS( const CvArr* src, CvArr* dst, CvScalar value ); +#define cvAbs( src, dst ) cvAbsDiffS( (src), (dst), cvScalarAll(0)) + +/****************************************************************************************\ +* Math operations * +\****************************************************************************************/ + +/** Does cartesian->polar coordinates conversion. 
+ Either of output components (magnitude or angle) is optional */ +CVAPI(void) cvCartToPolar( const CvArr* x, const CvArr* y, + CvArr* magnitude, CvArr* angle CV_DEFAULT(NULL), + int angle_in_degrees CV_DEFAULT(0)); + +/** Does polar->cartesian coordinates conversion. + Either of output components (magnitude or angle) is optional. + If magnitude is missing it is assumed to be all 1's */ +CVAPI(void) cvPolarToCart( const CvArr* magnitude, const CvArr* angle, + CvArr* x, CvArr* y, + int angle_in_degrees CV_DEFAULT(0)); + +/** Does powering: dst(idx) = src(idx)^power */ +CVAPI(void) cvPow( const CvArr* src, CvArr* dst, double power ); + +/** Does exponentiation: dst(idx) = exp(src(idx)). + Overflow is not handled yet. Underflow is handled. + Maximal relative error is ~7e-6 for single-precision input */ +CVAPI(void) cvExp( const CvArr* src, CvArr* dst ); + +/** Calculates natural logarithms: dst(idx) = log(abs(src(idx))). + Logarithm of 0 gives large negative number (~-700) + Maximal relative error is ~3e-7 for single-precision output +*/ +CVAPI(void) cvLog( const CvArr* src, CvArr* dst ); + +/** Fast arctangent calculation */ +CVAPI(float) cvFastArctan( float y, float x ); + +/** Fast cubic root calculation */ +CVAPI(float) cvCbrt( float value ); + +#define CV_CHECK_RANGE 1 +#define CV_CHECK_QUIET 2 +/** Checks array values for NaNs, Infs or simply for too large numbers + (if CV_CHECK_RANGE is set). If CV_CHECK_QUIET is set, + no runtime errors are raised (function returns zero value in case of "bad" values). + Otherwise cvError is called */ +CVAPI(int) cvCheckArr( const CvArr* arr, int flags CV_DEFAULT(0), + double min_val CV_DEFAULT(0), double max_val CV_DEFAULT(0)); +#define cvCheckArray cvCheckArr + +#define CV_RAND_UNI 0 +#define CV_RAND_NORMAL 1 + +/** @brief Fills an array with random numbers and updates the RNG state. + +The function fills the destination array with uniformly or normally distributed random numbers. +@param rng CvRNG state initialized by cvRNG +@param arr The destination array +@param dist_type Distribution type +> - **CV_RAND_UNI** uniform distribution +> - **CV_RAND_NORMAL** normal or Gaussian distribution +@param param1 The first parameter of the distribution. In the case of a uniform distribution it is +the inclusive lower boundary of the random numbers range. In the case of a normal distribution it +is the mean value of the random numbers. +@param param2 The second parameter of the distribution. In the case of a uniform distribution it +is the exclusive upper boundary of the random numbers range. In the case of a normal distribution +it is the standard deviation of the random numbers. +@sa randu, randn, RNG::fill. 
+ */ +CVAPI(void) cvRandArr( CvRNG* rng, CvArr* arr, int dist_type, + CvScalar param1, CvScalar param2 ); + +CVAPI(void) cvRandShuffle( CvArr* mat, CvRNG* rng, + double iter_factor CV_DEFAULT(1.)); + +#define CV_SORT_EVERY_ROW 0 +#define CV_SORT_EVERY_COLUMN 1 +#define CV_SORT_ASCENDING 0 +#define CV_SORT_DESCENDING 16 + +CVAPI(void) cvSort( const CvArr* src, CvArr* dst CV_DEFAULT(NULL), + CvArr* idxmat CV_DEFAULT(NULL), + int flags CV_DEFAULT(0)); + +/** Finds real roots of a cubic equation */ +CVAPI(int) cvSolveCubic( const CvMat* coeffs, CvMat* roots ); + +/** Finds all real and complex roots of a polynomial equation */ +CVAPI(void) cvSolvePoly(const CvMat* coeffs, CvMat *roots2, + int maxiter CV_DEFAULT(20), int fig CV_DEFAULT(100)); + +/****************************************************************************************\ +* Matrix operations * +\****************************************************************************************/ + +/** @brief Calculates the cross product of two 3D vectors. + +The function calculates the cross product of two 3D vectors: +\f[\texttt{dst} = \texttt{src1} \times \texttt{src2}\f] +or: +\f[\begin{array}{l} \texttt{dst} _1 = \texttt{src1} _2 \texttt{src2} _3 - \texttt{src1} _3 \texttt{src2} _2 \\ \texttt{dst} _2 = \texttt{src1} _3 \texttt{src2} _1 - \texttt{src1} _1 \texttt{src2} _3 \\ \texttt{dst} _3 = \texttt{src1} _1 \texttt{src2} _2 - \texttt{src1} _2 \texttt{src2} _1 \end{array}\f] +@param src1 The first source vector +@param src2 The second source vector +@param dst The destination vector + */ +CVAPI(void) cvCrossProduct( const CvArr* src1, const CvArr* src2, CvArr* dst ); + +/** Matrix transform: dst = A*B + C, C is optional */ +#define cvMatMulAdd( src1, src2, src3, dst ) cvGEMM( (src1), (src2), 1., (src3), 1., (dst), 0 ) +#define cvMatMul( src1, src2, dst ) cvMatMulAdd( (src1), (src2), NULL, (dst)) + +#define CV_GEMM_A_T 1 +#define CV_GEMM_B_T 2 +#define CV_GEMM_C_T 4 +/** Extended matrix transform: + dst = alpha*op(A)*op(B) + beta*op(C), where op(X) is X or X^T */ +CVAPI(void) cvGEMM( const CvArr* src1, const CvArr* src2, double alpha, + const CvArr* src3, double beta, CvArr* dst, + int tABC CV_DEFAULT(0)); +#define cvMatMulAddEx cvGEMM + +/** Transforms each element of source array and stores + resultant vectors in destination array */ +CVAPI(void) cvTransform( const CvArr* src, CvArr* dst, + const CvMat* transmat, + const CvMat* shiftvec CV_DEFAULT(NULL)); +#define cvMatMulAddS cvTransform + +/** Does perspective transform on every element of input array */ +CVAPI(void) cvPerspectiveTransform( const CvArr* src, CvArr* dst, + const CvMat* mat ); + +/** Calculates (A-delta)*(A-delta)^T (order=0) or (A-delta)^T*(A-delta) (order=1) */ +CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order, + const CvArr* delta CV_DEFAULT(NULL), + double scale CV_DEFAULT(1.) ); 
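+
+/** Usage sketch (illustrative): cvMatMul is shorthand for cvGEMM with alpha=1,
+beta=1 and no C term; the tABC flags transpose operands without copying. Uses
+cvSetIdentity, declared just below:
+@code
+    CvMat* A = cvCreateMat(3, 3, CV_32FC1);
+    CvMat* B = cvCreateMat(3, 3, CV_32FC1);
+    CvMat* C = cvCreateMat(3, 3, CV_32FC1);
+    cvSetIdentity(A, cvRealScalar(2.));
+    cvSetIdentity(B, cvRealScalar(3.));
+    cvMatMul(A, B, C);                          // C = A*B
+    cvGEMM(A, B, 1., NULL, 0., C, CV_GEMM_A_T); // C = A^T * B
+@endcode
+*/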
+    Square matrices can be transposed in-place */
+CVAPI(void) cvTranspose( const CvArr* src, CvArr* dst );
+#define cvT cvTranspose
+
+/** Completes the symmetric matrix from the lower (LtoR=0) or from the upper (LtoR!=0) part */
+CVAPI(void) cvCompleteSymm( CvMat* matrix, int LtoR CV_DEFAULT(0) );
+
+/** Mirror array data around horizontal (flip=0),
+    vertical (flip=1) or both (flip=-1) axes:
+    cvFlip(src) flips images vertically and sequences horizontally (in place) */
+CVAPI(void) cvFlip( const CvArr* src, CvArr* dst CV_DEFAULT(NULL),
+                    int flip_mode CV_DEFAULT(0));
+#define cvMirror cvFlip
+
+
+#define CV_SVD_MODIFY_A 1
+#define CV_SVD_U_T 2
+#define CV_SVD_V_T 4
+
+/** Performs Singular Value Decomposition of a matrix */
+CVAPI(void) cvSVD( CvArr* A, CvArr* W, CvArr* U CV_DEFAULT(NULL),
+                   CvArr* V CV_DEFAULT(NULL), int flags CV_DEFAULT(0));
+
+/** Performs Singular Value Back Substitution (solves A*X = B):
+    flags must be the same as in cvSVD */
+CVAPI(void) cvSVBkSb( const CvArr* W, const CvArr* U,
+                      const CvArr* V, const CvArr* B,
+                      CvArr* X, int flags );
+
+#define CV_LU 0
+#define CV_SVD 1
+#define CV_SVD_SYM 2
+#define CV_CHOLESKY 3
+#define CV_QR 4
+#define CV_NORMAL 16
+
+/** Inverts matrix */
+CVAPI(double) cvInvert( const CvArr* src, CvArr* dst,
+                        int method CV_DEFAULT(CV_LU));
+#define cvInv cvInvert
+
+/** Solves linear system (src1)*(dst) = (src2)
+    (returns 0 if src1 is singular and the CV_LU method is used) */
+CVAPI(int) cvSolve( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                    int method CV_DEFAULT(CV_LU));
+
+/** Calculates determinant of input matrix */
+CVAPI(double) cvDet( const CvArr* mat );
+
+/** Calculates trace of the matrix (sum of elements on the main diagonal) */
+CVAPI(CvScalar) cvTrace( const CvArr* mat );
+
+/** Finds eigenvalues and eigenvectors of a symmetric matrix */
+CVAPI(void) cvEigenVV( CvArr* mat, CvArr* evects, CvArr* evals,
+                       double eps CV_DEFAULT(0),
+                       int lowindex CV_DEFAULT(-1),
+                       int highindex CV_DEFAULT(-1));
+
+///* Finds selected eigenvalues and eigenvectors of a symmetric matrix */
+//CVAPI(void) cvSelectedEigenVV( CvArr* mat, CvArr* evects, CvArr* evals,
+//                               int lowindex, int highindex );
+
+/** Makes an identity matrix (mat_ij = i == j) */
+CVAPI(void) cvSetIdentity( CvArr* mat, CvScalar value CV_DEFAULT(cvRealScalar(1)) );
+
+/** Fills matrix with given range of numbers */
+CVAPI(CvArr*) cvRange( CvArr* mat, double start, double end );
+
+/** @anchor core_c_CovarFlags
+@name Flags for cvCalcCovarMatrix
+@see cvCalcCovarMatrix
+  @{
+*/
+
+/** flag for cvCalcCovarMatrix, transpose([v1-avg, v2-avg,...]) * [v1-avg,v2-avg,...] */
+#define CV_COVAR_SCRAMBLED 0
+
+/** flag for cvCalcCovarMatrix, [v1-avg, v2-avg,...] * transpose([v1-avg,v2-avg,...]) */
+#define CV_COVAR_NORMAL 1
+
+/** flag for cvCalcCovarMatrix, do not calc average (i.e. mean vector) - use the input vector instead
+    (useful for calculating covariance matrix by parts) */
+#define CV_COVAR_USE_AVG 2
+
+/** flag for cvCalcCovarMatrix, scale the covariance matrix coefficients by number of the vectors */
+#define CV_COVAR_SCALE 4
+
+/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its rows */
+#define CV_COVAR_ROWS 8
+
+/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its columns */
+#define CV_COVAR_COLS 16
+
+/** @} */
+
+/** Calculates covariance matrix for a set of vectors
+@see @ref core_c_CovarFlags "flags"
+*/
+CVAPI(void) cvCalcCovarMatrix( const CvArr** vects, int count,
+                               CvArr* cov_mat, CvArr* avg, int flags );
+
+#define CV_PCA_DATA_AS_ROW 0
+#define CV_PCA_DATA_AS_COL 1
+#define CV_PCA_USE_AVG 2
+CVAPI(void) cvCalcPCA( const CvArr* data, CvArr* mean,
+                       CvArr* eigenvals, CvArr* eigenvects, int flags );
+
+CVAPI(void) cvProjectPCA( const CvArr* data, const CvArr* mean,
+                          const CvArr* eigenvects, CvArr* result );
+
+CVAPI(void) cvBackProjectPCA( const CvArr* proj, const CvArr* mean,
+                              const CvArr* eigenvects, CvArr* result );
+
+/** Calculates Mahalanobis (weighted) distance */
+CVAPI(double) cvMahalanobis( const CvArr* vec1, const CvArr* vec2, const CvArr* mat );
+#define cvMahalonobis cvMahalanobis
+
+/****************************************************************************************\
+*                                    Array Statistics                                    *
+\****************************************************************************************/
+
+/** Finds sum of array elements */
+CVAPI(CvScalar) cvSum( const CvArr* arr );
+
+/** Calculates number of non-zero pixels */
+CVAPI(int) cvCountNonZero( const CvArr* arr );
+
+/** Calculates mean value of array elements */
+CVAPI(CvScalar) cvAvg( const CvArr* arr, const CvArr* mask CV_DEFAULT(NULL) );
+
+/** Calculates mean and standard deviation of pixel values */
+CVAPI(void) cvAvgSdv( const CvArr* arr, CvScalar* mean, CvScalar* std_dev,
+                      const CvArr* mask CV_DEFAULT(NULL) );
+
+/** Finds global minimum, maximum and their positions */
+CVAPI(void) cvMinMaxLoc( const CvArr* arr, double* min_val, double* max_val,
+                         CvPoint* min_loc CV_DEFAULT(NULL),
+                         CvPoint* max_loc CV_DEFAULT(NULL),
+                         const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @anchor core_c_NormFlags
+  @name Flags for cvNorm and cvNormalize
+  @{
+*/
+#define CV_C 1
+#define CV_L1 2
+#define CV_L2 4
+#define CV_NORM_MASK 7
+#define CV_RELATIVE 8
+#define CV_DIFF 16
+#define CV_MINMAX 32
+
+#define CV_DIFF_C (CV_DIFF | CV_C)
+#define CV_DIFF_L1 (CV_DIFF | CV_L1)
+#define CV_DIFF_L2 (CV_DIFF | CV_L2)
+#define CV_RELATIVE_C (CV_RELATIVE | CV_C)
+#define CV_RELATIVE_L1 (CV_RELATIVE | CV_L1)
+#define CV_RELATIVE_L2 (CV_RELATIVE | CV_L2)
+/** @} */
+
+/** Finds norm, difference norm or relative difference norm for an array (or two arrays)
+@see @ref core_c_NormFlags "flags"
+*/
+CVAPI(double) cvNorm( const CvArr* arr1, const CvArr* arr2 CV_DEFAULT(NULL),
+                      int norm_type CV_DEFAULT(CV_L2),
+                      const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @see @ref core_c_NormFlags "flags" */
+CVAPI(void) cvNormalize( const CvArr* src, CvArr* dst,
+                         double a CV_DEFAULT(1.), double b CV_DEFAULT(0.),
+                         int norm_type CV_DEFAULT(CV_L2),
+                         const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @anchor core_c_ReduceFlags
+  @name Flags for cvReduce
+  @{
+*/
+#define CV_REDUCE_SUM 0
+#define CV_REDUCE_AVG 1
+#define CV_REDUCE_MAX 2
+#define CV_REDUCE_MIN 3
+/** @} */
+
+/** @see @ref core_c_ReduceFlags "flags" */
+CVAPI(void) cvReduce( const CvArr* src, CvArr*
dst, int dim CV_DEFAULT(-1), + int op CV_DEFAULT(CV_REDUCE_SUM) ); + +/****************************************************************************************\ +* Discrete Linear Transforms and Related Functions * +\****************************************************************************************/ + +/** @anchor core_c_DftFlags + @name Flags for cvDFT, cvDCT and cvMulSpectrums + @{ + */ +#define CV_DXT_FORWARD 0 +#define CV_DXT_INVERSE 1 +#define CV_DXT_SCALE 2 /**< divide result by size of array */ +#define CV_DXT_INV_SCALE (CV_DXT_INVERSE + CV_DXT_SCALE) +#define CV_DXT_INVERSE_SCALE CV_DXT_INV_SCALE +#define CV_DXT_ROWS 4 /**< transform each row individually */ +#define CV_DXT_MUL_CONJ 8 /**< conjugate the second argument of cvMulSpectrums */ +/** @} */ + +/** Discrete Fourier Transform: + complex->complex, + real->ccs (forward), + ccs->real (inverse) +@see core_c_DftFlags "flags" +*/ +CVAPI(void) cvDFT( const CvArr* src, CvArr* dst, int flags, + int nonzero_rows CV_DEFAULT(0) ); +#define cvFFT cvDFT + +/** Multiply results of DFTs: DFT(X)*DFT(Y) or DFT(X)*conj(DFT(Y)) +@see core_c_DftFlags "flags" +*/ +CVAPI(void) cvMulSpectrums( const CvArr* src1, const CvArr* src2, + CvArr* dst, int flags ); + +/** Finds optimal DFT vector size >= size0 */ +CVAPI(int) cvGetOptimalDFTSize( int size0 ); + +/** Discrete Cosine Transform +@see core_c_DftFlags "flags" +*/ +CVAPI(void) cvDCT( const CvArr* src, CvArr* dst, int flags ); + +/****************************************************************************************\ +* Dynamic data structures * +\****************************************************************************************/ + +/** Calculates length of sequence slice (with support of negative indices). */ +CVAPI(int) cvSliceLength( CvSlice slice, const CvSeq* seq ); + + +/** Creates new memory storage. + block_size == 0 means that default, + somewhat optimal size, is used (currently, it is 64K) */ +CVAPI(CvMemStorage*) cvCreateMemStorage( int block_size CV_DEFAULT(0)); + + +/** Creates a memory storage that will borrow memory blocks from parent storage */ +CVAPI(CvMemStorage*) cvCreateChildMemStorage( CvMemStorage* parent ); + + +/** Releases memory storage. All the children of a parent must be released before + the parent. A child storage returns all the blocks to parent when it is released */ +CVAPI(void) cvReleaseMemStorage( CvMemStorage** storage ); + + +/** Clears memory storage. This is the only way(!!!) (besides cvRestoreMemStoragePos) + to reuse memory allocated for the storage - cvClearSeq,cvClearSet ... + do not free any memory. 
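+    A typical reuse pattern (sketch; the loop body stands in for arbitrary
+    per-iteration work):
+    @code
+        CvMemStorage* storage = cvCreateMemStorage(0);
+        int i;
+        for( i = 0; i < 100; i++ )
+        {
+            // ... create temporary sequences/sets in storage ...
+            cvClearMemStorage( storage ); // keeps the blocks for reuse
+        }
+        cvReleaseMemStorage( &storage );
+    @endcode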
+   A child storage returns all the blocks to the parent when it is cleared */
+CVAPI(void) cvClearMemStorage( CvMemStorage* storage );
+
+/** Remember a storage "free memory" position */
+CVAPI(void) cvSaveMemStoragePos( const CvMemStorage* storage, CvMemStoragePos* pos );
+
+/** Restore a storage "free memory" position */
+CVAPI(void) cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos );
+
+/** Allocates a continuous buffer of the specified size in the storage */
+CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size );
+
+/** Allocates string in memory storage */
+//CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
+//                                         int len CV_DEFAULT(-1) );
+
+/** Creates new empty sequence that will reside in the specified storage */
+CVAPI(CvSeq*) cvCreateSeq( int seq_flags, size_t header_size,
+                           size_t elem_size, CvMemStorage* storage );
+
+/** Changes default size (granularity) of sequence blocks.
+    The default size is ~1Kbyte */
+CVAPI(void) cvSetSeqBlockSize( CvSeq* seq, int delta_elems );
+
+
+/** Adds new element to the end of sequence. Returns pointer to the element */
+CVAPI(schar*) cvSeqPush( CvSeq* seq, const void* element CV_DEFAULT(NULL));
+
+
+/** Adds new element to the beginning of sequence. Returns pointer to it */
+CVAPI(schar*) cvSeqPushFront( CvSeq* seq, const void* element CV_DEFAULT(NULL));
+
+
+/** Removes the last element from sequence and optionally saves it */
+CVAPI(void) cvSeqPop( CvSeq* seq, void* element CV_DEFAULT(NULL));
+
+
+/** Removes the first element from sequence and optionally saves it */
+CVAPI(void) cvSeqPopFront( CvSeq* seq, void* element CV_DEFAULT(NULL));
+
+
+#define CV_FRONT 1
+#define CV_BACK 0
+/** Adds several new elements to the end of sequence */
+CVAPI(void) cvSeqPushMulti( CvSeq* seq, const void* elements,
+                            int count, int in_front CV_DEFAULT(0) );
+
+/** Removes several elements from the end of sequence and optionally saves them */
+CVAPI(void) cvSeqPopMulti( CvSeq* seq, void* elements,
+                           int count, int in_front CV_DEFAULT(0) );
+
+/** Inserts a new element in the middle of sequence.
+    cvSeqInsert(seq,0,elem) == cvSeqPushFront(seq,elem) */
+CVAPI(schar*) cvSeqInsert( CvSeq* seq, int before_index,
+                           const void* element CV_DEFAULT(NULL));
+
+/** Removes specified sequence element */
+CVAPI(void) cvSeqRemove( CvSeq* seq, int index );
+
+
+/** Removes all the elements from the sequence. The freed memory
+    can be reused later only by the same sequence unless cvClearMemStorage
+    or cvRestoreMemStoragePos is called */
+CVAPI(void) cvClearSeq( CvSeq* seq );
+
+
+/** Retrieves pointer to specified sequence element.
+    Negative indices are supported and mean counting from the end
+    (e.g. -1 means the last sequence element) */
+CVAPI(schar*) cvGetSeqElem( const CvSeq* seq, int index );
+
+/** Calculates index of the specified sequence element.
+    Returns -1 if element does not belong to the sequence */
+CVAPI(int) cvSeqElemIdx( const CvSeq* seq, const void* element,
+                         CvSeqBlock** block CV_DEFAULT(NULL) );
+
+/** Initializes sequence writer.
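+    A usage sketch (assumes an existing CvSeq* seq whose elem_size is sizeof(int)):
+    @code
+        CvSeqWriter writer;
+        int val = 42;
+        cvStartAppendToSeq( seq, &writer );
+        CV_WRITE_SEQ_ELEM( val, writer );  // append one element
+        cvEndWriteSeq( &writer );
+    @endcode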
+    The new elements will be added to the end of the sequence */
+CVAPI(void) cvStartAppendToSeq( CvSeq* seq, CvSeqWriter* writer );
+
+
+/** Combination of cvCreateSeq and cvStartAppendToSeq */
+CVAPI(void) cvStartWriteSeq( int seq_flags, int header_size,
+                             int elem_size, CvMemStorage* storage,
+                             CvSeqWriter* writer );
+
+/** Closes sequence writer, updates sequence header and returns pointer
+    to the resultant sequence
+    (which may be useful if the sequence was created using cvStartWriteSeq)
+*/
+CVAPI(CvSeq*) cvEndWriteSeq( CvSeqWriter* writer );
+
+
+/** Updates sequence header. May be useful to get access to some of the previously
+    written elements via cvGetSeqElem or sequence reader */
+CVAPI(void) cvFlushSeqWriter( CvSeqWriter* writer );
+
+
+/** Initializes sequence reader.
+    The sequence can be read in forward or backward direction */
+CVAPI(void) cvStartReadSeq( const CvSeq* seq, CvSeqReader* reader,
+                            int reverse CV_DEFAULT(0) );
+
+
+/** Returns current sequence reader position (currently observed sequence element) */
+CVAPI(int) cvGetSeqReaderPos( CvSeqReader* reader );
+
+
+/** Changes sequence reader position. It may seek to an absolute position or
+    to a position relative to the current one */
+CVAPI(void) cvSetSeqReaderPos( CvSeqReader* reader, int index,
+                               int is_relative CV_DEFAULT(0));
+
+/** Copies sequence content to a continuous piece of memory */
+CVAPI(void*) cvCvtSeqToArray( const CvSeq* seq, void* elements,
+                              CvSlice slice CV_DEFAULT(CV_WHOLE_SEQ) );
+
+/** Creates sequence header for array.
+    After that all the operations on sequences that do not alter the content
+    can be applied to the resultant sequence */
+CVAPI(CvSeq*) cvMakeSeqHeaderForArray( int seq_type, int header_size,
+                                       int elem_size, void* elements, int total,
+                                       CvSeq* seq, CvSeqBlock* block );
+
+/** Extracts sequence slice (with or without copying sequence elements) */
+CVAPI(CvSeq*) cvSeqSlice( const CvSeq* seq, CvSlice slice,
+                          CvMemStorage* storage CV_DEFAULT(NULL),
+                          int copy_data CV_DEFAULT(0));
+
+CV_INLINE CvSeq* cvCloneSeq( const CvSeq* seq, CvMemStorage* storage CV_DEFAULT(NULL))
+{
+    return cvSeqSlice( seq, CV_WHOLE_SEQ, storage, 1 );
+}
+
+/** Removes sequence slice */
+CVAPI(void) cvSeqRemoveSlice( CvSeq* seq, CvSlice slice );
+
+/** Inserts a sequence or array into another sequence */
+CVAPI(void) cvSeqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr );
+
+/** a < b ? -1 : a > b ? 1 : 0 */
+typedef int (CV_CDECL* CvCmpFunc)(const void* a, const void* b, void* userdata );
+
+/** Sorts sequence in-place given element comparison function */
+CVAPI(void) cvSeqSort( CvSeq* seq, CvCmpFunc func, void* userdata CV_DEFAULT(NULL) );
+
+/** Finds element in a [sorted] sequence */
+CVAPI(schar*) cvSeqSearch( CvSeq* seq, const void* elem, CvCmpFunc func,
+                           int is_sorted, int* elem_idx,
+                           void* userdata CV_DEFAULT(NULL) );
+
+/** Reverses order of sequence elements in-place */
+CVAPI(void) cvSeqInvert( CvSeq* seq );
+
+/** Splits sequence into one or more equivalence classes using the specified criteria */
+CVAPI(int) cvSeqPartition( const CvSeq* seq, CvMemStorage* storage,
+                           CvSeq** labels, CvCmpFunc is_equal, void* userdata );
+
+/************ Internal sequence functions ************/
+CVAPI(void) cvChangeSeqBlock( void* reader, int direction );
+CVAPI(void) cvCreateSeqBlock( CvSeqWriter* writer );
+
+
+/** Creates a new set */
+CVAPI(CvSet*) cvCreateSet( int set_flags, int header_size,
+                           int elem_size, CvMemStorage* storage );
+
+/** Adds a new element to the set and returns its index
+    (a pointer to the element is optionally returned via inserted_elem) */
+CVAPI(int) cvSetAdd( CvSet* set_header, CvSetElem* elem CV_DEFAULT(NULL),
+                     CvSetElem** inserted_elem CV_DEFAULT(NULL) );
+
+/** Fast variant of cvSetAdd */
+CV_INLINE CvSetElem* cvSetNew( CvSet* set_header )
+{
+    CvSetElem* elem = set_header->free_elems;
+    if( elem )
+    {
+        set_header->free_elems = elem->next_free;
+        elem->flags = elem->flags & CV_SET_ELEM_IDX_MASK;
+        set_header->active_count++;
+    }
+    else
+        cvSetAdd( set_header, NULL, &elem );
+    return elem;
+}
+
+/** Removes set element given its pointer */
+CV_INLINE void cvSetRemoveByPtr( CvSet* set_header, void* elem )
+{
+    CvSetElem* _elem = (CvSetElem*)elem;
+    assert( _elem->flags >= 0 /*&& (elem->flags & CV_SET_ELEM_IDX_MASK) < set_header->total*/ );
+    _elem->next_free = set_header->free_elems;
+    _elem->flags = (_elem->flags & CV_SET_ELEM_IDX_MASK) | CV_SET_ELEM_FREE_FLAG;
+    set_header->free_elems = _elem;
+    set_header->active_count--;
+}
+
+/** Removes element from the set by its index */
+CVAPI(void) cvSetRemove( CvSet* set_header, int index );
+
+/** Returns a set element by index. If the element doesn't belong to the set,
+    NULL is returned */
+CV_INLINE CvSetElem* cvGetSetElem( const CvSet* set_header, int idx )
+{
+    CvSetElem* elem = (CvSetElem*)(void *)cvGetSeqElem( (CvSeq*)set_header, idx );
+    return elem && CV_IS_SET_ELEM( elem ) ? elem : 0;
+}
+
+/** Removes all the elements from the set */
+CVAPI(void) cvClearSet( CvSet* set_header );
+
+/** Creates new graph */
+CVAPI(CvGraph*) cvCreateGraph( int graph_flags, int header_size,
+                               int vtx_size, int edge_size,
+                               CvMemStorage* storage );
+
+/** Adds new vertex to the graph */
+CVAPI(int) cvGraphAddVtx( CvGraph* graph, const CvGraphVtx* vtx CV_DEFAULT(NULL),
+                          CvGraphVtx** inserted_vtx CV_DEFAULT(NULL) );
+
+
+/** Removes vertex from the graph together with all incident edges */
+CVAPI(int) cvGraphRemoveVtx( CvGraph* graph, int index );
+CVAPI(int) cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx );
+
+
+/** Link two vertices specified by indices or pointers if they
+    are not connected or return pointer to already existing edge
+    connecting the vertices.
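+    A small construction sketch (storage is assumed to be an existing CvMemStorage*):
+    @code
+        CvGraph* g = cvCreateGraph( CV_ORIENTED_GRAPH, sizeof(CvGraph),
+                                    sizeof(CvGraphVtx), sizeof(CvGraphEdge), storage );
+        int v0 = cvGraphAddVtx( g, NULL, NULL );
+        int v1 = cvGraphAddVtx( g, NULL, NULL );
+        cvGraphAddEdge( g, v0, v1, NULL, NULL );
+    @endcode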
+ Functions return 1 if a new edge was created, 0 otherwise */ +CVAPI(int) cvGraphAddEdge( CvGraph* graph, + int start_idx, int end_idx, + const CvGraphEdge* edge CV_DEFAULT(NULL), + CvGraphEdge** inserted_edge CV_DEFAULT(NULL) ); + +CVAPI(int) cvGraphAddEdgeByPtr( CvGraph* graph, + CvGraphVtx* start_vtx, CvGraphVtx* end_vtx, + const CvGraphEdge* edge CV_DEFAULT(NULL), + CvGraphEdge** inserted_edge CV_DEFAULT(NULL) ); + +/** Remove edge connecting two vertices */ +CVAPI(void) cvGraphRemoveEdge( CvGraph* graph, int start_idx, int end_idx ); +CVAPI(void) cvGraphRemoveEdgeByPtr( CvGraph* graph, CvGraphVtx* start_vtx, + CvGraphVtx* end_vtx ); + +/** Find edge connecting two vertices */ +CVAPI(CvGraphEdge*) cvFindGraphEdge( const CvGraph* graph, int start_idx, int end_idx ); +CVAPI(CvGraphEdge*) cvFindGraphEdgeByPtr( const CvGraph* graph, + const CvGraphVtx* start_vtx, + const CvGraphVtx* end_vtx ); +#define cvGraphFindEdge cvFindGraphEdge +#define cvGraphFindEdgeByPtr cvFindGraphEdgeByPtr + +/** Remove all vertices and edges from the graph */ +CVAPI(void) cvClearGraph( CvGraph* graph ); + + +/** Count number of edges incident to the vertex */ +CVAPI(int) cvGraphVtxDegree( const CvGraph* graph, int vtx_idx ); +CVAPI(int) cvGraphVtxDegreeByPtr( const CvGraph* graph, const CvGraphVtx* vtx ); + + +/** Retrieves graph vertex by given index */ +#define cvGetGraphVtx( graph, idx ) (CvGraphVtx*)cvGetSetElem((CvSet*)(graph), (idx)) + +/** Retrieves index of a graph vertex given its pointer */ +#define cvGraphVtxIdx( graph, vtx ) ((vtx)->flags & CV_SET_ELEM_IDX_MASK) + +/** Retrieves index of a graph edge given its pointer */ +#define cvGraphEdgeIdx( graph, edge ) ((edge)->flags & CV_SET_ELEM_IDX_MASK) + +#define cvGraphGetVtxCount( graph ) ((graph)->active_count) +#define cvGraphGetEdgeCount( graph ) ((graph)->edges->active_count) + +#define CV_GRAPH_VERTEX 1 +#define CV_GRAPH_TREE_EDGE 2 +#define CV_GRAPH_BACK_EDGE 4 +#define CV_GRAPH_FORWARD_EDGE 8 +#define CV_GRAPH_CROSS_EDGE 16 +#define CV_GRAPH_ANY_EDGE 30 +#define CV_GRAPH_NEW_TREE 32 +#define CV_GRAPH_BACKTRACKING 64 +#define CV_GRAPH_OVER -1 + +#define CV_GRAPH_ALL_ITEMS -1 + +/** flags for graph vertices and edges */ +#define CV_GRAPH_ITEM_VISITED_FLAG (1 << 30) +#define CV_IS_GRAPH_VERTEX_VISITED(vtx) \ + (((CvGraphVtx*)(vtx))->flags & CV_GRAPH_ITEM_VISITED_FLAG) +#define CV_IS_GRAPH_EDGE_VISITED(edge) \ + (((CvGraphEdge*)(edge))->flags & CV_GRAPH_ITEM_VISITED_FLAG) +#define CV_GRAPH_SEARCH_TREE_NODE_FLAG (1 << 29) +#define CV_GRAPH_FORWARD_EDGE_FLAG (1 << 28) + +typedef struct CvGraphScanner +{ + CvGraphVtx* vtx; /* current graph vertex (or current edge origin) */ + CvGraphVtx* dst; /* current graph edge destination vertex */ + CvGraphEdge* edge; /* current edge */ + + CvGraph* graph; /* the graph */ + CvSeq* stack; /* the graph vertex stack */ + int index; /* the lower bound of certainly visited vertices */ + int mask; /* event mask */ +} +CvGraphScanner; + +/** Creates new graph scanner. */ +CVAPI(CvGraphScanner*) cvCreateGraphScanner( CvGraph* graph, + CvGraphVtx* vtx CV_DEFAULT(NULL), + int mask CV_DEFAULT(CV_GRAPH_ALL_ITEMS)); + +/** Releases graph scanner. */ +CVAPI(void) cvReleaseGraphScanner( CvGraphScanner** scanner ); + +/** Get next graph element */ +CVAPI(int) cvNextGraphItem( CvGraphScanner* scanner ); + +/** Creates a copy of graph */ +CVAPI(CvGraph*) cvCloneGraph( const CvGraph* graph, CvMemStorage* storage ); + + +/** Does look-up transformation. 
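+    A sketch that inverts an 8-bit image through a 256-entry table
+    (src and dst are assumed to be existing 8uC1 arrays):
+    @code
+        CvMat* lut = cvCreateMat( 1, 256, CV_8UC1 );
+        int i;
+        for( i = 0; i < 256; i++ )
+            CV_MAT_ELEM( *lut, uchar, 0, i ) = (uchar)(255 - i);
+        cvLUT( src, dst, lut );
+    @endcode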
+    Elements of the source array
+    (that should be 8uC1 or 8sC1) are used as indices in the 256-element table lutarr */
+CVAPI(void) cvLUT( const CvArr* src, CvArr* dst, const CvArr* lut );
+
+
+/******************* Iteration through the sequence tree *****************/
+typedef struct CvTreeNodeIterator
+{
+    const void* node;
+    int level;
+    int max_level;
+}
+CvTreeNodeIterator;
+
+CVAPI(void) cvInitTreeNodeIterator( CvTreeNodeIterator* tree_iterator,
+                                    const void* first, int max_level );
+CVAPI(void*) cvNextTreeNode( CvTreeNodeIterator* tree_iterator );
+CVAPI(void*) cvPrevTreeNode( CvTreeNodeIterator* tree_iterator );
+
+/** Inserts sequence into tree with specified "parent" sequence.
+    If parent is equal to frame (e.g. the most external contour),
+    then the added contour will have a null pointer to the parent. */
+CVAPI(void) cvInsertNodeIntoTree( void* node, void* parent, void* frame );
+
+/** Removes contour from tree (together with the contour children). */
+CVAPI(void) cvRemoveNodeFromTree( void* node, void* frame );
+
+/** Gathers pointers to all the sequences reachable from `first`
+    into a single sequence */
+CVAPI(CvSeq*) cvTreeToNodeSeq( const void* first, int header_size,
+                               CvMemStorage* storage );
+
+/** The function implements the K-means algorithm for clustering an array of sample
+    vectors in a specified number of classes */
+#define CV_KMEANS_USE_INITIAL_LABELS 1
+CVAPI(int) cvKMeans2( const CvArr* samples, int cluster_count, CvArr* labels,
+                      CvTermCriteria termcrit, int attempts CV_DEFAULT(1),
+                      CvRNG* rng CV_DEFAULT(0), int flags CV_DEFAULT(0),
+                      CvArr* _centers CV_DEFAULT(0), double* compactness CV_DEFAULT(0) );
+
+/****************************************************************************************\
+*                                    System functions                                    *
+\****************************************************************************************/
+
+/** Loads optimized functions from IPP, MKL etc. or switches back to pure C code */
+CVAPI(int) cvUseOptimized( int on_off );
+
+typedef IplImage* (CV_STDCALL* Cv_iplCreateImageHeader)
+                            (int,int,int,char*,char*,int,int,int,int,int,
+                            IplROI*,IplImage*,void*,IplTileInfo*);
+typedef void (CV_STDCALL* Cv_iplAllocateImageData)(IplImage*,int,int);
+typedef void (CV_STDCALL* Cv_iplDeallocate)(IplImage*,int);
+typedef IplROI* (CV_STDCALL* Cv_iplCreateROI)(int,int,int,int,int);
+typedef IplImage* (CV_STDCALL* Cv_iplCloneImage)(const IplImage*);
+
+/** @brief Makes OpenCV use IPL functions for allocating IplImage and IplROI structures.
+
+Normally, the function is not called directly. Instead, a simple macro
+CV_TURN_ON_IPL_COMPATIBILITY() is used that calls cvSetIPLAllocators and passes it
+pointers to the IPL allocation functions:
+@code
+    ...
+    CV_TURN_ON_IPL_COMPATIBILITY()
+    ...
+@endcode
+@param create_header pointer to a function, creating IPL image header.
+@param allocate_data pointer to a function, allocating IPL image data.
+@param deallocate pointer to a function, deallocating IPL image.
+@param create_roi pointer to a function, creating IPL image ROI (i.e. Region of Interest).
+@param clone_image pointer to a function, cloning an IPL image.
+ */
+CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header,
+                                Cv_iplAllocateImageData allocate_data,
+                                Cv_iplDeallocate deallocate,
+                                Cv_iplCreateROI create_roi,
+                                Cv_iplCloneImage clone_image );
+
+#define CV_TURN_ON_IPL_COMPATIBILITY()                                  \
+    cvSetIPLAllocators( iplCreateImageHeader, iplAllocateImage,         \
+                        iplDeallocate, iplCreateROI, iplCloneImage )
+
+/****************************************************************************************\
+*                                    Data Persistence                                    *
+\****************************************************************************************/
+
+#if 0
+/********************************** High-level functions ********************************/
+
+/** @brief Opens file storage for reading or writing data.
+
+The function opens file storage for reading or writing data. In the latter case, a new file is
+created or an existing file is rewritten. The type of the read or written file is determined by the
+filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON.
+
+At the same time, it also supports adding parameters like "example.xml?base64".
+
+The function returns a pointer to the CvFileStorage structure.
+If the file cannot be opened then the function returns NULL.
+@param filename Name of the file associated with the storage
+@param memstorage Memory storage used for temporary data and for
+storing dynamic structures, such as CvSeq or CvGraph. If it is NULL, a temporary memory
+storage is created and used.
+@param flags Can be one of the following:
+> -   **CV_STORAGE_READ** the storage is open for reading
+> -   **CV_STORAGE_WRITE** the storage is open for writing
+      (use **CV_STORAGE_WRITE | CV_STORAGE_WRITE_BASE64** to write rawdata in Base64)
+@param encoding
+ */
+CVAPI(CvFileStorage*) cvOpenFileStorage( const char* filename, CvMemStorage* memstorage,
+                                         int flags, const char* encoding CV_DEFAULT(NULL) );
+
+/** @brief Releases file storage.
+
+The function closes the file associated with the storage and releases all the temporary structures.
+It must be called after all I/O operations with the storage are finished.
+@param fs Double pointer to the released file storage
+ */
+CVAPI(void) cvReleaseFileStorage( CvFileStorage** fs );
+
+/** returns attribute value or 0 (NULL) if there is no such attribute */
+CVAPI(const char*) cvAttrValue( const CvAttrList* attr, const char* attr_name );
+
+/** @brief Starts writing a new structure.
+
+The function starts writing a compound structure (collection) that can be a sequence or a map. After
+all the structure fields, which can be scalars or structures, are written, cvEndWriteStruct should
+be called. The function can be used to group some objects or to implement the write function for
+some user object (see CvTypeInfo).
+@param fs File storage
+@param name Name of the written structure. The structure can be accessed by this name when the
+storage is read.
+@param struct_flags A combination of one of the following values:
+-   **CV_NODE_SEQ** the written structure is a sequence (see discussion of CvFileStorage ),
+    that is, its elements do not have a name.
+-   **CV_NODE_MAP** the written structure is a map (see discussion of CvFileStorage ), that
+    is, all its elements have names.
+One and only one of the two above flags must be specified
+-   **CV_NODE_FLOW** the optional flag that makes sense only for YAML streams. It means that
+    the structure is written as a flow (not as a block), which is more compact. It is
+    recommended to use this flag for structures or arrays whose elements are all scalars.
+@param type_name Optional parameter - the object type name. In the
+    case of XML it is written as a type_id attribute of the structure opening tag. In the case of
+    YAML it is written after a colon following the structure name (see the example in
+    CvFileStorage description). In the case of JSON it is written as a name/value pair.
+    Mainly it is used with user objects. When the storage is read, the
+    encoded type name is used to determine the object type (see CvTypeInfo and cvFindType).
+@param attributes This parameter is not used in the current implementation
+ */
+CVAPI(void) cvStartWriteStruct( CvFileStorage* fs, const char* name,
+                                int struct_flags, const char* type_name CV_DEFAULT(NULL),
+                                CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Finishes writing to a file node collection.
+@param fs File storage
+@sa cvStartWriteStruct.
+ */
+CVAPI(void) cvEndWriteStruct( CvFileStorage* fs );
+
+/** @brief Writes an integer value.
+
+The function writes a single integer value (with or without a name) to the file storage.
+@param fs File storage
+@param name Name of the written value. Should be NULL if and only if the parent structure is a
+sequence.
+@param value The written value
+ */
+CVAPI(void) cvWriteInt( CvFileStorage* fs, const char* name, int value );
+
+/** @brief Writes a floating-point value.
+
+The function writes a single floating-point value (with or without a name) to file storage. Special
+values are encoded as follows: NaN (Not A Number) as .NaN, infinity as +.Inf or -.Inf.
+
+The following example shows how to use the low-level writing functions to store custom structures,
+such as termination criteria, without registering a new type:
+@code
+    void write_termcriteria( CvFileStorage* fs, const char* struct_name,
+                             CvTermCriteria* termcrit )
+    {
+        cvStartWriteStruct( fs, struct_name, CV_NODE_MAP, NULL, cvAttrList(0,0));
+        cvWriteComment( fs, "termination criteria", 1 ); // just a description
+        if( termcrit->type & CV_TERMCRIT_ITER )
+            cvWriteInt( fs, "max_iterations", termcrit->max_iter );
+        if( termcrit->type & CV_TERMCRIT_EPS )
+            cvWriteReal( fs, "accuracy", termcrit->epsilon );
+        cvEndWriteStruct( fs );
+    }
+@endcode
+@param fs File storage
+@param name Name of the written value. Should be NULL if and only if the parent structure is a
+sequence.
+@param value The written value
+*/
+CVAPI(void) cvWriteReal( CvFileStorage* fs, const char* name, double value );
+
+/** @brief Writes a text string.
+
+The function writes a text string to file storage.
+@param fs File storage
+@param name Name of the written string. Should be NULL if and only if the parent structure is a
+sequence.
+@param str The written text string
+@param quote If non-zero, the written string is put in quotes, regardless of whether they are
+required. Otherwise, if the flag is zero, quotes are used only when they are required (e.g. when
+the string starts with a digit or contains spaces).
+ */
+CVAPI(void) cvWriteString( CvFileStorage* fs, const char* name,
+                           const char* str, int quote CV_DEFAULT(0) );
+
+/** @brief Writes a comment.
+
+The function writes a comment into file storage. The comments are skipped when the storage is read.
+@param fs File storage
+@param comment The written comment, single-line or multi-line
+@param eol_comment If non-zero, the function tries to put the comment at the end of current line.
+If the flag is zero, if the comment is multi-line, or if it does not fit at the end of the current
+line, the comment starts a new line.
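+
+A short writing-session sketch (note that this legacy persistence block is
+compiled out with #if 0 above, so the example is for reference only; all the
+names and values are arbitrary):
+@code
+    CvFileStorage* fs = cvOpenFileStorage( "cfg.yml", 0, CV_STORAGE_WRITE );
+    cvWriteComment( fs, "detector settings", 0 );
+    cvWriteInt( fs, "min_size", 24 );
+    cvWriteReal( fs, "scale_factor", 1.1 );
+    cvWriteString( fs, "model", "yolov8n", 1 );
+    cvReleaseFileStorage( &fs );
+@endcode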
+ */
+CVAPI(void) cvWriteComment( CvFileStorage* fs, const char* comment,
+                            int eol_comment );
+
+/** @brief Writes an object to file storage.
+
+The function writes an object to file storage. First, the appropriate type info is found using
+cvTypeOf. Then, the write method associated with the type info is called.
+
+Attributes are used to customize the writing procedure. The standard types support the following
+attributes (all the dt attributes have the same format as in cvWriteRawData):
+
+-# CvSeq
+    -   **header_dt** description of user fields of the sequence header that follow CvSeq, or
+        CvChain (if the sequence is a Freeman chain) or CvContour (if the sequence is a contour or
+        point sequence)
+    -   **dt** description of the sequence elements.
+    -   **recursive** if the attribute is present and is not equal to "0" or "false", the whole
+        tree of sequences (contours) is stored.
+-# CvGraph
+    -   **header_dt** description of user fields of the graph header that follows CvGraph;
+    -   **vertex_dt** description of user fields of graph vertices
+    -   **edge_dt** description of user fields of graph edges (note that the edge weight is
+        always written, so there is no need to specify it explicitly)
+
+Below is the code that creates the YAML file shown in the CvFileStorage description:
+@code
+    #include "cxcore.h"
+
+    int main( int argc, char** argv )
+    {
+        CvMat* mat = cvCreateMat( 3, 3, CV_32F );
+        CvFileStorage* fs = cvOpenFileStorage( "example.yml", 0, CV_STORAGE_WRITE );
+
+        cvSetIdentity( mat );
+        cvWrite( fs, "A", mat, cvAttrList(0,0) );
+
+        cvReleaseFileStorage( &fs );
+        cvReleaseMat( &mat );
+        return 0;
+    }
+@endcode
+@param fs File storage
+@param name Name of the written object. Should be NULL if and only if the parent structure is a
+sequence.
+@param ptr Pointer to the object
+@param attributes The attributes of the object. They are specific for each particular type (see
+the discussion below).
+ */
+CVAPI(void) cvWrite( CvFileStorage* fs, const char* name, const void* ptr,
+                     CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Starts the next stream.
+
+The function finishes the currently written stream and starts the next stream. In the case of XML
+the file with multiple streams looks like this:
+@code{.xml}
+    <opencv_storage>
+    <!-- stream #1 data -->
+    </opencv_storage>
+    <opencv_storage>
+    <!-- stream #2 data -->
+    </opencv_storage>
+    ...
+@endcode
+The YAML file will look like this:
+@code{.yaml}
+    %YAML 1.0
+    # stream #1 data
+    ...
+    ---
+    # stream #2 data
+@endcode
+This is useful for concatenating files or for resuming the writing process.
+@param fs File storage
+ */
+CVAPI(void) cvStartNextStream( CvFileStorage* fs );
+
+/** @brief Writes multiple numbers.
+
+The function writes an array, whose elements consist of single or multiple numbers. The function
+call can be replaced with a loop containing a few cvWriteInt and cvWriteReal calls, but a single
+call is more efficient. Note that because none of the elements have a name, they should be written
+to a sequence rather than a map.
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+ */
+CVAPI(void) cvWriteRawData( CvFileStorage* fs, const void* src,
+                            int len, const char* dt );
+
+/** @brief Writes multiple numbers in Base64.
+
+If either CV_STORAGE_WRITE_BASE64 or cv::FileStorage::WRITE_BASE64 is used,
+this function will be the same as cvWriteRawData. If neither, the main
+difference is that it outputs a sequence in Base64 encoding rather than
+in plain text.
+
+This function can only be used to write a sequence with a type "binary".
+
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+*/
+CVAPI(void) cvWriteRawDataBase64( CvFileStorage* fs, const void* src,
+                                  int len, const char* dt );
+
+/** @brief Returns a unique pointer for a given name.
+
+The function returns a unique pointer for each particular file node name. This pointer can be then
+passed to the cvGetFileNode function that is faster than cvGetFileNodeByName because it compares
+text strings by comparing pointers rather than the strings' content.
+
+Consider the following example where an array of points is encoded as a sequence of 2-entry maps:
+@code
+    points:
+      - { x: 10, y: 10 }
+      - { x: 20, y: 20 }
+      - { x: 30, y: 30 }
+    # ...
+@endcode
+Then, it is possible to get hashed "x" and "y" pointers to speed up decoding of the points:
+@code
+    #include "cxcore.h"
+
+    int main( int argc, char** argv )
+    {
+        CvFileStorage* fs = cvOpenFileStorage( "points.yml", 0, CV_STORAGE_READ );
+        CvStringHashNode* x_key = cvGetHashedKey( fs, "x", -1, 1 );
+        CvStringHashNode* y_key = cvGetHashedKey( fs, "y", -1, 1 );
+        CvFileNode* points = cvGetFileNodeByName( fs, 0, "points" );
+
+        if( CV_NODE_IS_SEQ(points->tag) )
+        {
+            CvSeq* seq = points->data.seq;
+            int i, total = seq->total;
+            CvSeqReader reader;
+            cvStartReadSeq( seq, &reader, 0 );
+            for( i = 0; i < total; i++ )
+            {
+                CvFileNode* pt = (CvFileNode*)reader.ptr;
+            #if 1 // faster variant
+                CvFileNode* xnode = cvGetFileNode( fs, pt, x_key, 0 );
+                CvFileNode* ynode = cvGetFileNode( fs, pt, y_key, 0 );
+                assert( xnode && CV_NODE_IS_INT(xnode->tag) &&
+                        ynode && CV_NODE_IS_INT(ynode->tag));
+                int x = xnode->data.i; // or x = cvReadInt( xnode, 0 );
+                int y = ynode->data.i; // or y = cvReadInt( ynode, 0 );
+            #elif 1 // slower variant; does not use x_key & y_key
+                CvFileNode* xnode = cvGetFileNodeByName( fs, pt, "x" );
+                CvFileNode* ynode = cvGetFileNodeByName( fs, pt, "y" );
+                assert( xnode && CV_NODE_IS_INT(xnode->tag) &&
+                        ynode && CV_NODE_IS_INT(ynode->tag));
+                int x = xnode->data.i; // or x = cvReadInt( xnode, 0 );
+                int y = ynode->data.i; // or y = cvReadInt( ynode, 0 );
+            #else // the slowest yet the easiest to use variant
+                int x = cvReadIntByName( fs, pt, "x", 0 );
+                int y = cvReadIntByName( fs, pt, "y", 0 );
+            #endif
+                CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
+                printf("%d, %d\n", x, y );
+            }
+        }
+        cvReleaseFileStorage( &fs );
+        return 0;
+    }
+@endcode
+Please note that whatever method of accessing a map you are using, it is still much slower than
+using plain sequences; for example, in the above example, it is more efficient to encode the points
+as pairs of integers in a single numeric sequence.
+@param fs File storage
+@param name Literal node name
+@param len Length of the name (if it is known a priori), or -1 if it needs to be calculated
+@param create_missing Flag that specifies whether an absent key should be added into the hash table
+*/
+CVAPI(CvStringHashNode*) cvGetHashedKey( CvFileStorage* fs, const char* name,
+                                         int len CV_DEFAULT(-1),
+                                         int create_missing CV_DEFAULT(0));
+
+/** @brief Retrieves one of the top-level nodes of the file storage.
+
+The function returns one of the top-level file nodes. The top-level nodes do not have a name, they
+correspond to the streams that are stored one after another in the file storage.
If the index is out +of range, the function returns a NULL pointer, so all the top-level nodes can be iterated by +subsequent calls to the function with stream_index=0,1,..., until the NULL pointer is returned. +This function can be used as a base for recursive traversal of the file storage. +@param fs File storage +@param stream_index Zero-based index of the stream. See cvStartNextStream . In most cases, +there is only one stream in the file; however, there can be several. + */ +CVAPI(CvFileNode*) cvGetRootFileNode( const CvFileStorage* fs, + int stream_index CV_DEFAULT(0) ); + +/** @brief Finds a node in a map or file storage. + +The function finds a file node. It is a faster version of cvGetFileNodeByName (see +cvGetHashedKey discussion). Also, the function can insert a new node, if it is not in the map yet. +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. If both map and +key are NULLs, the function returns the root file node - a map that contains top-level nodes. +@param key Unique pointer to the node name, retrieved with cvGetHashedKey +@param create_missing Flag that specifies whether an absent node should be added to the map + */ +CVAPI(CvFileNode*) cvGetFileNode( CvFileStorage* fs, CvFileNode* map, + const CvStringHashNode* key, + int create_missing CV_DEFAULT(0) ); + +/** @brief Finds a node in a map or file storage. + +The function finds a file node by name. The node is searched either in map or, if the pointer is +NULL, among the top-level file storage nodes. Using this function for maps and cvGetSeqElem (or +sequence reader) for sequences, it is possible to navigate through the file storage. To speed up +multiple queries for a certain key (e.g., in the case of an array of structures) one may use a +combination of cvGetHashedKey and cvGetFileNode. +@param fs File storage +@param map The parent map. If it is NULL, the function searches in all the top-level nodes +(streams), starting with the first one. +@param name The file node name + */ +CVAPI(CvFileNode*) cvGetFileNodeByName( const CvFileStorage* fs, + const CvFileNode* map, + const char* name ); + +/** @brief Retrieves an integer value from a file node. + +The function returns an integer that is represented by the file node. If the file node is NULL, the +default_value is returned (thus, it is convenient to call the function right after cvGetFileNode +without checking for a NULL pointer). If the file node has type CV_NODE_INT, then node-\>data.i is +returned. If the file node has type CV_NODE_REAL, then node-\>data.f is converted to an integer +and returned. Otherwise the error is reported. +@param node File node +@param default_value The value that is returned if node is NULL + */ +CV_INLINE int cvReadInt( const CvFileNode* node, int default_value CV_DEFAULT(0) ) +{ + return !node ? default_value : + CV_NODE_IS_INT(node->tag) ? node->data.i : + CV_NODE_IS_REAL(node->tag) ? cvRound(node->data.f) : 0x7fffffff; +} + +/** @brief Finds a file node and returns its value. + +The function is a simple superposition of cvGetFileNodeByName and cvReadInt. +@param fs File storage +@param map The parent map. If it is NULL, the function searches a top-level node. 
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE int cvReadIntByName( const CvFileStorage* fs, const CvFileNode* map,
+                               const char* name, int default_value CV_DEFAULT(0) )
+{
+    return cvReadInt( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+/** @brief Retrieves a floating-point value from a file node.
+
+The function returns a floating-point value that is represented by the file node. If the file node
+is NULL, the default_value is returned (thus, it is convenient to call the function right after
+cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_REAL,
+then node-\>data.f is returned. If the file node has type CV_NODE_INT, then node-\>data.i
+is converted to floating-point and returned. Otherwise the result is not determined.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE double cvReadReal( const CvFileNode* node, double default_value CV_DEFAULT(0.) )
+{
+    return !node ? default_value :
+        CV_NODE_IS_INT(node->tag) ? (double)node->data.i :
+        CV_NODE_IS_REAL(node->tag) ? node->data.f : 1e300;
+}
+
+/** @brief Finds a file node and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadReal.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE double cvReadRealByName( const CvFileStorage* fs, const CvFileNode* map,
+                        const char* name, double default_value CV_DEFAULT(0.) )
+{
+    return cvReadReal( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+/** @brief Retrieves a text string from a file node.
+
+The function returns a text string that is represented by the file node. If the file node is NULL,
+the default_value is returned (thus, it is convenient to call the function right after
+cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_STR, then
+node-\>data.str.ptr is returned. Otherwise the result is not determined.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE const char* cvReadString( const CvFileNode* node,
+                                    const char* default_value CV_DEFAULT(NULL) )
+{
+    return !node ? default_value : CV_NODE_IS_STRING(node->tag) ? node->data.str.ptr : 0;
+}
+
+/** @brief Finds a file node by its name and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadString.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE const char* cvReadStringByName( const CvFileStorage* fs, const CvFileNode* map,
+                        const char* name, const char* default_value CV_DEFAULT(NULL) )
+{
+    return cvReadString( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+
+/** @brief Decodes an object and returns a pointer to it.
+
+The function decodes a user object (creates an object in a native representation from the file
+storage subtree) and returns it. The object to be decoded must be an instance of a registered type
+that supports the read method (see CvTypeInfo). The type of the object is determined by the type
+name that is encoded in the file.
+If the object is a dynamic structure, it is created either in the memory storage
+that was passed to cvOpenFileStorage or, if a NULL pointer was passed, in a temporary
+memory storage, which is released when cvReleaseFileStorage is called. Otherwise, if the object is
+not a dynamic structure, it is created on the heap and should be released with a specialized function
+or by using the generic cvRelease.
+@param fs File storage
+@param node The root object node
+@param attributes Unused parameter
+ */
+CVAPI(void*) cvRead( CvFileStorage* fs, CvFileNode* node,
+                     CvAttrList* attributes CV_DEFAULT(NULL));
+
+/** @brief Finds an object by name and decodes it.
+
+The function is a simple superposition of cvGetFileNodeByName and cvRead.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param attributes Unused parameter
+ */
+CV_INLINE void* cvReadByName( CvFileStorage* fs, const CvFileNode* map,
+                              const char* name, CvAttrList* attributes CV_DEFAULT(NULL) )
+{
+    return cvRead( fs, cvGetFileNodeByName( fs, map, name ), attributes );
+}
+
+
+/** @brief Initializes the file node sequence reader.
+
+The function initializes the sequence reader to read data from a file node. The initialized reader
+can be then passed to cvReadRawDataSlice.
+@param fs File storage
+@param src The file node (a sequence) to read numbers from
+@param reader Pointer to the sequence reader
+ */
+CVAPI(void) cvStartReadRawData( const CvFileStorage* fs, const CvFileNode* src,
+                                CvSeqReader* reader );
+
+/** @brief Reads one or more elements from the file node sequence.
+
+The function reads one or more elements from the file node, representing a sequence, to a
+user-specified array. The total number of read sequence elements is a product of total and the
+number of components in each array element. For example, if dt=2if, the function will read total\*3
+sequence elements. As with any sequence, some parts of the file node sequence can be skipped or read
+repeatedly by repositioning the reader using cvSetSeqReaderPos.
+@param fs File storage
+@param reader The sequence reader. Initialize it with cvStartReadRawData.
+@param count The number of elements to read
+@param dst Pointer to the destination array
+@param dt Specification of each array element. It has the same format as in cvWriteRawData.
+ */
+CVAPI(void) cvReadRawDataSlice( const CvFileStorage* fs, CvSeqReader* reader,
+                                int count, void* dst, const char* dt );
+
+/** @brief Reads multiple numbers.
+
+The function reads elements from a file node that represents a sequence of scalars.
+@param fs File storage
+@param src The file node (a sequence) to read numbers from
+@param dst Pointer to the destination array
+@param dt Specification of each array element. It has the same format as in cvWriteRawData.
+ */
+CVAPI(void) cvReadRawData( const CvFileStorage* fs, const CvFileNode* src,
+                           void* dst, const char* dt );
+
+/** @brief Writes a file node to another file storage.
+
+The function writes a copy of a file node to file storage. Possible applications of the function are
+merging several file storages into one and conversion between XML, YAML and JSON formats.
+@param fs Destination file storage
+@param new_node_name New name of the file node in the destination file storage. To keep the
+existing name, use cvGetFileNodeName
+@param node The written node
+@param embed If the written node is a collection and this parameter is not zero, no extra level of
+hierarchy is created.
Instead, all the elements of node are written into the currently written +structure. Of course, map elements can only be embedded into another map, and sequence elements +can only be embedded into another sequence. + */ +CVAPI(void) cvWriteFileNode( CvFileStorage* fs, const char* new_node_name, + const CvFileNode* node, int embed ); + +/** @brief Returns the name of a file node. + +The function returns the name of a file node or NULL, if the file node does not have a name or if +node is NULL. +@param node File node + */ +CVAPI(const char*) cvGetFileNodeName( const CvFileNode* node ); + +/*********************************** Adding own types ***********************************/ + +/** @brief Registers a new type. + +The function registers a new type, which is described by info . The function creates a copy of the +structure, so the user should delete it after calling the function. +@param info Type info structure + */ +CVAPI(void) cvRegisterType( const CvTypeInfo* info ); + +/** @brief Unregisters the type. + +The function unregisters a type with a specified name. If the name is unknown, it is possible to +locate the type info by an instance of the type using cvTypeOf or by iterating the type list, +starting from cvFirstType, and then calling cvUnregisterType(info-\>typeName). +@param type_name Name of an unregistered type + */ +CVAPI(void) cvUnregisterType( const char* type_name ); + +/** @brief Returns the beginning of a type list. + +The function returns the first type in the list of registered types. Navigation through the list can +be done via the prev and next fields of the CvTypeInfo structure. + */ +CVAPI(CvTypeInfo*) cvFirstType(void); + +/** @brief Finds a type by its name. + +The function finds a registered type by its name. It returns NULL if there is no type with the +specified name. +@param type_name Type name + */ +CVAPI(CvTypeInfo*) cvFindType( const char* type_name ); + +/** @brief Returns the type of an object. + +The function finds the type of a given object. It iterates through the list of registered types and +calls the is_instance function/method for every type info structure with that object until one of +them returns non-zero or until the whole list has been traversed. In the latter case, the function +returns NULL. +@param struct_ptr The object pointer + */ +CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr ); + +#endif + +/** @brief Releases an object. + + The function finds the type of a given object and calls release with the double pointer. + @param struct_ptr Double pointer to the object + */ +CVAPI(void) cvRelease( void** struct_ptr ); + +/** @brief Makes a clone of an object. + +The function finds the type of a given object and calls clone with the passed object. Of course, if +you know the object type, for example, struct_ptr is CvMat\*, it is faster to call the specific +function, like cvCloneMat. 
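+
+For example (sketch; mat is assumed to be an existing CvMat*):
+@code
+    CvMat* copy = (CvMat*)cvClone( mat );  // generic equivalent of cvCloneMat( mat )
+    cvRelease( (void**)&copy );
+@endcode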
+@param struct_ptr The object to clone
+ */
+CVAPI(void*) cvClone( const void* struct_ptr );
+
+/*********************************** Measuring Execution Time ***************************/
+
+/** helper functions for RNG initialization and accurate time measurement:
+    they use the internal clock counter on x86 */
+CVAPI(int64) cvGetTickCount( void );
+CVAPI(double) cvGetTickFrequency( void );
+
+/*********************************** CPU capabilities ***********************************/
+
+CVAPI(int) cvCheckHardwareSupport(int feature);
+
+/*********************************** Multi-Threading ************************************/
+
+/** retrieve/set the number of threads used in OpenMP implementations */
+CVAPI(int) cvGetNumThreads( void );
+CVAPI(void) cvSetNumThreads( int threads CV_DEFAULT(0) );
+/** get index of the thread being executed */
+CVAPI(int) cvGetThreadNum( void );
+
+
+/********************************** Error Handling **************************************/
+
+/** Get current OpenCV error status */
+CVAPI(int) cvGetErrStatus( void );
+
+/** Sets error status silently */
+CVAPI(void) cvSetErrStatus( int status );
+
+#define CV_ErrModeLeaf 0 /* Print error and exit program */
+#define CV_ErrModeParent 1 /* Print error and continue */
+#define CV_ErrModeSilent 2 /* Don't print and continue */
+
+/** Retrieves current error processing mode */
+CVAPI(int) cvGetErrMode( void );
+
+/** Sets error processing mode, returns previously used mode */
+CVAPI(int) cvSetErrMode( int mode );
+
+/** Sets error status and performs some additional actions (displaying message box,
+    writing message to stderr, terminating application etc.)
+    depending on the current error mode */
+CVAPI(void) cvError( int status, const char* func_name,
+                     const char* err_msg, const char* file_name, int line );
+
+/** Retrieves textual description of the error given its code */
+CVAPI(const char*) cvErrorStr( int status );
+
+/** Retrieves detailed information about the last error that occurred */
+CVAPI(int) cvGetErrInfo( const char** errcode_desc, const char** description,
+                         const char** filename, int* line );
+
+/** Maps IPP error codes to the counterparts from OpenCV */
+CVAPI(int) cvErrorFromIppStatus( int ipp_status );
+
+typedef int (CV_CDECL *CvErrorCallback)( int status, const char* func_name,
+                                         const char* err_msg, const char* file_name, int line, void* userdata );
+
+/** Assigns a new error-handling function */
+CVAPI(CvErrorCallback) cvRedirectError( CvErrorCallback error_handler,
+                                        void* userdata CV_DEFAULT(NULL),
+                                        void** prev_userdata CV_DEFAULT(NULL) );
+
+/** Output nothing */
+CVAPI(int) cvNulDevReport( int status, const char* func_name, const char* err_msg,
+                           const char* file_name, int line, void* userdata );
+
+/** Output to console (fprintf(stderr,...)) */
+CVAPI(int) cvStdErrReport( int status, const char* func_name, const char* err_msg,
+                           const char* file_name, int line, void* userdata );
+
+/** Output to MessageBox (Win32) */
+CVAPI(int) cvGuiBoxReport( int status, const char* func_name, const char* err_msg,
+                           const char* file_name, int line, void* userdata );
+
+#define OPENCV_ERROR(status,func,context) \
+cvError((status),(func),(context),__FILE__,__LINE__)
+
+#define OPENCV_ASSERT(expr,func,context) \
+{if (! (expr)) \
+{OPENCV_ERROR(CV_StsInternal,(func),(context));}}
+
+#define OPENCV_CALL( Func ) \
+{ \
+Func; \
+}
+
+
+/** CV_FUNCNAME macro defines the cvFuncName constant which is used by the CV_ERROR macro */
+#ifdef CV_NO_FUNC_NAMES
+#define CV_FUNCNAME( Name )
+#define cvFuncName ""
+#else
+#define CV_FUNCNAME( Name ) \
+static char cvFuncName[] = Name
+#endif
+
+
+/**
+ CV_ERROR macro unconditionally raises an error with the passed code and message.
+ After raising the error, control will be transferred to the exit label.
+ */
+#define CV_ERROR( Code, Msg ) \
+{ \
+    cvError( (Code), cvFuncName, Msg, __FILE__, __LINE__ ); \
+    __CV_EXIT__; \
+}
+
+/**
+ CV_CHECK macro checks error status after CV (or IPL)
+ function call. If an error is detected, control will be transferred to the exit
+ label.
+ */
+#define CV_CHECK() \
+{ \
+    if( cvGetErrStatus() < 0 ) \
+        CV_ERROR( CV_StsBackTrace, "Inner function failed." ); \
+}
+
+
+/**
+ CV_CALL macro calls CV (or IPL) function, checks error status and
+ signals an error if the function fails. Useful in "parent node"
+ error processing mode
+ */
+#define CV_CALL( Func ) \
+{ \
+    Func; \
+    CV_CHECK(); \
+}
+
+
+/** Runtime assertion macro */
+#define CV_ASSERT( Condition ) \
+{ \
+    if( !(Condition) ) \
+        CV_ERROR( CV_StsInternal, "Assertion: " #Condition " failed" ); \
+}
+
+#define __CV_BEGIN__ {
+#define __CV_END__ goto exit; exit: ; }
+#define __CV_EXIT__ goto exit
+
+/** @} core_c */
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#ifdef __cplusplus
+
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_c_glue
+//! @{
+
+/////////////////////////////////////////// glue ///////////////////////////////////////////
+
+//! converts array (CvMat or IplImage) to cv::Mat
+CV_EXPORTS Mat cvarrToMat(const CvArr* arr, bool copyData=false,
+                          bool allowND=true, int coiMode=0,
+                          AutoBuffer<double>* buf=0);
+
+static inline Mat cvarrToMatND(const CvArr* arr, bool copyData=false, int coiMode=0)
+{
+    return cvarrToMat(arr, copyData, true, coiMode);
+}
+
+
+//! extracts Channel of Interest from CvMat or IplImage and makes cv::Mat out of it.
+CV_EXPORTS void extractImageCOI(const CvArr* arr, OutputArray coiimg, int coi=-1);
+//! inserts single-channel cv::Mat into a multi-channel CvMat or IplImage
+CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
+
+
+
+////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
+
+template<> struct DefaultDeleter<CvMat>{ CV_EXPORTS void operator ()(CvMat* obj) const; };
+template<> struct DefaultDeleter<IplImage>{ CV_EXPORTS void operator ()(IplImage* obj) const; };
+template<> struct DefaultDeleter<CvMatND>{ CV_EXPORTS void operator ()(CvMatND* obj) const; };
+template<> struct DefaultDeleter<CvSparseMat>{ CV_EXPORTS void operator ()(CvSparseMat* obj) const; };
+template<> struct DefaultDeleter<CvMemStorage>{ CV_EXPORTS void operator ()(CvMemStorage* obj) const; };
+
+////////////// convenient wrappers for operating old-style dynamic structures //////////////
+
+template<typename _Tp> class SeqIterator;
+
+typedef Ptr<CvMemStorage> MemStorage;
+
+/*!
+ Template Sequence Class derived from CvSeq
+
+ The class provides more convenient access to sequence elements,
+ STL-style operations and iterators.
+
+ \note The class is targeted for simple data types,
+ i.e. no constructors or destructors
+ are called for the sequence elements.
+*/
+template<typename _Tp> class Seq
+{
+public:
+    typedef SeqIterator<_Tp> iterator;
+    typedef SeqIterator<_Tp> const_iterator;
+
+    //! the default constructor
+    Seq();
+    //! the constructor for wrapping CvSeq structure. The real element type in CvSeq should match _Tp.
+#ifdef __cplusplus
+
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_c_glue
+//! @{
+
+/////////////////////////////////////////// glue ///////////////////////////////////////////
+
+//! converts array (CvMat or IplImage) to cv::Mat
+CV_EXPORTS Mat cvarrToMat(const CvArr* arr, bool copyData=false,
+                          bool allowND=true, int coiMode=0,
+                          AutoBuffer<double>* buf=0);
+
+static inline Mat cvarrToMatND(const CvArr* arr, bool copyData=false, int coiMode=0)
+{
+    return cvarrToMat(arr, copyData, true, coiMode);
+}
+
+
+//! extracts Channel of Interest from CvMat or IplImage and makes cv::Mat out of it.
+CV_EXPORTS void extractImageCOI(const CvArr* arr, OutputArray coiimg, int coi=-1);
+//! inserts single-channel cv::Mat into a multi-channel CvMat or IplImage
+CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
+
+
+
+////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
+
+template<> struct DefaultDeleter<CvMat>{ CV_EXPORTS void operator ()(CvMat* obj) const; };
+template<> struct DefaultDeleter<IplImage>{ CV_EXPORTS void operator ()(IplImage* obj) const; };
+template<> struct DefaultDeleter<CvMatND>{ CV_EXPORTS void operator ()(CvMatND* obj) const; };
+template<> struct DefaultDeleter<CvSparseMat>{ CV_EXPORTS void operator ()(CvSparseMat* obj) const; };
+template<> struct DefaultDeleter<CvMemStorage>{ CV_EXPORTS void operator ()(CvMemStorage* obj) const; };
+
+////////////// convenient wrappers for operating old-style dynamic structures //////////////
+
+template<typename _Tp> class SeqIterator;
+
+typedef Ptr<CvMemStorage> MemStorage;
+
+/*!
+ Template Sequence Class derived from CvSeq
+
+ The class provides more convenient access to sequence elements,
+ STL-style operations and iterators.
+
+ \note The class is targeted for simple data types,
+ i.e. no constructors or destructors
+ are called for the sequence elements.
+*/
+template<typename _Tp> class Seq
+{
+public:
+    typedef SeqIterator<_Tp> iterator;
+    typedef SeqIterator<_Tp> const_iterator;
+
+    //! the default constructor
+    Seq();
+    //! the constructor for wrapping CvSeq structure. The real element type in CvSeq should match _Tp.
+    Seq(const CvSeq* seq);
+    //! creates the empty sequence that resides in the specified storage
+    Seq(MemStorage& storage, int headerSize = sizeof(CvSeq));
+    //! returns read-write reference to the specified element
+    _Tp& operator [](int idx);
+    //! returns read-only reference to the specified element
+    const _Tp& operator[](int idx) const;
+    //! returns iterator pointing to the beginning of the sequence
+    SeqIterator<_Tp> begin() const;
+    //! returns iterator pointing to the element following the last sequence element
+    SeqIterator<_Tp> end() const;
+    //! returns the number of elements in the sequence
+    size_t size() const;
+    //! returns the type of sequence elements (CV_8UC1 ... CV_64FC(CV_CN_MAX) ...)
+    int type() const;
+    //! returns the depth of sequence elements (CV_8U ... CV_64F)
+    int depth() const;
+    //! returns the number of channels in each sequence element
+    int channels() const;
+    //! returns the size of each sequence element
+    size_t elemSize() const;
+    //! returns index of the specified sequence element
+    size_t index(const _Tp& elem) const;
+    //! appends the specified element to the end of the sequence
+    void push_back(const _Tp& elem);
+    //! appends the specified element to the front of the sequence
+    void push_front(const _Tp& elem);
+    //! appends zero or more elements to the end of the sequence
+    void push_back(const _Tp* elems, size_t count);
+    //! appends zero or more elements to the front of the sequence
+    void push_front(const _Tp* elems, size_t count);
+    //! inserts the specified element to the specified position
+    void insert(int idx, const _Tp& elem);
+    //! inserts zero or more elements to the specified position
+    void insert(int idx, const _Tp* elems, size_t count);
+    //! removes element at the specified position
+    void remove(int idx);
+    //! removes the specified subsequence
+    void remove(const Range& r);
+
+    //! returns reference to the first sequence element
+    _Tp& front();
+    //! returns read-only reference to the first sequence element
+    const _Tp& front() const;
+    //! returns reference to the last sequence element
+    _Tp& back();
+    //! returns read-only reference to the last sequence element
+    const _Tp& back() const;
+    //! returns true iff the sequence contains no elements
+    bool empty() const;
+
+    //! removes all the elements from the sequence
+    void clear();
+    //! removes the first element from the sequence
+    void pop_front();
+    //! removes the last element from the sequence
+    void pop_back();
+    //! removes zero or more elements from the beginning of the sequence
+    void pop_front(_Tp* elems, size_t count);
+    //! removes zero or more elements from the end of the sequence
+    void pop_back(_Tp* elems, size_t count);
+
+    //! copies the whole sequence or the sequence slice to the specified vector
+    void copyTo(std::vector<_Tp>& vec, const Range& range=Range::all()) const;
+    //! returns the vector containing all the sequence elements
+    operator std::vector<_Tp>() const;
+
+    CvSeq* seq;
+};
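Since Seq<_Tp> is only a thin header over a CvSeq living inside a CvMemStorage, a short usage sketch helps; this is illustrative only (seq_demo is a hypothetical name) and assumes the legacy C API is available:

    #include <opencv2/core.hpp>
    #include <opencv2/core/core_c.h>

    void seq_demo()
    {
        // MemStorage is Ptr<CvMemStorage>; the DefaultDeleter specialization above
        // releases the storage automatically when the last reference goes away.
        cv::MemStorage storage(cvCreateMemStorage(0));

        cv::Seq<int> s(storage);   // empty sequence of int; element size comes from DataType<int>
        s.push_back(1);
        s.push_back(2);
        s.push_front(0);

        CV_Assert(s.size() == 3 && s[0] == 0 && s.back() == 2);
    }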
+
+
+/*!
+ STL-style Sequence Iterator inherited from the CvSeqReader structure
+*/
+template<typename _Tp> class SeqIterator : public CvSeqReader
+{
+public:
+    //! the default constructor
+    SeqIterator();
+    //! the constructor setting the iterator to the beginning or to the end of the sequence
+    SeqIterator(const Seq<_Tp>& seq, bool seekEnd=false);
+    //! positions the iterator within the sequence
+    void seek(size_t pos);
+    //! reports the current iterator position
+    size_t tell() const;
+    //! returns reference to the current sequence element
+    _Tp& operator *();
+    //! returns read-only reference to the current sequence element
+    const _Tp& operator *() const;
+    //! moves iterator to the next sequence element
+    SeqIterator& operator ++();
+    //! moves iterator to the next sequence element
+    SeqIterator operator ++(int) const;
+    //! moves iterator to the previous sequence element
+    SeqIterator& operator --();
+    //! moves iterator to the previous sequence element
+    SeqIterator operator --(int) const;
+
+    //! moves iterator forward by the specified offset (possibly negative)
+    SeqIterator& operator +=(int);
+    //! moves iterator backward by the specified offset (possibly negative)
+    SeqIterator& operator -=(int);
+
+    // this is index of the current element modulo seq->total*2
+    // (to distinguish between 0 and seq->total)
+    int index;
+};
+
+
+
+// bridge C++ => C Seq API
+CV_EXPORTS schar* seqPush( CvSeq* seq, const void* element=0);
+CV_EXPORTS schar* seqPushFront( CvSeq* seq, const void* element=0);
+CV_EXPORTS void seqPop( CvSeq* seq, void* element=0);
+CV_EXPORTS void seqPopFront( CvSeq* seq, void* element=0);
+CV_EXPORTS void seqPopMulti( CvSeq* seq, void* elements,
+                             int count, int in_front=0 );
+CV_EXPORTS void seqRemove( CvSeq* seq, int index );
+CV_EXPORTS void clearSeq( CvSeq* seq );
+CV_EXPORTS schar* getSeqElem( const CvSeq* seq, int index );
+CV_EXPORTS void seqRemoveSlice( CvSeq* seq, CvSlice slice );
+CV_EXPORTS void seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr );
+
+template<typename _Tp> inline Seq<_Tp>::Seq() : seq(0) {}
+template<typename _Tp> inline Seq<_Tp>::Seq( const CvSeq* _seq ) : seq((CvSeq*)_seq)
+{
+    CV_Assert(!_seq || _seq->elem_size == sizeof(_Tp));
+}
+
+template<typename _Tp> inline Seq<_Tp>::Seq( MemStorage& storage,
+                                             int headerSize )
+{
+    CV_Assert(headerSize >= (int)sizeof(CvSeq));
+    seq = cvCreateSeq(DataType<_Tp>::type, headerSize, sizeof(_Tp), storage);
+}
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::operator [](int idx)
+{ return *(_Tp*)getSeqElem(seq, idx); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::operator [](int idx) const
+{ return *(_Tp*)getSeqElem(seq, idx); }
+
+template<typename _Tp> inline SeqIterator<_Tp> Seq<_Tp>::begin() const
+{ return SeqIterator<_Tp>(*this); }
+
+template<typename _Tp> inline SeqIterator<_Tp> Seq<_Tp>::end() const
+{ return SeqIterator<_Tp>(*this, true); }
+
+template<typename _Tp> inline size_t Seq<_Tp>::size() const
+{ return seq ? seq->total : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::type() const
+{ return seq ? CV_MAT_TYPE(seq->flags) : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::depth() const
+{ return seq ? CV_MAT_DEPTH(seq->flags) : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::channels() const
+{ return seq ? CV_MAT_CN(seq->flags) : 0; }
+
+template<typename _Tp> inline size_t Seq<_Tp>::elemSize() const
+{ return seq ? seq->elem_size : 0; }
+
+template<typename _Tp> inline size_t Seq<_Tp>::index(const _Tp& elem) const
+{ return cvSeqElemIdx(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_back(const _Tp& elem)
+{ cvSeqPush(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_front(const _Tp& elem)
+{ cvSeqPushFront(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_back(const _Tp* elem, size_t count)
+{ cvSeqPushMulti(seq, elem, (int)count, 0); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_front(const _Tp* elem, size_t count)
+{ cvSeqPushMulti(seq, elem, (int)count, 1); }
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::back()
+{ return *(_Tp*)getSeqElem(seq, -1); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::back() const
+{ return *(const _Tp*)getSeqElem(seq, -1); }
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::front()
+{ return *(_Tp*)getSeqElem(seq, 0); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::front() const
+{ return *(const _Tp*)getSeqElem(seq, 0); }
+
+template<typename _Tp> inline bool Seq<_Tp>::empty() const
+{ return !seq || seq->total == 0; }
+
+template<typename _Tp> inline void Seq<_Tp>::clear()
+{ if(seq) clearSeq(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_back()
+{ seqPop(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_front()
+{ seqPopFront(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_back(_Tp* elem, size_t count)
+{ seqPopMulti(seq, elem, (int)count, 0); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_front(_Tp* elem, size_t count)
+{ seqPopMulti(seq, elem, (int)count, 1); }
+
+template<typename _Tp> inline void Seq<_Tp>::insert(int idx, const _Tp& elem)
+{ seqInsert(seq, idx, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::insert(int idx, const _Tp* elems, size_t count)
+{
+    CvMat m = cvMat(1, count, DataType<_Tp>::type, elems);
+    seqInsertSlice(seq, idx, &m);
+}
+
+template<typename _Tp> inline void Seq<_Tp>::remove(int idx)
+{ seqRemove(seq, idx); }
+
+template<typename _Tp> inline void Seq<_Tp>::remove(const Range& r)
+{ seqRemoveSlice(seq, cvSlice(r.start, r.end)); }
+
+template<typename _Tp> inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const Range& range) const
+{
+    size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start;
+    vec.resize(len);
+    if( seq && len )
+        cvCvtSeqToArray(seq, &vec[0], cvSlice(range));
+}
+
+template<typename _Tp> inline Seq<_Tp>::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> vec;
+    copyTo(vec);
+    return vec;
+}
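The iterator implemented below is a CvSeqReader underneath, so traversal is a sequential read rather than pointer arithmetic, and the index field wraps modulo seq->total*2 to tell begin and end apart. A short sketch (illustrative only; sum_elements is a hypothetical name):

    #include <vector>
    #include <opencv2/core.hpp>
    #include <opencv2/core/core_c.h>

    int sum_elements(cv::Seq<int>& s)
    {
        int sum = 0;
        for (cv::SeqIterator<int> it = s.begin(); it != s.end(); ++it)
            sum += *it;        // dereference reads through the CvSeqReader ptr

        std::vector<int> v;
        s.copyTo(v);           // or: std::vector<int> v = s; via the conversion operator
        CV_Assert(v.size() == s.size());
        return sum;
    }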
+
+template<typename _Tp> inline SeqIterator<_Tp>::SeqIterator()
+{ memset(this, 0, sizeof(*this)); }
+
+template<typename _Tp> inline SeqIterator<_Tp>::SeqIterator(const Seq<_Tp>& _seq, bool seekEnd)
+{
+    cvStartReadSeq(_seq.seq, this);
+    index = seekEnd ? _seq.seq->total : 0;
+}
+
+template<typename _Tp> inline void SeqIterator<_Tp>::seek(size_t pos)
+{
+    cvSetSeqReaderPos(this, (int)pos, false);
+    index = pos;
+}
+
+template<typename _Tp> inline size_t SeqIterator<_Tp>::tell() const
+{ return index; }
+
+template<typename _Tp> inline _Tp& SeqIterator<_Tp>::operator *()
+{ return *(_Tp*)ptr; }
+
+template<typename _Tp> inline const _Tp& SeqIterator<_Tp>::operator *() const
+{ return *(const _Tp*)ptr; }
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator ++()
+{
+    CV_NEXT_SEQ_ELEM(sizeof(_Tp), *this);
+    if( ++index >= seq->total*2 )
+        index = 0;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp> SeqIterator<_Tp>::operator ++(int) const
+{
+    SeqIterator<_Tp> it = *this;
+    ++*this;
+    return it;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator --()
+{
+    CV_PREV_SEQ_ELEM(sizeof(_Tp), *this);
+    if( --index < 0 )
+        index = seq->total*2-1;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp> SeqIterator<_Tp>::operator --(int) const
+{
+    SeqIterator<_Tp> it = *this;
+    --*this;
+    return it;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator +=(int delta)
+{
+    cvSetSeqReaderPos(this, delta, 1);
+    index += delta;
+    int n = seq->total*2;
+    if( index < 0 )
+        index += n;
+    if( index >= n )
+        index -= n;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator -=(int delta)
+{
+    return (*this += -delta);
+}
+
+template<typename _Tp> inline ptrdiff_t operator - (const SeqIterator<_Tp>& a,
+                                                    const SeqIterator<_Tp>& b)
+{
+    ptrdiff_t delta = a.index - b.index, n = a.seq->total;
+    if( delta > n || delta < -n )
+        delta += delta < 0 ? n : -n;
+    return delta;
+}
+
+template<typename _Tp> inline bool operator == (const SeqIterator<_Tp>& a,
+                                                const SeqIterator<_Tp>& b)
+{
+    return a.seq == b.seq && a.index == b.index;
+}
+
+template<typename _Tp> inline bool operator != (const SeqIterator<_Tp>& a,
+                                                const SeqIterator<_Tp>& b)
+{
+    return !(a == b);
+}
+
+//! @}
+
+} // cv
+
+#endif
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda.hpp
new file mode 100644
index 0000000..1ebea07
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda.hpp
@@ -0,0 +1,1271 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CUDA_HPP +#define OPENCV_CORE_CUDA_HPP + +#ifndef __cplusplus +# error cuda.hpp header must be compiled as C++ +#endif + +#include "opencv2/core.hpp" +#include "opencv2/core/cuda_types.hpp" + +/** + @defgroup cuda CUDA-accelerated Computer Vision + @{ + @defgroup cudacore Core part + @{ + @defgroup cudacore_init Initialization and Information + @defgroup cudacore_struct Data Structures + @} + @} + */ + +namespace cv { namespace cuda { + +//! @addtogroup cudacore_struct +//! @{ + +//=================================================================================== +// GpuMat +//=================================================================================== + +/** @brief Base storage class for GPU memory with reference counting. + +Its interface matches the Mat interface with the following limitations: + +- no arbitrary dimensions support (only 2D) +- no functions that return references to their data (because references on GPU are not valid for + CPU) +- no expression templates technique support + +Beware that the latter limitation may lead to overloaded matrix operators that cause memory +allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be +passed directly to the kernel. + +@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are +aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix. + +@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely +on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory +release function returns error if the CUDA context has been destroyed before. + +Some member functions are described as a "Blocking Call" while some are described as a +"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU +operation is finished when the function returns. However, non-blocking functions are asynchronous to +host. Those functions may return even if the GPU operation is not finished. + +Compared to their blocking counterpart, non-blocking functions accept Stream as an additional +argument. If a non-default stream is passed, the GPU operation may overlap with operations in other +streams. 
+ +@sa Mat + */ +class CV_EXPORTS_W GpuMat +{ +public: + class CV_EXPORTS_W Allocator + { + public: + virtual ~Allocator() {} + + // allocator must fill data, step and refcount fields + virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0; + virtual void free(GpuMat* mat) = 0; + }; + + //! default allocator + CV_WRAP static GpuMat::Allocator* defaultAllocator(); + CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator); + + //! default constructor + CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! constructs GpuMat of the specified size and type + CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! constructs GpuMat and fills it with the specified value _s + CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! copy constructor + CV_WRAP GpuMat(const GpuMat& m); + + //! constructor for GpuMat headers pointing to user-allocated data + GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP); + GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP); + + //! creates a GpuMat header for a part of the bigger matrix + CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange); + CV_WRAP GpuMat(const GpuMat& m, Rect roi); + + //! builds GpuMat from host memory (Blocking call) + CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); + + //! destructor - calls release() + ~GpuMat(); + + //! assignment operators + GpuMat& operator =(const GpuMat& m); + + //! allocates new GpuMat data unless the GpuMat already has specified size and type + CV_WRAP void create(int rows, int cols, int type); + CV_WRAP void create(Size size, int type); + + //! decreases reference counter, deallocate the data when reference counter reaches 0 + CV_WRAP void release(); + + //! swaps with other smart pointer + CV_WRAP void swap(GpuMat& mat); + + /** @brief Performs data upload to GpuMat (Blocking call) + + This function copies data from host memory to device memory. As being a blocking call, it is + guaranteed that the copy operation is finished when this function returns. + */ + CV_WRAP void upload(InputArray arr); + + /** @brief Performs data upload to GpuMat (Non-Blocking call) + + This function copies data from host memory to device memory. As being a non-blocking call, this + function may return even if the copy operation is not finished. + + The copy operation may be overlapped with operations in other non-default streams if \p stream is + not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option. + */ + CV_WRAP void upload(InputArray arr, Stream& stream); + + /** @brief Performs data download from GpuMat (Blocking call) + + This function copies data from device memory to host memory. As being a blocking call, it is + guaranteed that the copy operation is finished when this function returns. + */ + CV_WRAP void download(OutputArray dst) const; + + /** @brief Performs data download from GpuMat (Non-Blocking call) + + This function copies data from device memory to host memory. As being a non-blocking call, this + function may return even if the copy operation is not finished. 
+
+    The copy operation may be overlapped with operations in other non-default streams if \p stream is
+    not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
+    */
+    CV_WRAP void download(OutputArray dst, Stream& stream) const;
+
+    //! returns deep copy of the GpuMat, i.e. the data is copied
+    CV_WRAP GpuMat clone() const;
+
+    //! copies the GpuMat content to device memory (Blocking call)
+    CV_WRAP void copyTo(OutputArray dst) const;
+
+    //! copies the GpuMat content to device memory (Non-Blocking call)
+    CV_WRAP void copyTo(OutputArray dst, Stream& stream) const;
+
+    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
+    CV_WRAP void copyTo(OutputArray dst, InputArray mask) const;
+
+    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
+    CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
+
+    //! sets some of the GpuMat elements to s (Blocking call)
+    CV_WRAP GpuMat& setTo(Scalar s);
+
+    //! sets some of the GpuMat elements to s (Non-Blocking call)
+    CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
+
+    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
+    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
+
+    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
+    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
+
+    //! converts GpuMat to another datatype (Blocking call)
+    CV_WRAP void convertTo(OutputArray dst, int rtype) const;
+
+    //! converts GpuMat to another datatype (Non-Blocking call)
+    CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const;
+
+    //! converts GpuMat to another datatype with scaling (Blocking call)
+    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
+
+    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
+    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
+
+    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
+    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
+
+    CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
+
+    //! returns pointer to y-th row
+    uchar* ptr(int y = 0);
+    const uchar* ptr(int y = 0) const;
+
+    //! template version of the above method
+    template <typename _Tp> _Tp* ptr(int y = 0);
+    template <typename _Tp> const _Tp* ptr(int y = 0) const;
+
+    template <typename _Tp> operator PtrStepSz<_Tp>() const;
+    template <typename _Tp> operator PtrStep<_Tp>() const;
+
+    //! returns a new GpuMat header for the specified row
+    CV_WRAP GpuMat row(int y) const;
+
+    //! returns a new GpuMat header for the specified column
+    CV_WRAP GpuMat col(int x) const;
+
+    //! ... for the specified row span
+    CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
+    CV_WRAP GpuMat rowRange(Range r) const;
+
+    //! ... for the specified column span
+    CV_WRAP GpuMat colRange(int startcol, int endcol) const;
+    CV_WRAP GpuMat colRange(Range r) const;
+
+    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
+    GpuMat operator ()(Range rowRange, Range colRange) const;
+    GpuMat operator ()(Rect roi) const;
+
+    //! creates alternative GpuMat header for the same data, with different
+    //! number of channels and/or different number of rows
+    CV_WRAP GpuMat reshape(int cn, int rows = 0) const;
+
+    //! locates GpuMat header within a parent GpuMat
+    CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;
+
+    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
+    CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
+
+    //! returns true iff the GpuMat data is continuous
+    //! (i.e. when there are no gaps between successive rows)
+    CV_WRAP bool isContinuous() const;
+
+    //! returns element size in bytes
+    CV_WRAP size_t elemSize() const;
+
+    //! returns the size of element channel in bytes
+    CV_WRAP size_t elemSize1() const;
+
+    //! returns element type
+    CV_WRAP int type() const;
+
+    //! returns element depth
+    CV_WRAP int depth() const;
+
+    //! returns number of channels
+    CV_WRAP int channels() const;
+
+    //! returns step/elemSize1()
+    CV_WRAP size_t step1() const;
+
+    //! returns GpuMat size : width == number of columns, height == number of rows
+    CV_WRAP Size size() const;
+
+    //! returns true if GpuMat data is NULL
+    CV_WRAP bool empty() const;
+
+    // returns pointer to cuda memory
+    CV_WRAP void* cudaPtr() const;
+
+    //! internal use method: updates the continuity flag
+    CV_WRAP void updateContinuityFlag();
+
+    /*! includes several bit-fields:
+    - the magic signature
+    - continuity flag
+    - depth
+    - number of channels
+    */
+    int flags;
+
+    //! the number of rows and columns
+    int rows, cols;
+
+    //! a distance between successive rows in bytes; includes the gap if any
+    CV_PROP size_t step;
+
+    //! pointer to the data
+    uchar* data;
+
+    //! pointer to the reference counter;
+    //! when GpuMat points to user-allocated data, the pointer is NULL
+    int* refcount;
+
+    //! helper fields used in locateROI and adjustROI
+    uchar* datastart;
+    const uchar* dataend;
+
+    //! allocator
+    Allocator* allocator;
+};
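In practice the blocking upload/download pair is the simplest round trip through a GpuMat; the ROI operator gives a header without copying. A minimal sketch (requires an OpenCV build with CUDA enabled; round_trip is an illustrative name):

    #include <opencv2/core.hpp>
    #include <opencv2/core/cuda.hpp>

    cv::Mat round_trip(const cv::Mat& host)   // assumes host is at least 8x8
    {
        cv::cuda::GpuMat d_img;
        d_img.upload(host);                                    // blocking host -> device copy
        cv::cuda::GpuMat d_roi = d_img(cv::Rect(0, 0, 8, 8));  // header only, no copy
        d_roi.setTo(cv::Scalar::all(0));                       // zero out the ROI on the device
        cv::Mat result;
        d_img.download(result);                                // blocking device -> host copy
        return result;
    }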
+
+struct CV_EXPORTS_W GpuData
+{
+    explicit GpuData(size_t _size);
+    ~GpuData();
+
+    GpuData(const GpuData&) = delete;
+    GpuData& operator=(const GpuData&) = delete;
+
+    GpuData(GpuData&&) = delete;
+    GpuData& operator=(GpuData&&) = delete;
+
+    uchar* data;
+    size_t size;
+};
+
+class CV_EXPORTS_W GpuMatND
+{
+public:
+    using SizeArray = std::vector<int>;
+    using StepArray = std::vector<size_t>;
+    using IndexArray = std::vector<int>;
+
+    //! destructor
+    ~GpuMatND();
+
+    //! default constructor
+    GpuMatND();
+
+    /** @overload
+    @param size Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    GpuMatND(SizeArray size, int type);
+
+    /** @overload
+    @param size Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param step Array of _size.size()-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());
+
+    /** @brief Allocates GPU memory.
+    Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that
+    GPU memory under the specific condition: it must be of the same size and type, not externally allocated,
+    the GPU memory is continuous (i.e., isContinuous() is true), and is not a sub-matrix of another GpuMatND
+    (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by
+    this method is always continuous and is not a sub-region of another GpuMatND.
+    */
+    void create(SizeArray size, int type);
+
+    void release();
+
+    void swap(GpuMatND& m) noexcept;
+
+    /** @brief Creates a full copy of the array and the underlying data.
+    The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
+    the original step is not taken into account. So, the array copy is a continuous array
+    occupying total()\*elemSize() bytes.
+    */
+    GpuMatND clone() const;
+
+    /** @overload
+    This overload is non-blocking, so it may return even if the copy operation is not finished.
+    */
+    GpuMatND clone(Stream& stream) const;
+
+    /** @brief Extracts a sub-matrix.
+    The operator makes a new header for the specified sub-array of \*this.
+    The operator is an O(1) operation, that is, no matrix data is copied.
+    @param ranges Array of selected ranges along each dimension.
+    */
+    GpuMatND operator()(const std::vector<Range>& ranges) const;
+
+    /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
+    @note The returned GpuMat is constructed with the constructor for user-allocated data.
+    That is, it does not perform reference counting.
+    @note This function does not increment this GpuMatND's reference counter.
+    */
+    GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;
+
+    /** @overload
+    Creates a GpuMat header if this GpuMatND is effectively 2D.
+    @note The returned GpuMat is constructed with the constructor for user-allocated data.
+    That is, it does not perform reference counting.
+    @note This function does not increment this GpuMatND's reference counter.
+    */
+    GpuMat createGpuMatHeader() const;
+
+    /** @brief Extracts a 2D plane part of an n-dim matrix.
+    It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
+    GpuMatND to the returned GpuMat.
+    @note This operator does not increment this GpuMatND's reference counter.
+    */
+    GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;
+
+    /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
+    It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
+    @note This operator does not increment this GpuMatND's reference counter.
+    */
+    operator GpuMat() const;
+
+    GpuMatND(const GpuMatND&) = default;
+    GpuMatND& operator=(const GpuMatND&) = default;
+
+#if defined(__GNUC__) && __GNUC__ < 5
+    // error: function '...' defaulted on its first declaration with an exception-specification
+    // that differs from the implicit declaration '...'
+
+    GpuMatND(GpuMatND&&) = default;
+    GpuMatND& operator=(GpuMatND&&) = default;
+#else
+    GpuMatND(GpuMatND&&) noexcept = default;
+    GpuMatND& operator=(GpuMatND&&) noexcept = default;
+#endif
+
+    void upload(InputArray src);
+    void upload(InputArray src, Stream& stream);
+    void download(OutputArray dst) const;
+    void download(OutputArray dst, Stream& stream) const;
+
+    //! returns true iff the GpuMatND data is continuous
+    //! (i.e. when there are no gaps between successive rows)
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a sub-matrix of another matrix
+    bool isSubmatrix() const;
+
+    //! returns element size in bytes
+    size_t elemSize() const;
+
+    //! returns the size of element channel in bytes
+    size_t elemSize1() const;
+
+    //! returns true if data is null
+    bool empty() const;
+
+    //! returns true if not empty and points to external(user-allocated) gpu memory
+    bool external() const;
+
+    //! returns pointer to the first byte of the GPU memory
+    uchar* getDevicePtr() const;
+
+    //! returns the total number of array elements
+    size_t total() const;
+
+    //! returns the size of underlying memory in bytes
+    size_t totalMemSize() const;
+
+    //! returns element type
+    int type() const;
+
+private:
+    //! internal use
+    void setFields(SizeArray size, int type, StepArray step = StepArray());
+
+public:
+    /*! includes several bit-fields:
+    - the magic signature
+    - continuity flag
+    - depth
+    - number of channels
+    */
+    int flags;
+
+    //! matrix dimensionality
+    int dims;
+
+    //! shape of this array
+    SizeArray size;
+
+    /*! step values
+    Their semantics is identical to the semantics of step for Mat.
+    */
+    StepArray step;
+
+private:
+    /*! internal use
+    If this GpuMatND holds external memory, this is empty.
+    */
+    std::shared_ptr<GpuData> data_;
+
+    /*! internal use
+    If this GpuMatND manages memory with reference counting, this value is
+    always equal to data_->data. If this GpuMatND holds external memory,
+    data_ is empty and data points to the external memory.
+    */
+    uchar* data;
+
+    /*! internal use
+    If this GpuMatND is a sub-matrix of a larger matrix, this value is the
+    difference of the first byte between the sub-matrix and the whole matrix.
+    */
+    size_t offset;
+};
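A short sketch of how the n-dimensional container is meant to be combined with the 2-D plane accessors above (illustrative only; nd_demo is a hypothetical name, and a CUDA-enabled build is required):

    #include <opencv2/core.hpp>
    #include <opencv2/core/cuda.hpp>

    void nd_demo()
    {
        cv::cuda::GpuMatND nd({4, 64, 64}, CV_32FC1);  // 4 planes of 64x64 float
        // Header for plane 2: built with the user-allocated-data constructor,
        // so no reference counting; it writes through to the ND buffer.
        cv::cuda::GpuMat plane = nd.createGpuMatHeader({2}, cv::Range::all(), cv::Range::all());
        plane.setTo(cv::Scalar::all(1.0f));
    }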
+
+/** @brief Creates a continuous matrix.
+
+@param rows Row count.
+@param cols Column count.
+@param type Type of the matrix.
+@param arr Destination matrix. This parameter changes only if it has a proper type and area (
+\f$\texttt{rows} \times \texttt{cols}\f$ ).
+
+Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
+end of each row.
+ */
+CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);
+
+/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
+
+@param rows Minimum desired number of rows.
+@param cols Minimum desired number of columns.
+@param type Desired matrix type.
+@param arr Destination matrix.
+
+The function does not reallocate memory if the matrix has proper attributes already.
+ */
+CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
+
+/** @brief BufferPool for use with CUDA streams
+
+BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
+only useful when enabled with #setBufferPoolUsage.
+
+@code
+    setBufferPoolUsage(true);
+@endcode
+
+@note #setBufferPoolUsage must be called \em before any Stream declaration.
+
+Users may specify custom allocator for Stream and may implement their own stream based
+functions utilizing the same underlying GPU memory management.
+
+If custom allocator is not specified, BufferPool utilizes StackAllocator by
+default. StackAllocator allocates a chunk of GPU device memory beforehand,
+and when GpuMat is declared later on, it is given the pre-allocated memory.
+This kind of strategy reduces the number of calls for memory allocating APIs
+such as cudaMalloc or cudaMallocPitch.
+
+Below is an example that utilizes BufferPool with StackAllocator:
+
+@code
+    #include <opencv2/opencv.hpp>
+
+    using namespace cv;
+    using namespace cv::cuda;
+
+    int main()
+    {
+        setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
+        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
+
+        Stream stream1, stream2; // Each stream uses 1 stack
+        BufferPool pool1(stream1), pool2(stream2);
+
+        GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
+        GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
+
+        GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
+        GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
+
+        cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
+        cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
+    }
+@endcode
+
+If we allocate another GpuMat on pool1 in the above example, it will be carried out by
+the DefaultAllocator since the stack for pool1 is full.
+
+@code
+    GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
+@endcode
+
+If a third stream is declared in the above example, allocating with #getBuffer
+within that stream will also be carried out by the DefaultAllocator because we've run out of
+stacks.
+
+@code
+    Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
+    BufferPool pool3(stream3);
+    GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
+@endcode
+
+@warning When utilizing StackAllocator, deallocation order is important.
+
+Just like a stack, deallocation must be done in LIFO order. Below is an example of
+erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this
+sample code will emit CV_Assert error.
+
+@code
+    int main()
+    {
+        setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
+        Stream stream; // A default size (10 MB) stack is allocated to this stream
+        BufferPool pool(stream);
+
+        GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
+        GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
+
+        mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
+    }
+@endcode
+
+Since C++ local variables are destroyed in the reverse order of construction,
+the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
+and the corresponding memory is automatically returned to the pool for later usage.
+
+@code
+    int main()
+    {
+        setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
+        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
+
+        Stream stream1, stream2; // Each stream uses 1 stack
+        BufferPool pool1(stream1), pool2(stream2);
+
+        for (int i = 0; i < 10; i++)
+        {
+            GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
+            GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
+
+            GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
+            GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
+
+            d_src1.setTo(Scalar(i), stream1);
+            d_src2.setTo(Scalar(i), stream2);
+
+            cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
+            cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
+            // The order of destruction of the local variables is:
+            // d_dst2 => d_src2 => d_dst1 => d_src1
+            // LIFO rule is satisfied, this code runs without error
+        }
+    }
+@endcode
+ */
+class CV_EXPORTS_W BufferPool
+{
+public:
+
+    //! Gets the BufferPool for the given stream.
+    CV_WRAP explicit BufferPool(Stream& stream);
+
+    //! Allocates a new GpuMat of given size and type.
+    CV_WRAP GpuMat getBuffer(int rows, int cols, int type);
+
+    //! Allocates a new GpuMat of given size and type.
+    CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    //! Returns the allocator associated with the stream.
+    CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
+
+private:
+    Ptr<GpuMat::Allocator> allocator_;
+};
+
+//! BufferPool management (must be called before Stream creation)
+CV_EXPORTS_W void setBufferPoolUsage(bool on);
+CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
+
+Its interface is also Mat-like but with additional memory type parameters.
+
+- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
+  uploading/downloading data from/to GPU.
+- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
+  address space, if supported.
+- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
+  used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
+  utilization.
+
+@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
+Pinned Memory APIs* document or *CUDA C Programming Guide*.
+ */
+class CV_EXPORTS_W HostMem
+{
+public:
+    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
+
+    static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
+
+    CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
+
+    HostMem(const HostMem& m);
+
+    CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
+    CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
+
+    //! creates from host memory, copying the data
+    CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
+
+    ~HostMem();
+
+    HostMem& operator =(const HostMem& m);
+
+    //! swaps with other smart pointer
+    CV_WRAP void swap(HostMem& b);
+
+    //! returns deep copy of the matrix, i.e. the data is copied
+    CV_WRAP HostMem clone() const;
+
+    //! allocates new matrix data unless the matrix already has specified size and type.
+    CV_WRAP void create(int rows, int cols, int type);
+    void create(Size size, int type);
+
+    //! creates alternative HostMem header for the same data, with different
+    //! number of channels and/or different number of rows
+    CV_WRAP HostMem reshape(int cn, int rows = 0) const;
+
+    //! decrements the reference counter and releases the memory if needed.
+    void release();
+
+    //! returns matrix header with disabled reference counting for HostMem data.
+    CV_WRAP Mat createMatHeader() const;
+
+    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
+    for it.
+
+    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
+    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
+    eliminates an extra copy.
+    */
+    GpuMat createGpuMatHeader() const;
+
+    // Please see cv::Mat for descriptions
+    CV_WRAP bool isContinuous() const;
+    CV_WRAP size_t elemSize() const;
+    CV_WRAP size_t elemSize1() const;
+    CV_WRAP int type() const;
+    CV_WRAP int depth() const;
+    CV_WRAP int channels() const;
+    CV_WRAP size_t step1() const;
+    CV_WRAP Size size() const;
+    CV_WRAP bool empty() const;
+
+    // Please see cv::Mat for descriptions
+    int flags;
+    int rows, cols;
+    CV_PROP size_t step;
+
+    uchar* data;
+    int* refcount;
+
+    uchar* datastart;
+    const uchar* dataend;
+
+    AllocType alloc_type;
+};
+
+/** @brief Page-locks the memory of matrix and maps it for the device(s).
+
+@param m Input matrix.
+ */
+CV_EXPORTS_W void registerPageLocked(Mat& m);
+
+/** @brief Unmaps the memory of matrix and makes it pageable again.
+
+@param m Input matrix.
+ */
+CV_EXPORTS_W void unregisterPageLocked(Mat& m);
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+/** @brief This class encapsulates a queue of asynchronous calls.
+
+@note Currently, you may face problems if an operation is enqueued twice with different data. Some
+functions use the constant GPU memory, and next call may update the memory before the previous one
+has been finished. But calling different operations asynchronously is safe because each operation
+has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
+also safe.
+
+@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
+
+@code
+void thread1()
+{
+    cv::cuda::Stream stream1;
+    cv::cuda::func1(..., stream1);
+}
+
+void thread2()
+{
+    cv::cuda::Stream stream2;
+    cv::cuda::func2(..., stream2);
+}
+@endcode
+
+@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
+In multi-threading environment the stream objects must be passed explicitly (see previous note).
+ */
+class CV_EXPORTS_W Stream
+{
+    typedef void (Stream::*bool_type)() const;
+    void this_type_does_not_support_comparisons() const {}
+
+public:
+    typedef void (*StreamCallback)(int status, void* userData);
+
+    //! creates a new asynchronous stream
+    CV_WRAP Stream();
+
+    //! creates a new asynchronous stream with custom allocator
+    CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);
+
+    /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream
+
+    @note The cudaFlags parameter is passed to the underlying api cudaStreamCreateWithFlags() and
+    supports the same parameter values.
+    @code
+    // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking,
+    // non-default CUDA stream
+    cv::cuda::Stream cvStream(cudaStreamNonBlocking);
+    @endcode
+    */
+    CV_WRAP Stream(const size_t cudaFlags);
+
+    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
+    */
+    CV_WRAP bool queryIfComplete() const;
+
+    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
+    */
+    CV_WRAP void waitForCompletion();
+
+    /** @brief Makes a compute stream wait on an event.
+    */
+    CV_WRAP void waitEvent(const Event& event);
+
+    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
+    completed.
+
+    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
+    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
+    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
+    serialized.
+    */
+    void enqueueHostCallback(StreamCallback callback, void* userData);
+
+    //! return Stream object for default CUDA stream
+    CV_WRAP static Stream& Null();
+
+    //! returns true if stream object is not default (!= 0)
+    operator bool_type() const;
+
+    //! return Pointer to CUDA stream
+    CV_WRAP void* cudaPtr() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    Stream(const Ptr<Impl>& impl);
+
+    friend struct StreamAccessor;
+    friend class BufferPool;
+    friend class DefaultDeviceInitializer;
+};
+
+class CV_EXPORTS_W Event
+{
+public:
+    enum CreateFlags
+    {
+        DEFAULT        = 0x00, /**< Default event flag */
+        BLOCKING_SYNC  = 0x01, /**< Event uses blocking synchronization */
+        DISABLE_TIMING = 0x02, /**< Event will not record timing data */
+        INTERPROCESS   = 0x04  /**< Event is suitable for interprocess use. DisableTiming must be set */
+    };
+
+    CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
+
+    //! records an event
+    CV_WRAP void record(Stream& stream = Stream::Null());
+
+    //! queries an event's status
+    CV_WRAP bool queryIfComplete() const;
+
+    //! waits for an event to complete
+    CV_WRAP void waitForCompletion();
+
+    //! computes the elapsed time between events
+    CV_WRAP static float elapsedTime(const Event& start, const Event& end);
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    Event(const Ptr<Impl>& impl);
+
+    friend struct EventAccessor;
+};
+CV_ENUM_FLAGS(Event::CreateFlags)
+
+//! @} cudacore_struct
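Streams, events, and page-locked HostMem are designed to be combined: pinned host buffers make the upload/download overloads genuinely non-blocking, and a pair of recorded events measures the enqueued work. A minimal sketch (illustrative only; async_round_trip is a hypothetical name, and a CUDA-enabled build is required):

    #include <opencv2/core.hpp>
    #include <opencv2/core/cuda.hpp>

    float async_round_trip(const cv::Mat& src)
    {
        using namespace cv::cuda;

        HostMem h_src(src, HostMem::PAGE_LOCKED);   // pinned copy of the input
        HostMem h_dst(src.size(), src.type(), HostMem::PAGE_LOCKED);

        Stream stream;
        Event start, stop;
        GpuMat d_buf;

        start.record(stream);
        d_buf.upload(h_src, stream);    // non-blocking: source is page-locked
        d_buf.download(h_dst, stream);  // non-blocking device -> host copy
        stop.record(stream);

        stream.waitForCompletion();     // the host blocks here, not earlier
        return Event::elapsedTime(start, stop);  // milliseconds between events
    }

Note that timing only works because the events were created without DISABLE_TIMING; with that flag set, elapsedTime cannot be used.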
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+//! @addtogroup cudacore_init
+//! @{
+
+/** @brief Returns the number of installed CUDA-enabled devices.
+
+Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
+this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
+returns -1.
+ */
+CV_EXPORTS_W int getCudaEnabledDeviceCount();
+
+/** @brief Sets a device and initializes it for the current thread.
+
+@param device System index of a CUDA device starting with 0.
+
+If the call of this function is omitted, a default device is initialized at the first CUDA usage.
+ */
+CV_EXPORTS_W void setDevice(int device);
+
+/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
+ */
+CV_EXPORTS_W int getDevice();
+
+/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
+process.
+
+Any subsequent API call to this device will reinitialize the device.
+ */
+CV_EXPORTS_W void resetDevice();
+
+/** @brief Enumeration providing CUDA computing features.
+ */
+enum FeatureSet
+{
+    FEATURE_SET_COMPUTE_10 = 10,
+    FEATURE_SET_COMPUTE_11 = 11,
+    FEATURE_SET_COMPUTE_12 = 12,
+    FEATURE_SET_COMPUTE_13 = 13,
+    FEATURE_SET_COMPUTE_20 = 20,
+    FEATURE_SET_COMPUTE_21 = 21,
+    FEATURE_SET_COMPUTE_30 = 30,
+    FEATURE_SET_COMPUTE_32 = 32,
+    FEATURE_SET_COMPUTE_35 = 35,
+    FEATURE_SET_COMPUTE_50 = 50,
+
+    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
+    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
+    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
+    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
+    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
+};
+
+//! checks whether current device supports the given feature
+CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
+
+/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
+built for.
+
+According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
+capability can always be compiled to binary code of greater or equal compute capability".
+ */
+class CV_EXPORTS_W TargetArchs
+{
+public:
+    /** @brief The following method checks whether the module was built with the support of the given feature:
+
+    @param feature_set Features to be checked. See cuda::FeatureSet.
+    */
+    static bool builtWith(FeatureSet feature_set);
+
+    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
+    code for the given architecture(s):
+
+    @param major Major compute capability version.
+    @param minor Minor compute capability version.
+    */
+    CV_WRAP static bool has(int major, int minor);
+    CV_WRAP static bool hasPtx(int major, int minor);
+    CV_WRAP static bool hasBin(int major, int minor);
+
+    CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
+    CV_WRAP static bool hasEqualOrGreater(int major, int minor);
+    CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
+    CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
+};
+
+/** @brief Class providing functionality for querying the specified GPU properties.
+ */
+class CV_EXPORTS_W DeviceInfo
+{
+public:
+    //! creates DeviceInfo object for the current GPU
+    CV_WRAP DeviceInfo();
+
+    /** @brief The constructors.
+
+    @param device_id System index of the CUDA device starting with 0.
+
+    Constructs the DeviceInfo object for the specified device. If device_id parameter is omitted, it
+    constructs an object for the current device.
+    */
+    CV_WRAP DeviceInfo(int device_id);
+
+    /** @brief Returns system index of the CUDA device starting with 0.
+    */
+    CV_WRAP int deviceID() const;
+
+    //! ASCII string identifying device
+    const char* name() const;
+
+    //! global memory available on device in bytes
+    CV_WRAP size_t totalGlobalMem() const;
+
+    //! shared memory available per block in bytes
+    CV_WRAP size_t sharedMemPerBlock() const;
+
+    //! 32-bit registers available per block
+    CV_WRAP int regsPerBlock() const;
+
+    //!
warp size in threads + CV_WRAP int warpSize() const; + + //! maximum pitch in bytes allowed by memory copies + CV_WRAP size_t memPitch() const; + + //! maximum number of threads per block + CV_WRAP int maxThreadsPerBlock() const; + + //! maximum size of each dimension of a block + CV_WRAP Vec3i maxThreadsDim() const; + + //! maximum size of each dimension of a grid + CV_WRAP Vec3i maxGridSize() const; + + //! clock frequency in kilohertz + CV_WRAP int clockRate() const; + + //! constant memory available on device in bytes + CV_WRAP size_t totalConstMem() const; + + //! major compute capability + CV_WRAP int majorVersion() const; + + //! minor compute capability + CV_WRAP int minorVersion() const; + + //! alignment requirement for textures + CV_WRAP size_t textureAlignment() const; + + //! pitch alignment requirement for texture references bound to pitched memory + CV_WRAP size_t texturePitchAlignment() const; + + //! number of multiprocessors on device + CV_WRAP int multiProcessorCount() const; + + //! specified whether there is a run time limit on kernels + CV_WRAP bool kernelExecTimeoutEnabled() const; + + //! device is integrated as opposed to discrete + CV_WRAP bool integrated() const; + + //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer + CV_WRAP bool canMapHostMemory() const; + + enum ComputeMode + { + ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */ + ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */ + ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */ + ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */ + }; + + //! compute mode + CV_WRAP DeviceInfo::ComputeMode computeMode() const; + + //! maximum 1D texture size + CV_WRAP int maxTexture1D() const; + + //! maximum 1D mipmapped texture size + CV_WRAP int maxTexture1DMipmap() const; + + //! maximum size for 1D textures bound to linear memory + CV_WRAP int maxTexture1DLinear() const; + + //! maximum 2D texture dimensions + CV_WRAP Vec2i maxTexture2D() const; + + //! maximum 2D mipmapped texture dimensions + CV_WRAP Vec2i maxTexture2DMipmap() const; + + //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory + CV_WRAP Vec3i maxTexture2DLinear() const; + + //! maximum 2D texture dimensions if texture gather operations have to be performed + CV_WRAP Vec2i maxTexture2DGather() const; + + //! maximum 3D texture dimensions + CV_WRAP Vec3i maxTexture3D() const; + + //! maximum Cubemap texture dimensions + CV_WRAP int maxTextureCubemap() const; + + //! maximum 1D layered texture dimensions + CV_WRAP Vec2i maxTexture1DLayered() const; + + //! maximum 2D layered texture dimensions + CV_WRAP Vec3i maxTexture2DLayered() const; + + //! maximum Cubemap layered texture dimensions + CV_WRAP Vec2i maxTextureCubemapLayered() const; + + //! maximum 1D surface size + CV_WRAP int maxSurface1D() const; + + //! maximum 2D surface dimensions + CV_WRAP Vec2i maxSurface2D() const; + + //! maximum 3D surface dimensions + CV_WRAP Vec3i maxSurface3D() const; + + //! maximum 1D layered surface dimensions + CV_WRAP Vec2i maxSurface1DLayered() const; + + //! maximum 2D layered surface dimensions + CV_WRAP Vec3i maxSurface2DLayered() const; + + //! 
maximum Cubemap surface dimensions + CV_WRAP int maxSurfaceCubemap() const; + + //! maximum Cubemap layered surface dimensions + CV_WRAP Vec2i maxSurfaceCubemapLayered() const; + + //! alignment requirements for surfaces + CV_WRAP size_t surfaceAlignment() const; + + //! device can possibly execute multiple kernels concurrently + CV_WRAP bool concurrentKernels() const; + + //! device has ECC support enabled + CV_WRAP bool ECCEnabled() const; + + //! PCI bus ID of the device + CV_WRAP int pciBusID() const; + + //! PCI device ID of the device + CV_WRAP int pciDeviceID() const; + + //! PCI domain ID of the device + CV_WRAP int pciDomainID() const; + + //! true if device is a Tesla device using TCC driver, false otherwise + CV_WRAP bool tccDriver() const; + + //! number of asynchronous engines + CV_WRAP int asyncEngineCount() const; + + //! device shares a unified address space with the host + CV_WRAP bool unifiedAddressing() const; + + //! peak memory clock frequency in kilohertz + CV_WRAP int memoryClockRate() const; + + //! global memory bus width in bits + CV_WRAP int memoryBusWidth() const; + + //! size of L2 cache in bytes + CV_WRAP int l2CacheSize() const; + + //! maximum resident threads per multiprocessor + CV_WRAP int maxThreadsPerMultiProcessor() const; + + //! gets free and total device memory + CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const; + CV_WRAP size_t freeMemory() const; + CV_WRAP size_t totalMemory() const; + + /** @brief Provides information on CUDA feature support. + + @param feature_set Features to be checked. See cuda::FeatureSet. + + This function returns true if the device has the specified CUDA feature. Otherwise, it returns false + */ + bool supports(FeatureSet feature_set) const; + + /** @brief Checks the CUDA module and device compatibility. + + This function returns true if the CUDA module can be run on the specified device. Otherwise, it + returns false . + */ + CV_WRAP bool isCompatible() const; + +private: + int device_id_; +}; + +CV_EXPORTS_W void printCudaDeviceInfo(int device); +CV_EXPORTS_W void printShortCudaDeviceInfo(int device); + +/** @brief Converts an array to half precision floating number. + +@param _src input array. +@param _dst output array. +@param stream Stream for the asynchronous version. +@sa convertFp16 +*/ +CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null()); + +//! @} cudacore_init + +}} // namespace cv { namespace cuda { + + +#include "opencv2/core/cuda.inl.hpp" + +#endif /* OPENCV_CORE_CUDA_HPP */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda.inl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda.inl.hpp new file mode 100644 index 0000000..3f2a0c7 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda.inl.hpp @@ -0,0 +1,723 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CUDAINL_HPP +#define OPENCV_CORE_CUDAINL_HPP + +#include "opencv2/core/cuda.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { + +//=================================================================================== +// GpuMat +//=================================================================================== + +inline +GpuMat::GpuMat(Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{} + +inline +GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (rows_ > 0 && cols_ > 0) + create(rows_, cols_, type_); +} + +inline +GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (size_.height > 0 && size_.width > 0) + create(size_.height, size_.width, type_); +} + +inline +GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (rows_ > 0 && cols_ > 0) + { + create(rows_, cols_, type_); + setTo(s_); + } +} + +inline +GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + if (size_.height > 0 && size_.width > 0) + { + create(size_.height, size_.width, type_); + setTo(s_); + } +} + +inline +GpuMat::GpuMat(const GpuMat& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator) +{ + if (refcount) + CV_XADD(refcount, 1); +} + +inline 
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_) : + flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_) +{ + upload(arr); +} + +inline +GpuMat::~GpuMat() +{ + release(); +} + +inline +GpuMat& GpuMat::operator =(const GpuMat& m) +{ + if (this != &m) + { + GpuMat temp(m); + swap(temp); + } + + return *this; +} + +inline +void GpuMat::create(Size size_, int type_) +{ + create(size_.height, size_.width, type_); +} + +inline +void GpuMat::swap(GpuMat& b) +{ + std::swap(flags, b.flags); + std::swap(rows, b.rows); + std::swap(cols, b.cols); + std::swap(step, b.step); + std::swap(data, b.data); + std::swap(datastart, b.datastart); + std::swap(dataend, b.dataend); + std::swap(refcount, b.refcount); + std::swap(allocator, b.allocator); +} + +inline +GpuMat GpuMat::clone() const +{ + GpuMat m; + copyTo(m); + return m; +} + +inline +void GpuMat::copyTo(OutputArray dst, InputArray mask) const +{ + copyTo(dst, mask, Stream::Null()); +} + +inline +GpuMat& GpuMat::setTo(Scalar s) +{ + return setTo(s, Stream::Null()); +} + +inline +GpuMat& GpuMat::setTo(Scalar s, InputArray mask) +{ + return setTo(s, mask, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype) const +{ + convertTo(dst, rtype, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const +{ + convertTo(dst, rtype, alpha, beta, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const +{ + convertTo(dst, rtype, alpha, 0.0, stream); +} + +inline +void GpuMat::assignTo(GpuMat& m, int _type) const +{ + if (_type < 0) + m = *this; + else + convertTo(m, _type); +} + +inline +uchar* GpuMat::ptr(int y) +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step * y; +} + +inline +const uchar* GpuMat::ptr(int y) const +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step * y; +} + +template <typename _Tp> inline +_Tp* GpuMat::ptr(int y) +{ + return (_Tp*)ptr(y); +} + +template <typename _Tp> inline +const _Tp* GpuMat::ptr(int y) const +{ + return (const _Tp*)ptr(y); +} + +template <class T> inline +GpuMat::operator PtrStepSz<T>() const +{ + return PtrStepSz<T>(rows, cols, (T*)data, step); +} + +template <class T> inline +GpuMat::operator PtrStep<T>() const +{ + return PtrStep<T>((T*)data, step); +} + +inline +GpuMat GpuMat::row(int y) const +{ + return GpuMat(*this, Range(y, y+1), Range::all()); +} + +inline +GpuMat GpuMat::col(int x) const +{ + return GpuMat(*this, Range::all(), Range(x, x+1)); +} + +inline +GpuMat GpuMat::rowRange(int startrow, int endrow) const +{ + return GpuMat(*this, Range(startrow, endrow), Range::all()); +} + +inline +GpuMat GpuMat::rowRange(Range r) const +{ + return GpuMat(*this, r, Range::all()); +} + +inline +GpuMat GpuMat::colRange(int startcol, int endcol) const +{ + return GpuMat(*this, Range::all(), Range(startcol, endcol)); +} + +inline +GpuMat GpuMat::colRange(Range r) const +{ + return GpuMat(*this, Range::all(), r); +} + +inline +GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const +{ + return GpuMat(*this, rowRange_, colRange_); +} + +inline +GpuMat GpuMat::operator ()(Rect roi) const +{ + return GpuMat(*this, roi); +} + +inline +bool GpuMat::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +inline +size_t GpuMat::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t GpuMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int GpuMat::type() const +{ + return
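+// [Editor's sketch, not part of the upstream header] The inline members above make
+// GpuMat usable much like Mat. A minimal host/device round trip, assuming a
+// CUDA-enabled OpenCV build (the names host, dev, roi and back are hypothetical):
+//
+//   cv::Mat host(480, 640, CV_8UC3, cv::Scalar::all(0)); // host-side image
+//   cv::cuda::GpuMat dev;
+//   dev.upload(host);                                    // host -> device copy
+//   cv::cuda::GpuMat roi = dev(cv::Rect(0, 0, 64, 64));  // view into dev, no copy
+//   roi.setTo(cv::Scalar(255, 0, 0));                    // writes through to dev
+//   cv::Mat back;
+//   dev.download(back);                                  // device -> host copy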
CV_MAT_TYPE(flags); +} + +inline +int GpuMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int GpuMat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +size_t GpuMat::step1() const +{ + return step / elemSize1(); +} + +inline +Size GpuMat::size() const +{ + return Size(cols, rows); +} + +inline +bool GpuMat::empty() const +{ + return data == 0; +} + +inline +void* GpuMat::cudaPtr() const +{ + return data; +} + +static inline +GpuMat createContinuous(int rows, int cols, int type) +{ + GpuMat m; + createContinuous(rows, cols, type, m); + return m; +} + +static inline +void createContinuous(Size size, int type, OutputArray arr) +{ + createContinuous(size.height, size.width, type, arr); +} + +static inline +GpuMat createContinuous(Size size, int type) +{ + GpuMat m; + createContinuous(size, type, m); + return m; +} + +static inline +void ensureSizeIsEnough(Size size, int type, OutputArray arr) +{ + ensureSizeIsEnough(size.height, size.width, type, arr); +} + +static inline +void swap(GpuMat& a, GpuMat& b) +{ + a.swap(b); +} + +//=================================================================================== +// GpuMatND +//=================================================================================== + +inline +GpuMatND::GpuMatND() : + flags(0), dims(0), data(nullptr), offset(0) +{ +} + +inline +GpuMatND::GpuMatND(SizeArray _size, int _type) : + flags(0), dims(0), data(nullptr), offset(0) +{ + create(std::move(_size), _type); +} + +inline +void GpuMatND::swap(GpuMatND& m) noexcept +{ + std::swap(*this, m); +} + +inline +bool GpuMatND::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +inline +bool GpuMatND::isSubmatrix() const +{ + return (flags & Mat::SUBMATRIX_FLAG) != 0; +} + +inline +size_t GpuMatND::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t GpuMatND::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +bool GpuMatND::empty() const +{ + return data == nullptr; +} + +inline +bool GpuMatND::external() const +{ + return !empty() && data_.use_count() == 0; +} + +inline +uchar* GpuMatND::getDevicePtr() const +{ + return data + offset; +} + +inline +size_t GpuMatND::total() const +{ + size_t p = 1; + for(auto s : size) + p *= s; + return p; +} + +inline +size_t GpuMatND::totalMemSize() const +{ + return size[0] * step[0]; +} + +inline +int GpuMatND::type() const +{ + return CV_MAT_TYPE(flags); +} + +//=================================================================================== +// HostMem +//=================================================================================== + +inline +HostMem::HostMem(AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ +} + +inline +HostMem::HostMem(const HostMem& m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type) +{ + if( refcount ) + CV_XADD(refcount, 1); +} + +inline +HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ + if (rows_ > 0 && cols_ > 0) + create(rows_, cols_, type_); +} + +inline +HostMem::HostMem(Size size_, int type_, AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ + if (size_.height > 0 && size_.width > 0) + 
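+// [Editor's sketch, not part of the upstream header] createContinuous allocates a
+// GpuMat whose rows are stored with no step padding, so the buffer can be treated
+// as one flat array; ensureSizeIsEnough reuses an existing allocation when it is
+// already large enough. A minimal use (the name m is hypothetical):
+//
+//   cv::cuda::GpuMat m = cv::cuda::createContinuous(480, 640, CV_32FC1);
+//   CV_Assert(m.isContinuous()); // step == cols * elemSize(), rows packed back to back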
create(size_.height, size_.width, type_); +} + +inline +HostMem::HostMem(InputArray arr, AllocType alloc_type_) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_) +{ + arr.getMat().copyTo(*this); +} + +inline +HostMem::~HostMem() +{ + release(); +} + +inline +HostMem& HostMem::operator =(const HostMem& m) +{ + if (this != &m) + { + HostMem temp(m); + swap(temp); + } + + return *this; +} + +inline +void HostMem::swap(HostMem& b) +{ + std::swap(flags, b.flags); + std::swap(rows, b.rows); + std::swap(cols, b.cols); + std::swap(step, b.step); + std::swap(data, b.data); + std::swap(datastart, b.datastart); + std::swap(dataend, b.dataend); + std::swap(refcount, b.refcount); + std::swap(alloc_type, b.alloc_type); +} + +inline +HostMem HostMem::clone() const +{ + HostMem m(size(), type(), alloc_type); + createMatHeader().copyTo(m); + return m; +} + +inline +void HostMem::create(Size size_, int type_) +{ + create(size_.height, size_.width, type_); +} + +inline +Mat HostMem::createMatHeader() const +{ + return Mat(size(), type(), data, step); +} + +inline +bool HostMem::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +inline +size_t HostMem::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t HostMem::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int HostMem::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int HostMem::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int HostMem::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +size_t HostMem::step1() const +{ + return step / elemSize1(); +} + +inline +Size HostMem::size() const +{ + return Size(cols, rows); +} + +inline +bool HostMem::empty() const +{ + return data == 0; +} + +static inline +void swap(HostMem& a, HostMem& b) +{ + a.swap(b); +} + +//=================================================================================== +// Stream +//=================================================================================== + +inline +Stream::Stream(const Ptr<Impl>& impl) + : impl_(impl) +{ +} + +//=================================================================================== +// Event +//=================================================================================== + +inline +Event::Event(const Ptr<Impl>& impl) + : impl_(impl) +{ +} + +//=================================================================================== +// Initialization & Info +//=================================================================================== + +inline +bool TargetArchs::has(int major, int minor) +{ + return hasPtx(major, minor) || hasBin(major, minor); +} + +inline +bool TargetArchs::hasEqualOrGreater(int major, int minor) +{ + return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor); +} + +inline +DeviceInfo::DeviceInfo() +{ + device_id_ = getDevice(); +} + +inline +DeviceInfo::DeviceInfo(int device_id) +{ + CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() ); + device_id_ = device_id; +} + +inline +int DeviceInfo::deviceID() const +{ + return device_id_; +} + +inline +size_t DeviceInfo::freeMemory() const +{ + size_t _totalMemory = 0, _freeMemory = 0; + queryMemory(_totalMemory, _freeMemory); + return _freeMemory; +} + +inline +size_t DeviceInfo::totalMemory() const +{ + size_t _totalMemory = 0, _freeMemory = 0; + queryMemory(_totalMemory, _freeMemory); + return _totalMemory; +} + +inline +bool DeviceInfo::supports(FeatureSet feature_set) const +{ + int
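+// [Editor's sketch, not part of the upstream header] Page-locked HostMem is what
+// makes asynchronous transfers useful: the copy can overlap other work only when
+// the host buffer is pinned. A hedged example, assuming a CUDA build (the names
+// pinned, header, dev and stream are hypothetical):
+//
+//   cv::cuda::HostMem pinned(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
+//   cv::Mat header = pinned.createMatHeader(); // cv::Mat view over pinned memory, no copy
+//   cv::cuda::Stream stream;
+//   cv::cuda::GpuMat dev;
+//   dev.upload(header, stream);                // asynchronous with respect to the host
+//   stream.waitForCompletion();                // synchronize before touching results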
version = majorVersion() * 10 + minorVersion(); + return version >= feature_set; +} + + +}} // namespace cv { namespace cuda { + +//=================================================================================== +// Mat +//=================================================================================== + +namespace cv { + +inline +Mat::Mat(const cuda::GpuMat& m) + : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows) +{ + m.download(*this); +} + +} + +//! @endcond + +#endif // OPENCV_CORE_CUDAINL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/block.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/block.hpp new file mode 100644 index 0000000..c277f0e --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/block.hpp @@ -0,0 +1,211 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP +#define OPENCV_CUDA_DEVICE_BLOCK_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct Block + { + static __device__ __forceinline__ unsigned int id() + { + return blockIdx.x; + } + + static __device__ __forceinline__ unsigned int stride() + { + return blockDim.x * blockDim.y * blockDim.z; + } + + static __device__ __forceinline__ void sync() + { + __syncthreads(); + } + + static __device__ __forceinline__ int flattenedThreadId() + { + return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + } + + template <typename It, typename T> + static __device__ __forceinline__ void fill(It beg, It end, const T& value) + { + int STRIDE = stride(); + It t = beg + flattenedThreadId(); + + for(; t < end; t += STRIDE) + *t = value; + } + + template <typename OutIt, typename T> + static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value) + { + int STRIDE = stride(); + int tid = flattenedThreadId(); + value += tid; + + for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE) + *t = value; + } + + template <typename InIt, typename OutIt> + static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out) + { + int STRIDE = stride(); + InIt t = beg + flattenedThreadId(); + OutIt o = out + (t - beg); + + for(; t < end; t += STRIDE, o += STRIDE) + *o = *t; + } + + template <typename InIt, typename OutIt, class UnOp> + static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op) + { + int STRIDE = stride(); + InIt t = beg + flattenedThreadId(); + OutIt o = out + (t - beg); + + for(; t < end; t += STRIDE, o += STRIDE) + *o = op(*t); + } + + template <typename InIt1, typename InIt2, typename OutIt, class BinOp> + static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) + { + int STRIDE = stride(); + InIt1 t1 = beg1 + flattenedThreadId(); + InIt2 t2 = beg2 + flattenedThreadId(); + OutIt o = out + (t1 - beg1); + + for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE) + *o = op(*t1, *t2); + } + + template <int CTA_SIZE, typename T, class BinOp> + static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op) + { + int tid = flattenedThreadId(); + T val = buffer[tid]; + + if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); } + if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); } + if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); } + if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); } + + if (tid < 32) + { + if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); } + if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); } + if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); } + if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); } + if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); } + if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); } + } + } + + template <int CTA_SIZE, typename T, class BinOp> + static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op) + { + int tid = flattenedThreadId(); + T val = buffer[tid] = init; + __syncthreads(); + + if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); } + if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); } + if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); } + if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); } + + if (tid < 32) + { + if (CTA_SIZE >= 64) { buffer[tid] =
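+// [Editor's note, not part of the upstream header] The loop above is a classic
+// shared-memory tree reduction: each round halves the number of active threads and
+// folds buffer[tid + stride] into buffer[tid]; once a single warp remains (tid < 32)
+// it continues without __syncthreads(), relying on pre-Volta warp-synchronous
+// execution. A hypothetical call site, with CTA_SIZE matching the launch block size
+// and plus<float> assumed from functional.hpp:
+//
+//   __shared__ float smem[256];
+//   smem[threadIdx.x] = partial; // one partial result per thread (partial is hypothetical)
+//   __syncthreads();
+//   cv::cuda::device::Block::reduce<256>(smem, cv::cuda::device::plus<float>());
+//   // smem[0] now holds the block-wide sum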
val = op(val, buffer[tid + 32]); } + if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); } + if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); } + if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); } + if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); } + if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); } + } + __syncthreads(); + return buffer[0]; + } + + template <typename T, class BinOp> + static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op) + { + int ftid = flattenedThreadId(); + int sft = stride(); + + if (sft < n) + { + for (unsigned int i = sft + ftid; i < n; i += sft) + data[ftid] = op(data[ftid], data[i]); + + __syncthreads(); + + n = sft; + } + + while (n > 1) + { + unsigned int half = n/2; + + if (ftid < half) + data[ftid] = op(data[ftid], data[n - ftid - 1]); + + __syncthreads(); + + n = n - half; + } + } + }; +}}} + +//! @endcond + +#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/border_interpolate.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/border_interpolate.hpp new file mode 100644 index 0000000..874f705 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/border_interpolate.hpp @@ -0,0 +1,722 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP +#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP + +#include "saturate_cast.hpp" +#include "vec_traits.hpp" +#include "vec_math.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + ////////////////////////////////////////////////////////////// + // BrdConstant + + template <typename D> struct BrdRowConstant + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {} + + template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const + { + return x >= 0 ? saturate_cast<D>(data[x]) : val; + } + + template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const + { + return x < width ? saturate_cast<D>(data[x]) : val; + } + + template <typename T> __device__ __forceinline__ D at(int x, const T* data) const + { + return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val; + } + + int width; + D val; + }; + + template <typename D> struct BrdColConstant + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {} + + template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val; + } + + template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val; + } + + template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val; + } + + int height; + D val; + }; + + template <typename D> struct BrdConstant + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_) + { + } + + template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val; + } + + template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return (x >= 0 && x < width && y >= 0 && y < height) ?
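+// [Editor's note, not part of the upstream header] Each Brd* family maps an
+// out-of-range coordinate to a pixel, mirroring the cv::BORDER_* modes. For a row
+// of width 8 and x = -2:
+//   BrdConstant   -> val        (BORDER_CONSTANT)
+//   BrdReplicate  -> pixel 0    (BORDER_REPLICATE, clamp to the edge)
+//   BrdReflect101 -> pixel 2    (BORDER_REFLECT_101, mirror without repeating the edge)
+//   BrdReflect    -> pixel 1    (BORDER_REFLECT, mirror repeating the edge)
+//   BrdWrap       -> pixel 6    (BORDER_WRAP, periodic)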
saturate_cast<D>(src(y, x)) : val; + } + + int height; + int width; + D val; + }; + + ////////////////////////////////////////////////////////////// + // BrdReplicate + + template <typename D> struct BrdRowReplicate + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::max(x, 0); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::min(x, last_col); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_low(x)]); + } + + template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_high(x)]); + } + + template <typename T> __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col(x)]); + } + + int last_col; + }; + + template <typename D> struct BrdColReplicate + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::max(y, 0); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::min(y, last_row); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step)); + } + + int last_row; + }; + + template <typename D> struct BrdReplicate + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::max(y, 0); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::min(y, last_row); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::max(x, 0); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::min(x, last_col); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast<D>(src(idx_row(y), idx_col(x))); + } + + int last_row; + int
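+// [Editor's note, not part of the upstream header] The replicate policy clamps both
+// ends by composing the two one-sided helpers: idx_col(x) = max(min(x, last_col), 0).
+// For example, with width = 5 (last_col = 4): idx_col(-3) = 0, idx_col(2) = 2,
+// idx_col(9) = 4.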
last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdReflect101 + + template <typename D> struct BrdRowReflect101 + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::abs(x) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_low(x)]); + } + + template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_high(x)]); + } + + template <typename T> __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col(x)]); + } + + int last_col; + }; + + template <typename D> struct BrdColReflect101 + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::abs(y) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); + } + + int last_row; + }; + + template <typename D> struct BrdReflect101 + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return ::abs(y) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return ::abs(x) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type
y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast<D>(src(idx_row(y), idx_col(x))); + } + + int last_row; + int last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdReflect + + template <typename D> struct BrdRowReflect + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (::abs(x) - (x < 0)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(::abs(x) - (x < 0)); + } + + template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_low(x)]); + } + + template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_high(x)]); + } + + template <typename T> __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col(x)]); + } + + int last_col; + }; + + template <typename D> struct BrdColReflect + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (::abs(y) - (y < 0)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(::abs(y) - (y < 0)); + } + + template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); + } + + int last_row; + }; + + template <typename D> struct BrdReflect + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {} + template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (::abs(y) - (y < 0)) % (last_row + 1); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/; + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_low(idx_row_high(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (::abs(x) - (x < 0)) % (last_col + 1); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (last_col - ::abs(last_col - x) + (x > last_col)); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_low(idx_col_high(x)); + } + + template <typename T> __device__ __forceinline__
D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast<D>(src(idx_row(y), idx_col(x))); + } + + int last_row; + int last_col; + }; + + ////////////////////////////////////////////////////////////// + // BrdWrap + + template <typename D> struct BrdRowWrap + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {} + template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {} + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (x < width) * x + (x >= width) * (x % width); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(idx_col_low(x)); + } + + template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_low(x)]); + } + + template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col_high(x)]); + } + + template <typename T> __device__ __forceinline__ D at(int x, const T* data) const + { + return saturate_cast<D>(data[idx_col(x)]); + } + + int width; + }; + + template <typename D> struct BrdColWrap + { + typedef D result_type; + + explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {} + template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {} + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return (y < height) * y + (y >= height) * (y % height); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(idx_row_low(y)); + } + + template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step)); + } + + template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const + { + return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step)); + } + + int height; + }; + + template <typename D> struct BrdWrap + { + typedef D result_type; + + __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) : + height(height_), width(width_) + { + } + template <typename U> + __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) : + height(height_), width(width_) + { + } + + __device__ __forceinline__ int idx_row_low(int y) const + { + return (y >= 0) ? y : (y - ((y - height + 1) / height) * height); + } + + __device__ __forceinline__ int idx_row_high(int y) const + { + return (y < height) ? y : (y % height); + } + + __device__ __forceinline__ int idx_row(int y) const + { + return idx_row_high(idx_row_low(y)); + } + + __device__ __forceinline__ int idx_col_low(int x) const + { + return (x >= 0) ? x : (x - ((x - width + 1) / width) * width); + } + + __device__ __forceinline__ int idx_col_high(int x) const + { + return (x < width) ?
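+// [Editor's note, not part of the upstream header] The wrap policy is modular
+// arithmetic written to keep the operands of % non-negative: for x < 0,
+// idx_col_low(x) = x - ((x - width + 1) / width) * width shifts x up by a whole
+// number of periods into [0, width). For example, width = 5 and x = -7:
+// (-7 - 5 + 1) / 5 = -11 / 5 = -2 in C++ (truncation toward zero), so the index is
+// -7 + 10 = 3, which matches -7 mod 5.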
x : (x % width); + } + + __device__ __forceinline__ int idx_col(int x) const + { + return idx_col_high(idx_col_low(x)); + } + + template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const + { + return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]); + } + + template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const + { + return saturate_cast<D>(src(idx_row(y), idx_col(x))); + } + + int height; + int width; + }; + + ////////////////////////////////////////////////////////////// + // BorderReader + + template <typename Ptr2D, typename B> struct BorderReader + { + typedef typename B::result_type elem_type; + typedef typename Ptr2D::index_type index_type; + + __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {} + + __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const + { + return b.at(y, x, ptr); + } + + Ptr2D ptr; + B b; + }; + + // under win32 there is some bug with templated types that passed as kernel parameters + // with this specialization all works fine + template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> > + { + typedef typename BrdConstant<D>::result_type elem_type; + typedef typename Ptr2D::index_type index_type; + + __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) : + src(src_), height(b.height), width(b.width), val(b.val) + { + } + + __device__ __forceinline__ D operator ()(index_type y, index_type x) const + { + return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val; + } + + Ptr2D src; + int height; + int width; + D val; + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/color.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/color.hpp new file mode 100644 index 0000000..dcce280 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/color.hpp @@ -0,0 +1,309 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution.
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_COLOR_HPP +#define OPENCV_CUDA_COLOR_HPP + +#include "detail/color_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + // All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements + // template class ColorSpace1_to_ColorSpace2_traits + // { + // typedef ... functor_type; + // static __host__ __device__ functor_type create_functor(); + // }; + + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6) + + #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3) + OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4) + + #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5) + OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6) + + #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5) + OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6) + + #undef 
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS + + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS + + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS + + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0) + 
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS + + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS + + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0) + OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0) + + #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS + + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2) + 
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS + + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS + + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0) + + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0) + OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0) + + #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS +}}} // namespace cv { namespace cuda { namespace cudev + +//! 
@endcond + +#endif // OPENCV_CUDA_COLOR_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/common.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/common.hpp new file mode 100644 index 0000000..80b2ff0 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/common.hpp @@ -0,0 +1,123 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_COMMON_HPP +#define OPENCV_CUDA_COMMON_HPP + +#include +#include "opencv2/core/cuda_types.hpp" +#include "opencv2/core/cvdef.h" +#include "opencv2/core/base.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
+
+#ifndef CV_PI_F
+    #ifndef CV_PI
+        #define CV_PI_F 3.14159265f
+    #else
+        #define CV_PI_F ((float)CV_PI)
+    #endif
+#endif
+
+namespace cv { namespace cuda {
+    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
+    {
+        if (cudaSuccess != err)
+            cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
+    }
+}}
+
+#ifndef cudaSafeCall
+    #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
+#endif
+
+namespace cv { namespace cuda
+{
+    template <class T> static inline bool isAligned(const T* ptr, size_t size)
+    {
+        return reinterpret_cast<size_t>(ptr) % size == 0;
+    }
+
+    static inline bool isAligned(size_t step, size_t size)
+    {
+        return step % size == 0;
+    }
+}}
+
+namespace cv { namespace cuda
+{
+    namespace device
+    {
+        __host__ __device__ __forceinline__ int divUp(int total, int grain)
+        {
+            return (total + grain - 1) / grain;
+        }
+
+        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
+        {
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+        }
+
+        template<class T> inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz<T>& img, const cudaTextureDesc& texDesc)
+        {
+            cudaResourceDesc resDesc;
+            memset(&resDesc, 0, sizeof(resDesc));
+            resDesc.resType = cudaResourceTypePitch2D;
+            resDesc.res.pitch2D.devPtr = static_cast<void*>(img.ptr());
+            resDesc.res.pitch2D.height = img.rows;
+            resDesc.res.pitch2D.width = img.cols;
+            resDesc.res.pitch2D.pitchInBytes = img.step;
+            resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>();
+
+            cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) );
+        }
+    }
+}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COMMON_HPP
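The two helpers above are the workhorses of every kernel launch in these headers: cv::cuda::device::divUp rounds the grid size up so partial blocks still cover the image edge, and cudaSafeCall converts a failed CUDA runtime call into a cv::error. A minimal launch sketch under stated assumptions (demoKernel, launchDemo and the 32x8 block shape are illustrative, not part of the SDK):

    #include <cuda_runtime.h>
    #include "opencv2/core/cuda/common.hpp"

    // Illustrative kernel: saturate every pixel of an 8-bit single-channel image.
    __global__ void demoKernel(cv::cuda::PtrStepSzb img)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < img.cols && y < img.rows)
            img(y, x) = 255;
    }

    void launchDemo(cv::cuda::PtrStepSzb img)
    {
        const dim3 block(32, 8);
        const dim3 grid(cv::cuda::device::divUp(img.cols, block.x),
                        cv::cuda::device::divUp(img.rows, block.y)); // ceil-divide: edge pixels get a partial block
        demoKernel<<<grid, block>>>(img);
        cudaSafeCall( cudaGetLastError() );       // launch-configuration errors
        cudaSafeCall( cudaDeviceSynchronize() );  // surface asynchronous kernel errors
    }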
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/datamov_utils.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/datamov_utils.hpp
new file mode 100644
index 0000000..6820d0f
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/datamov_utils.hpp
@@ -0,0 +1,113 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
+#define OPENCV_CUDA_DATAMOV_UTILS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
+
+        // for Fermi memory space is detected automatically
+        template <typename T> struct ForceGlob
+        {
+            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
+        };
+
+    #else // __CUDA_ARCH__ >= 200
+
+        #if defined(_WIN64) || defined(__LP64__)
+            // 64-bit register modifier for inlined asm
+            #define OPENCV_CUDA_ASM_PTR "l"
+        #else
+            // 32-bit register modifier for inlined asm
+            #define OPENCV_CUDA_ASM_PTR "r"
+        #endif
+
+        template <class T> struct ForceGlob;
+
+        #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
+            template <> struct ForceGlob<base_type> \
+            { \
+                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
+                { \
+                    asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
+                } \
+            };
+
+        #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
+            template <> struct ForceGlob<base_type> \
+            { \
+                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
+                { \
+                    asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
+                } \
+            };
+
+        OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
+        OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)
+
+        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB
+        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
+        #undef OPENCV_CUDA_ASM_PTR
+
+    #endif // __CUDA_ARCH__ >= 200
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! 
@endcond + +#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/color_detail.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/color_detail.hpp new file mode 100644 index 0000000..f4b4796 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/color_detail.hpp @@ -0,0 +1,2018 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_COLOR_DETAIL_HPP +#define OPENCV_CUDA_COLOR_DETAIL_HPP + +#include "../common.hpp" +#include "../vec_traits.hpp" +#include "../saturate_cast.hpp" +#include "../limits.hpp" +#include "../functional.hpp" + +//! 
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + #ifndef CV_DESCALE + #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) + #endif + + namespace color_detail + { + template struct ColorChannel + { + typedef float worktype_f; + static __device__ __forceinline__ T max() { return numeric_limits::max(); } + static __device__ __forceinline__ T half() { return (T)(max()/2 + 1); } + }; + + template<> struct ColorChannel + { + typedef float worktype_f; + static __device__ __forceinline__ float max() { return 1.f; } + static __device__ __forceinline__ float half() { return 0.5f; } + }; + + template static __device__ __forceinline__ void setAlpha(typename TypeVec::vec_type& vec, T val) + { + } + + template static __device__ __forceinline__ void setAlpha(typename TypeVec::vec_type& vec, T val) + { + vec.w = val; + } + + template static __device__ __forceinline__ T getAlpha(const typename TypeVec::vec_type& vec) + { + return ColorChannel::max(); + } + + template static __device__ __forceinline__ T getAlpha(const typename TypeVec::vec_type& vec) + { + return vec.w; + } + + //constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601 + constexpr float B2YF = 0.114f; + constexpr float G2YF = 0.587f; + constexpr float R2YF = 0.299f; + + //to YCbCr + constexpr float YCBF = 0.564f; // == 1/2/(1-B2YF) + constexpr float YCRF = 0.713f; // == 1/2/(1-R2YF) + const int YCBI = 9241; // == YCBF*16384 + const int YCRI = 11682; // == YCRF*16384 + //to YUV + constexpr float B2UF = 0.492f; + constexpr float R2VF = 0.877f; + const int B2UI = 8061; // == B2UF*16384 + const int R2VI = 14369; // == R2VF*16384 + //from YUV + constexpr float U2BF = 2.032f; + constexpr float U2GF = -0.395f; + constexpr float V2GF = -0.581f; + constexpr float V2RF = 1.140f; + const int U2BI = 33292; + const int U2GI = -6472; + const int V2GI = -9519; + const int V2RI = 18678; + //from YCrCb + constexpr float CB2BF = 1.773f; + constexpr float CB2GF = -0.344f; + constexpr float CR2GF = -0.714f; + constexpr float CR2RF = 1.403f; + const int CB2BI = 29049; + const int CB2GI = -5636; + const int CR2GI = -11698; + const int CR2RI = 22987; + + enum + { + yuv_shift = 14, + xyz_shift = 12, + gray_shift = 15, + R2Y = 4899, + G2Y = 9617, + B2Y = 1868, + RY15 = 9798, // == R2YF*32768 + 0.5 + GY15 = 19235, // == G2YF*32768 + 0.5 + BY15 = 3735, // == B2YF*32768 + 0.5 + BLOCK_SIZE = 256 + }; + } + +////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// + + namespace color_detail + { + template struct RGB2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + dst.x = (&src.x)[bidx]; + dst.y = src.y; + dst.z = (&src.x)[bidx^2]; + setAlpha(dst, getAlpha(src)); + + return dst; + } + + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} + }; + + template <> struct RGB2RGB : unary_function + { + __device__ uint operator()(uint src) const + { + uint dst = 0; + + dst |= (0xffu & (src >> 16)); + dst |= (0xffu & (src >> 8)) << 8; + dst |= (0xffu & (src)) << 16; + dst |= (0xffu & (src >> 24)) << 24; + + return dst; + } + + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef 
::cv::cuda::device::color_detail::RGB2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB ////////// + + namespace color_detail + { + template struct RGB2RGB5x5Converter; + template struct RGB2RGB5x5Converter<6, bidx> + { + static __device__ __forceinline__ ushort cvt(const uchar3& src) + { + return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8)); + } + + static __device__ __forceinline__ ushort cvt(uint src) + { + uint b = 0xffu & (src >> (bidx * 8)); + uint g = 0xffu & (src >> 8); + uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + return (ushort)((b >> 3) | ((g & ~3) << 3) | ((r & ~7) << 8)); + } + }; + + template struct RGB2RGB5x5Converter<5, bidx> + { + static __device__ __forceinline__ ushort cvt(const uchar3& src) + { + return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7)); + } + + static __device__ __forceinline__ ushort cvt(uint src) + { + uint b = 0xffu & (src >> (bidx * 8)); + uint g = 0xffu & (src >> 8); + uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + uint a = 0xffu & (src >> 24); + return (ushort)((b >> 3) | ((g & ~7) << 2) | ((r & ~7) << 7) | (a * 0x8000)); + } + }; + + template struct RGB2RGB5x5; + + template struct RGB2RGB5x5<3, bidx,green_bits> : unary_function + { + __device__ __forceinline__ ushort operator()(const uchar3& src) const + { + return RGB2RGB5x5Converter::cvt(src); + } + + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} + }; + + template struct RGB2RGB5x5<4, bidx,green_bits> : unary_function + { + __device__ __forceinline__ ushort operator()(uint src) const + { + return RGB2RGB5x5Converter::cvt(src); + } + + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2RGB5x5 functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template struct RGB5x52RGBConverter; + + template struct RGB5x52RGBConverter<5, bidx> + { + static __device__ __forceinline__ void cvt(uint src, uchar3& dst) + { + (&dst.x)[bidx] = src << 3; + dst.y = (src >> 2) & ~7; + (&dst.x)[bidx ^ 2] = (src >> 7) & ~7; + } + + static __device__ __forceinline__ void cvt(uint src, uint& dst) + { + dst = 0; + + dst |= (0xffu & (src << 3)) << (bidx * 8); + dst |= (0xffu & ((src >> 2) & ~7)) << 8; + dst |= (0xffu & ((src >> 7) & ~7)) << ((bidx ^ 2) * 8); + dst |= ((src & 0x8000) * 0xffu) << 24; + } + }; + + template struct RGB5x52RGBConverter<6, bidx> + { + static __device__ __forceinline__ void cvt(uint src, uchar3& dst) + { + (&dst.x)[bidx] = src << 3; + dst.y = (src >> 3) & ~3; + (&dst.x)[bidx ^ 2] = (src >> 8) & ~7; + } + + static __device__ __forceinline__ void cvt(uint src, uint& dst) + { + dst = 0xffu << 24; + + dst |= (0xffu & (src << 3)) << (bidx * 8); + dst |= (0xffu &((src >> 3) & ~3)) << 8; + dst |= (0xffu & ((src >> 8) & ~7)) << ((bidx ^ 2) * 8); + } + }; + + template struct RGB5x52RGB; + + template struct RGB5x52RGB<3, bidx, green_bits> : unary_function + { + __device__ __forceinline__ uchar3 operator()(ushort src) const + { + 
uchar3 dst; + RGB5x52RGBConverter::cvt(src, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} + + }; + + template struct RGB5x52RGB<4, bidx, green_bits> : unary_function + { + __device__ __forceinline__ uint operator()(ushort src) const + { + uint dst; + RGB5x52RGBConverter::cvt(src, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB5x52RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////// Grayscale to Color //////////////////////////////// + + namespace color_detail + { + template struct Gray2RGB : unary_function::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(T src) const + { + typename TypeVec::vec_type dst; + + dst.z = dst.y = dst.x = src; + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} + }; + + template <> struct Gray2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + uint dst = 0xffu << 24; + + dst |= src; + dst |= src << 8; + dst |= src << 16; + + return dst; + } + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::Gray2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template struct Gray2RGB5x5Converter; + template<> struct Gray2RGB5x5Converter<6> + { + static __device__ __forceinline__ ushort cvt(uint t) + { + return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8)); + } + }; + + template<> struct Gray2RGB5x5Converter<5> + { + static __device__ __forceinline__ ushort cvt(uint t) + { + t >>= 3; + return (ushort)(t | (t << 5) | (t << 10)); + } + }; + + template struct Gray2RGB5x5 : unary_function + { + __device__ __forceinline__ ushort operator()(uint src) const + { + return Gray2RGB5x5Converter::cvt(src); + } + + __host__ __device__ __forceinline__ Gray2RGB5x5() {} + __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::Gray2RGB5x5 functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////// Color to Grayscale //////////////////////////////// + + namespace color_detail + { + template struct RGB5x52GrayConverter; + template <> struct RGB5x52GrayConverter<6> + { + static __device__ __forceinline__ uchar cvt(uint t) + { + return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 3) & 0xfc) * GY15 + ((t >> 8) & 0xf8) * RY15, gray_shift); + } + }; + + template <> struct RGB5x52GrayConverter<5> + { + static __device__ __forceinline__ uchar cvt(uint t) + { + return 
(uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 2) & 0xf8) * GY15 + ((t >> 7) & 0xf8) * RY15, gray_shift); + } + }; + + template struct RGB5x52Gray : unary_function + { + __device__ __forceinline__ uchar operator()(uint src) const + { + return RGB5x52GrayConverter::cvt(src); + } + __host__ __device__ __forceinline__ RGB5x52Gray() {} + __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \ + struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB5x52Gray functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + template static __device__ __forceinline__ T RGB2GrayConvert(const T* src) + { + return (T)CV_DESCALE((unsigned)(src[bidx] * BY15 + src[1] * GY15 + src[bidx^2] * RY15), gray_shift); + } + + template static __device__ __forceinline__ uchar RGB2GrayConvert(uint src) + { + uint b = 0xffu & (src >> (bidx * 8)); + uint g = 0xffu & (src >> 8); + uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + return CV_DESCALE((uint)(b * BY15 + g * GY15 + r * RY15), gray_shift); + } + + template static __device__ __forceinline__ float RGB2GrayConvert(const float* src) + { + return src[bidx] * B2YF + src[1] * G2YF + src[bidx^2] * R2YF; + } + + template struct RGB2Gray : unary_function::vec_type, T> + { + __device__ __forceinline__ T operator()(const typename TypeVec::vec_type& src) const + { + return RGB2GrayConvert(&src.x); + } + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} + }; + + template struct RGB2Gray : unary_function + { + __device__ __forceinline__ uchar operator()(uint src) const + { + return RGB2GrayConvert(src); + } + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2Gray functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// + + namespace color_detail + { + __constant__ float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF }; + __constant__ int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI }; + + template static __device__ void RGB2YUVConvert(const T* src, D& dst) + { + const int delta = ColorChannel::half() * (1 << yuv_shift); + + const int Y = CV_DESCALE(src[0] * c_RGB2YUVCoeffs_i[bidx^2] + src[1] * c_RGB2YUVCoeffs_i[1] + src[2] * c_RGB2YUVCoeffs_i[bidx], yuv_shift); + const int Cr = CV_DESCALE((src[bidx^2] - Y) * c_RGB2YUVCoeffs_i[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((src[bidx] - Y) * c_RGB2YUVCoeffs_i[4] + delta, yuv_shift); + + dst.x = saturate_cast(Y); + dst.y = saturate_cast(Cr); + dst.z = saturate_cast(Cb); + } + + template static __device__ __forceinline__ void RGB2YUVConvert(const float* src, D& dst) + { + dst.x = src[0] * c_RGB2YUVCoeffs_f[bidx^2] + src[1] * c_RGB2YUVCoeffs_f[1] + src[2] * c_RGB2YUVCoeffs_f[bidx]; + dst.y = (src[bidx^2] - dst.x) * c_RGB2YUVCoeffs_f[3] + ColorChannel::half(); + dst.z = (src[bidx] - dst.x) * c_RGB2YUVCoeffs_f[4] + ColorChannel::half(); + } + + template struct RGB2YUV + : unary_function::vec_type, typename TypeVec::vec_type> + 
{ + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + RGB2YUVConvert(&src.x, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB2YUV() {} + __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2YUV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_YUV2RGBCoeffs_f[5] = { U2BF, U2GF, V2GF, V2RF }; + __constant__ int c_YUV2RGBCoeffs_i[5] = { U2BI, U2GI, V2GI, V2RI }; + + template static __device__ void YUV2RGBConvert(const T& src, D* dst) + { + const int b = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift); + + const int g = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[2] + + (src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift); + + const int r = src.x + CV_DESCALE((src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift); + + dst[bidx] = saturate_cast(b); + dst[1] = saturate_cast(g); + dst[bidx^2] = saturate_cast(r); + } + + template static __device__ uint YUV2RGBConvert(uint src) + { + const int x = 0xff & (src); + const int y = 0xff & (src >> 8); + const int z = 0xff & (src >> 16); + + const int b = x + CV_DESCALE((z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[3], yuv_shift); + + const int g = x + CV_DESCALE((z - ColorChannel::half()) * c_YUV2RGBCoeffs_i[2] + + (y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[1], yuv_shift); + + const int r = x + CV_DESCALE((y - ColorChannel::half()) * c_YUV2RGBCoeffs_i[0], yuv_shift); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(b) << (bidx * 8); + dst |= saturate_cast(g) << 8; + dst |= saturate_cast(r) << ((bidx ^ 2) * 8); + + return dst; + } + + template static __device__ __forceinline__ void YUV2RGBConvert(const T& src, float* dst) + { + dst[bidx] = src.x + (src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_f[3]; + + dst[1] = src.x + (src.z - ColorChannel::half()) * c_YUV2RGBCoeffs_f[2] + + (src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_f[1]; + + dst[bidx^2] = src.x + (src.y - ColorChannel::half()) * c_YUV2RGBCoeffs_f[0]; + } + + template struct YUV2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + YUV2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} + }; + + template struct YUV2RGB : unary_function + { + __device__ __forceinline__ uint operator ()(uint src) const + { + return YUV2RGBConvert(src); + } + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::YUV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// + + 
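// Editor's note: the integer YCrCb path below, like the YUV path above, works in
// Q14 fixed point (yuv_shift == 14): each float coefficient k is stored as
// round(k * 2^14), so YCRI == 11682 encodes YCRF == 0.713f and YCBI == 9241
// encodes YCBF == 0.564f (see the constants near the top of this file). A minimal
// compile-time check of that encoding; toQ14 is an illustrative helper, not part
// of this header:
constexpr int toQ14(float k) { return int(k * 16384.0f + 0.5f); }
static_assert(toQ14(0.713f) == 11682, "YCRI is YCRF in Q14 fixed point");
static_assert(toQ14(0.564f) == 9241,  "YCBI is YCBF in Q14 fixed point");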
namespace color_detail + { + __constant__ float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF}; + __constant__ int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI}; + + template static __device__ void RGB2YCrCbConvert(const T* src, D& dst) + { + const int delta = ColorChannel::half() * (1 << yuv_shift); + + const int Y = CV_DESCALE(src[0] * c_RGB2YCrCbCoeffs_i[bidx^2] + src[1] * c_RGB2YCrCbCoeffs_i[1] + src[2] * c_RGB2YCrCbCoeffs_i[bidx], yuv_shift); + const int Cr = CV_DESCALE((src[bidx^2] - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((src[bidx] - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift); + + dst.x = saturate_cast(Y); + dst.y = saturate_cast(Cr); + dst.z = saturate_cast(Cb); + } + + template static __device__ uint RGB2YCrCbConvert(uint src) + { + const int delta = ColorChannel::half() * (1 << yuv_shift); + + const int Y = CV_DESCALE((0xffu & src) * c_RGB2YCrCbCoeffs_i[bidx^2] + (0xffu & (src >> 8)) * c_RGB2YCrCbCoeffs_i[1] + (0xffu & (src >> 16)) * c_RGB2YCrCbCoeffs_i[bidx], yuv_shift); + const int Cr = CV_DESCALE(((0xffu & (src >> ((bidx ^ 2) * 8))) - Y) * c_RGB2YCrCbCoeffs_i[3] + delta, yuv_shift); + const int Cb = CV_DESCALE(((0xffu & (src >> (bidx * 8))) - Y) * c_RGB2YCrCbCoeffs_i[4] + delta, yuv_shift); + + uint dst = 0; + + dst |= saturate_cast(Y); + dst |= saturate_cast(Cr) << 8; + dst |= saturate_cast(Cb) << 16; + + return dst; + } + + template static __device__ __forceinline__ void RGB2YCrCbConvert(const float* src, D& dst) + { + dst.x = src[0] * c_RGB2YCrCbCoeffs_f[bidx^2] + src[1] * c_RGB2YCrCbCoeffs_f[1] + src[2] * c_RGB2YCrCbCoeffs_f[bidx]; + dst.y = (src[bidx^2] - dst.x) * c_RGB2YCrCbCoeffs_f[3] + ColorChannel::half(); + dst.z = (src[bidx] - dst.x) * c_RGB2YCrCbCoeffs_f[4] + ColorChannel::half(); + } + + template struct RGB2YCrCb + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + RGB2YCrCbConvert(&src.x, dst); + return dst; + } + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} + }; + + template struct RGB2YCrCb : unary_function + { + __device__ __forceinline__ uint operator ()(uint src) const + { + return RGB2YCrCbConvert(src); + } + + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2YCrCb functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_YCrCb2RGBCoeffs_f[5] = {CR2RF, CR2GF, CB2GF, CB2BF}; + __constant__ int c_YCrCb2RGBCoeffs_i[5] = {CR2RI, CR2GI, CB2GI, CB2BI}; + + template static __device__ void YCrCb2RGBConvert(const T& src, D* dst) + { + const int b = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift); + const int g = src.x + CV_DESCALE((src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[2] + (src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift); + const int r = src.x + CV_DESCALE((src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift); + + dst[bidx] = saturate_cast(b); + dst[1] = saturate_cast(g); + dst[bidx^2] = saturate_cast(r); + } + + 
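// Editor's note: CV_DESCALE(x, n) == (x + (1 << (n-1))) >> n divides by 2^n with
// round-half-up, which is what keeps the Q14 integer conversions here in step with
// the float path. A small reference helper with a worked example (descaleRef is an
// illustrative name, not part of this header):
static inline int descaleRef(int x, int n) { return (x + (1 << (n - 1))) >> n; }
// For Cr - 128 == 50: descaleRef(50 * 22987 /* CR2RI */, 14) == 70, matching
// roundf(50 * 1.403f /* CR2RF */) == 70 from the float branch.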
template static __device__ uint YCrCb2RGBConvert(uint src) + { + const int x = 0xff & (src); + const int y = 0xff & (src >> 8); + const int z = 0xff & (src >> 16); + + const int b = x + CV_DESCALE((z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[3], yuv_shift); + const int g = x + CV_DESCALE((z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[2] + (y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[1], yuv_shift); + const int r = x + CV_DESCALE((y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_i[0], yuv_shift); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(b) << (bidx * 8); + dst |= saturate_cast(g) << 8; + dst |= saturate_cast(r) << ((bidx ^ 2) * 8); + + return dst; + } + + template __device__ __forceinline__ void YCrCb2RGBConvert(const T& src, float* dst) + { + dst[bidx] = src.x + (src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[3]; + dst[1] = src.x + (src.z - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[2] + (src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[1]; + dst[bidx^2] = src.x + (src.y - ColorChannel::half()) * c_YCrCb2RGBCoeffs_f[0]; + } + + template struct YCrCb2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator ()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + YCrCb2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} + }; + + template struct YCrCb2RGB : unary_function + { + __device__ __forceinline__ uint operator ()(uint src) const + { + return YCrCb2RGBConvert(src); + } + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::YCrCb2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +////////////////////////////////////// RGB <-> XYZ /////////////////////////////////////// + + namespace color_detail + { + __constant__ float c_RGB2XYZ_D65f[9] = { 0.412453f, 0.357580f, 0.180423f, 0.212671f, 0.715160f, 0.072169f, 0.019334f, 0.119193f, 0.950227f }; + __constant__ int c_RGB2XYZ_D65i[9] = { 1689, 1465, 739, 871, 2929, 296, 79, 488, 3892 }; + + template static __device__ __forceinline__ void RGB2XYZConvert(const T* src, D& dst) + { + dst.z = saturate_cast(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[6] + src[1] * c_RGB2XYZ_D65i[7] + src[bidx] * c_RGB2XYZ_D65i[8], xyz_shift)); + dst.x = saturate_cast(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[0] + src[1] * c_RGB2XYZ_D65i[1] + src[bidx] * c_RGB2XYZ_D65i[2], xyz_shift)); + dst.y = saturate_cast(CV_DESCALE(src[bidx^2] * c_RGB2XYZ_D65i[3] + src[1] * c_RGB2XYZ_D65i[4] + src[bidx] * c_RGB2XYZ_D65i[5], xyz_shift)); + } + + template static __device__ __forceinline__ uint RGB2XYZConvert(uint src) + { + const uint b = 0xffu & (src >> (bidx * 8)); + const uint g = 0xffu & (src >> 8); + const uint r = 0xffu & (src >> ((bidx ^ 2) * 8)); + + const uint x = saturate_cast(CV_DESCALE(r * c_RGB2XYZ_D65i[0] + g * c_RGB2XYZ_D65i[1] + b * c_RGB2XYZ_D65i[2], xyz_shift)); + const uint y = saturate_cast(CV_DESCALE(r * c_RGB2XYZ_D65i[3] + g * c_RGB2XYZ_D65i[4] + b * c_RGB2XYZ_D65i[5], xyz_shift)); + const uint z = saturate_cast(CV_DESCALE(r * c_RGB2XYZ_D65i[6] + g * 
c_RGB2XYZ_D65i[7] + b * c_RGB2XYZ_D65i[8], xyz_shift)); + + uint dst = 0; + + dst |= x; + dst |= y << 8; + dst |= z << 16; + + return dst; + } + + template static __device__ __forceinline__ void RGB2XYZConvert(const float* src, D& dst) + { + dst.x = src[bidx^2] * c_RGB2XYZ_D65f[0] + src[1] * c_RGB2XYZ_D65f[1] + src[bidx] * c_RGB2XYZ_D65f[2]; + dst.y = src[bidx^2] * c_RGB2XYZ_D65f[3] + src[1] * c_RGB2XYZ_D65f[4] + src[bidx] * c_RGB2XYZ_D65f[5]; + dst.z = src[bidx^2] * c_RGB2XYZ_D65f[6] + src[1] * c_RGB2XYZ_D65f[7] + src[bidx] * c_RGB2XYZ_D65f[8]; + } + + template struct RGB2XYZ + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2XYZConvert(&src.x, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} + }; + + template struct RGB2XYZ : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return RGB2XYZConvert(src); + } + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2XYZ functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ float c_XYZ2sRGB_D65f[9] = { 3.240479f, -1.53715f, -0.498535f, -0.969256f, 1.875991f, 0.041556f, 0.055648f, -0.204043f, 1.057311f }; + __constant__ int c_XYZ2sRGB_D65i[9] = { 13273, -6296, -2042, -3970, 7684, 170, 228, -836, 4331 }; + + template static __device__ __forceinline__ void XYZ2RGBConvert(const T& src, D* dst) + { + dst[bidx^2] = saturate_cast(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[0] + src.y * c_XYZ2sRGB_D65i[1] + src.z * c_XYZ2sRGB_D65i[2], xyz_shift)); + dst[1] = saturate_cast(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[3] + src.y * c_XYZ2sRGB_D65i[4] + src.z * c_XYZ2sRGB_D65i[5], xyz_shift)); + dst[bidx] = saturate_cast(CV_DESCALE(src.x * c_XYZ2sRGB_D65i[6] + src.y * c_XYZ2sRGB_D65i[7] + src.z * c_XYZ2sRGB_D65i[8], xyz_shift)); + } + + template static __device__ __forceinline__ uint XYZ2RGBConvert(uint src) + { + const int x = 0xff & src; + const int y = 0xff & (src >> 8); + const int z = 0xff & (src >> 16); + + const uint r = saturate_cast(CV_DESCALE(x * c_XYZ2sRGB_D65i[0] + y * c_XYZ2sRGB_D65i[1] + z * c_XYZ2sRGB_D65i[2], xyz_shift)); + const uint g = saturate_cast(CV_DESCALE(x * c_XYZ2sRGB_D65i[3] + y * c_XYZ2sRGB_D65i[4] + z * c_XYZ2sRGB_D65i[5], xyz_shift)); + const uint b = saturate_cast(CV_DESCALE(x * c_XYZ2sRGB_D65i[6] + y * c_XYZ2sRGB_D65i[7] + z * c_XYZ2sRGB_D65i[8], xyz_shift)); + + uint dst = 0xffu << 24; + + dst |= b << (bidx * 8); + dst |= g << 8; + dst |= r << ((bidx ^ 2) * 8); + + return dst; + } + + template static __device__ __forceinline__ void XYZ2RGBConvert(const T& src, float* dst) + { + dst[bidx^2] = src.x * c_XYZ2sRGB_D65f[0] + src.y * c_XYZ2sRGB_D65f[1] + src.z * c_XYZ2sRGB_D65f[2]; + dst[1] = src.x * c_XYZ2sRGB_D65f[3] + src.y * c_XYZ2sRGB_D65f[4] + src.z * c_XYZ2sRGB_D65f[5]; + dst[bidx] = src.x * c_XYZ2sRGB_D65f[6] + src.y * c_XYZ2sRGB_D65f[7] + src.z * c_XYZ2sRGB_D65f[8]; + } + + template struct XYZ2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ 
typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + XYZ2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} + }; + + template struct XYZ2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return XYZ2RGBConvert(src); + } + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::XYZ2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// + + namespace color_detail + { + __constant__ int c_HsvDivTable [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096}; + __constant__ int c_HsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 
945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482}; + __constant__ int c_HsvDivTable256[256] = {0, 174763, 87381, 58254, 43691, 34953, 29127, 24966, 21845, 19418, 17476, 15888, 14564, 13443, 12483, 11651, 10923, 10280, 9709, 9198, 8738, 8322, 7944, 7598, 7282, 6991, 6722, 6473, 6242, 6026, 5825, 5638, 5461, 5296, 5140, 4993, 4855, 4723, 4599, 4481, 4369, 4263, 4161, 4064, 3972, 3884, 3799, 3718, 3641, 3567, 3495, 3427, 3361, 3297, 3236, 3178, 3121, 3066, 3013, 2962, 2913, 2865, 2819, 2774, 2731, 2689, 2648, 2608, 2570, 2533, 2497, 2461, 2427, 2394, 2362, 2330, 2300, 2270, 2241, 2212, 2185, 2158, 2131, 2106, 2081, 2056, 2032, 2009, 1986, 1964, 1942, 1920, 1900, 1879, 1859, 1840, 1820, 1802, 1783, 1765, 1748, 1730, 1713, 1697, 1680, 1664, 1649, 1633, 1618, 1603, 1589, 1574, 1560, 1547, 1533, 1520, 1507, 1494, 1481, 1469, 1456, 1444, 1432, 1421, 1409, 1398, 1387, 1376, 1365, 1355, 1344, 1334, 1324, 1314, 1304, 1295, 1285, 1276, 1266, 1257, 1248, 1239, 1231, 1222, 1214, 1205, 1197, 1189, 1181, 1173, 1165, 1157, 1150, 1142, 1135, 1128, 1120, 1113, 1106, 1099, 1092, 1085, 1079, 1072, 1066, 1059, 1053, 1046, 1040, 1034, 1028, 1022, 1016, 1010, 1004, 999, 993, 987, 982, 976, 971, 966, 960, 955, 950, 945, 940, 935, 930, 925, 920, 915, 910, 906, 901, 896, 892, 887, 883, 878, 874, 869, 865, 861, 857, 853, 848, 844, 840, 836, 832, 828, 824, 820, 817, 813, 809, 805, 802, 798, 794, 791, 787, 784, 780, 777, 773, 770, 767, 763, 760, 757, 753, 750, 747, 744, 741, 737, 734, 731, 728, 725, 722, 719, 716, 713, 710, 708, 705, 702, 699, 696, 694, 691, 688, 685}; + + template static __device__ void RGB2HSVConvert(const uchar* src, D& dst) + { + const int hsv_shift = 12; + const int* hdiv_table = hr == 180 ? c_HsvDivTable180 : c_HsvDivTable256; + + int b = src[bidx], g = src[1], r = src[bidx^2]; + int h, s, v = b; + int vmin = b, diff; + int vr, vg; + + v = ::max(v, g); + v = ::max(v, r); + vmin = ::min(vmin, g); + vmin = ::min(vmin, r); + + diff = v - vmin; + vr = (v == r) * -1; + vg = (v == g) * -1; + + s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift; + h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff)))); + h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift; + h += (h < 0) * hr; + + dst.x = saturate_cast(h); + dst.y = (uchar)s; + dst.z = (uchar)v; + } + + template static __device__ uint RGB2HSVConvert(uint src) + { + const int hsv_shift = 12; + const int* hdiv_table = hr == 180 ? 
c_HsvDivTable180 : c_HsvDivTable256; + + const int b = 0xff & (src >> (bidx * 8)); + const int g = 0xff & (src >> 8); + const int r = 0xff & (src >> ((bidx ^ 2) * 8)); + + int h, s, v = b; + int vmin = b, diff; + int vr, vg; + + v = ::max(v, g); + v = ::max(v, r); + vmin = ::min(vmin, g); + vmin = ::min(vmin, r); + + diff = v - vmin; + vr = (v == r) * -1; + vg = (v == g) * -1; + + s = (diff * c_HsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift; + h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff)))); + h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift; + h += (h < 0) * hr; + + uint dst = 0; + + dst |= saturate_cast(h); + dst |= (0xffu & s) << 8; + dst |= (0xffu & v) << 16; + + return dst; + } + + template static __device__ void RGB2HSVConvert(const float* src, D& dst) + { + const float hscale = hr * (1.f / 360.f); + + float b = src[bidx], g = src[1], r = src[bidx^2]; + float h, s, v; + + float vmin, diff; + + v = vmin = r; + v = fmax(v, g); + v = fmax(v, b); + vmin = fmin(vmin, g); + vmin = fmin(vmin, b); + + diff = v - vmin; + s = diff / (float)(::fabs(v) + numeric_limits::epsilon()); + diff = (float)(60. / (diff + numeric_limits::epsilon())); + + h = (v == r) * (g - b) * diff; + h += (v != r && v == g) * ((b - r) * diff + 120.f); + h += (v != r && v != g) * ((r - g) * diff + 240.f); + h += (h < 0) * 360.f; + + dst.x = h * hscale; + dst.y = s; + dst.z = v; + } + + template struct RGB2HSV + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2HSVConvert(&src.x, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} + }; + + template struct RGB2HSV : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return RGB2HSVConvert(src); + } + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HSV functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ int c_HsvSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} }; + + template static __device__ void HSV2RGBConvert(const T& src, float* dst) + { + const float hscale = 6.f / hr; + + float h = src.x, s = src.y, v = src.z; + float b = v, g = v, r = v; + + if (s != 0) + { + h *= hscale; + + if( h < 0 ) + do h += 6; while( h < 0 ); + else if( h >= 6 
) + do h -= 6; while( h >= 6 ); + + int sector = __float2int_rd(h); + h -= sector; + + if ( (unsigned)sector >= 6u ) + { + sector = 0; + h = 0.f; + } + + float tab[4]; + tab[0] = v; + tab[1] = v * (1.f - s); + tab[2] = v * (1.f - s * h); + tab[3] = v * (1.f - s * (1.f - h)); + + b = tab[c_HsvSectorData[sector][0]]; + g = tab[c_HsvSectorData[sector][1]]; + r = tab[c_HsvSectorData[sector][2]]; + } + + dst[bidx] = b; + dst[1] = g; + dst[bidx^2] = r; + } + + template static __device__ void HSV2RGBConvert(const T& src, uchar* dst) + { + float3 buf; + + buf.x = src.x; + buf.y = src.y * (1.f / 255.f); + buf.z = src.z * (1.f / 255.f); + + HSV2RGBConvert(buf, &buf.x); + + dst[0] = saturate_cast(buf.x * 255.f); + dst[1] = saturate_cast(buf.y * 255.f); + dst[2] = saturate_cast(buf.z * 255.f); + } + + template static __device__ uint HSV2RGBConvert(uint src) + { + float3 buf; + + buf.x = src & 0xff; + buf.y = ((src >> 8) & 0xff) * (1.f/255.f); + buf.z = ((src >> 16) & 0xff) * (1.f/255.f); + + HSV2RGBConvert(buf, &buf.x); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(buf.x * 255.f); + dst |= saturate_cast(buf.y * 255.f) << 8; + dst |= saturate_cast(buf.z * 255.f) << 16; + + return dst; + } + + template struct HSV2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + HSV2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} + }; + + template struct HSV2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return HSV2RGBConvert(src); + } + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HSV2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +/////////////////////////////////////// RGB <-> HLS //////////////////////////////////////// + + namespace color_detail + { + template static __device__ void RGB2HLSConvert(const float* src, D& dst) + { + const float hscale = hr * (1.f / 360.f); + + float b = src[bidx], g = src[1], r = src[bidx^2]; + float h = 0.f, s = 0.f, l; + float vmin, vmax, diff; + + vmax = vmin = r; + vmax = fmax(vmax, g); + vmax = fmax(vmax, b); + vmin = fmin(vmin, g); + vmin = fmin(vmin, b); + + diff = vmax - vmin; + l = (vmax + vmin) * 0.5f; + + if (diff > numeric_limits::epsilon()) + { + s = (l < 0.5f) * diff / (vmax + 
vmin); + s += (l >= 0.5f) * diff / (2.0f - vmax - vmin); + + diff = 60.f / diff; + + h = (vmax == r) * (g - b) * diff; + h += (vmax != r && vmax == g) * ((b - r) * diff + 120.f); + h += (vmax != r && vmax != g) * ((r - g) * diff + 240.f); + h += (h < 0.f) * 360.f; + } + + dst.x = h * hscale; + dst.y = l; + dst.z = s; + } + + template static __device__ void RGB2HLSConvert(const uchar* src, D& dst) + { + float3 buf; + + buf.x = src[0] * (1.f / 255.f); + buf.y = src[1] * (1.f / 255.f); + buf.z = src[2] * (1.f / 255.f); + + RGB2HLSConvert(&buf.x, buf); + + dst.x = saturate_cast(buf.x); + dst.y = saturate_cast(buf.y*255.f); + dst.z = saturate_cast(buf.z*255.f); + } + + template static __device__ uint RGB2HLSConvert(uint src) + { + float3 buf; + + buf.x = (0xff & src) * (1.f / 255.f); + buf.y = (0xff & (src >> 8)) * (1.f / 255.f); + buf.z = (0xff & (src >> 16)) * (1.f / 255.f); + + RGB2HLSConvert(&buf.x, buf); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(buf.x); + dst |= saturate_cast(buf.y * 255.f) << 8; + dst |= saturate_cast(buf.z * 255.f) << 16; + + return dst; + } + + template struct RGB2HLS + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + RGB2HLSConvert(&src.x, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} + }; + + template struct RGB2HLS : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return RGB2HLSConvert(src); + } + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2HLS functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + + namespace color_detail + { + __constant__ int c_HlsSectorData[6][3] = { {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0} }; + + template static __device__ void HLS2RGBConvert(const T& src, float* dst) + { + const float hscale = 6.0f / hr; + + float h = src.x, l = src.y, s = src.z; + float b = l, g = l, r = l; + + if (s != 0) + { + float p2 = (l <= 0.5f) * l * (1 + s); + p2 += (l > 0.5f) * (l + s - l * s); + float p1 = 2 * l - p2; + + h *= hscale; + + if( h < 0 ) + do h += 6; while( h < 0 ); + else if( h >= 6 ) + do h -= 6; while( h >= 6 ); + + int sector; + sector = __float2int_rd(h); + + h -= sector; + + float tab[4]; + tab[0] = p2; + tab[1] = p1; + tab[2] = p1 + (p2 - p1) * (1 - h); + tab[3] = p1 + (p2 - p1) * h; + + b = 
tab[c_HlsSectorData[sector][0]]; + g = tab[c_HlsSectorData[sector][1]]; + r = tab[c_HlsSectorData[sector][2]]; + } + + dst[bidx] = b; + dst[1] = g; + dst[bidx^2] = r; + } + + template static __device__ void HLS2RGBConvert(const T& src, uchar* dst) + { + float3 buf; + + buf.x = src.x; + buf.y = src.y * (1.f / 255.f); + buf.z = src.z * (1.f / 255.f); + + HLS2RGBConvert(buf, &buf.x); + + dst[0] = saturate_cast(buf.x * 255.f); + dst[1] = saturate_cast(buf.y * 255.f); + dst[2] = saturate_cast(buf.z * 255.f); + } + + template static __device__ uint HLS2RGBConvert(uint src) + { + float3 buf; + + buf.x = 0xff & src; + buf.y = (0xff & (src >> 8)) * (1.f / 255.f); + buf.z = (0xff & (src >> 16)) * (1.f / 255.f); + + HLS2RGBConvert(buf, &buf.x); + + uint dst = 0xffu << 24; + + dst |= saturate_cast(buf.x * 255.f); + dst |= saturate_cast(buf.y * 255.f) << 8; + dst |= saturate_cast(buf.z * 255.f) << 16; + + return dst; + } + + template struct HLS2RGB + : unary_function::vec_type, typename TypeVec::vec_type> + { + __device__ __forceinline__ typename TypeVec::vec_type operator()(const typename TypeVec::vec_type& src) const + { + typename TypeVec::vec_type dst; + + HLS2RGBConvert(src, &dst.x); + setAlpha(dst, ColorChannel::max()); + + return dst; + } + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} + }; + + template struct HLS2RGB : unary_function + { + __device__ __forceinline__ uint operator()(uint src) const + { + return HLS2RGBConvert(src); + } + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \ + template struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; \ + template <> struct name ## _full_traits \ + { \ + typedef ::cv::cuda::device::color_detail::HLS2RGB functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + }; + +///////////////////////////////////// RGB <-> Lab ///////////////////////////////////// + + namespace color_detail + { + enum + { + LAB_CBRT_TAB_SIZE = 1024, + GAMMA_TAB_SIZE = 1024, + lab_shift = xyz_shift, + gamma_shift = 3, + lab_shift2 = (lab_shift + gamma_shift), + LAB_CBRT_TAB_SIZE_B = (256 * 3 / 2 * (1 << gamma_shift)) + }; + + __constant__ ushort c_sRGBGammaTab_b[] = 
+ __constant__ ushort c_sRGBGammaTab_b[] = {0,1,1,2,2,3,4,4,5,6,6,7,8,8,9,10,11,11,12,13,14,15,16,17,19,20,21,22,24,25,26,28,29,31,33,34,36,38,40,41,43,45,47,49,51,54,56,58,60,63,65,68,70,73,75,78,81,83,86,89,92,95,98,101,105,108,111,115,118,121,125,129,132,136,140,144,147,151,155,160,164,168,172,176,181,185,190,194,199,204,209,213,218,223,228,233,239,244,249,255,260,265,271,277,282,288,294,300,306,312,318,324,331,337,343,350,356,363,370,376,383,390,397,404,411,418,426,433,440,448,455,463,471,478,486,494,502,510,518,527,535,543,552,560,569,578,586,595,604,613,622,631,641,650,659,669,678,688,698,707,717,727,737,747,757,768,778,788,799,809,820,831,842,852,863,875,886,897,908,920,931,943,954,966,978,990,1002,1014,1026,1038,1050,1063,1075,1088,1101,1113,1126,1139,1152,1165,1178,1192,1205,1218,1232,1245,1259,1273,1287,1301,1315,1329,1343,1357,1372,1386,1401,1415,1430,1445,1460,1475,1490,1505,1521,1536,1551,1567,1583,1598,1614,1630,1646,1662,1678,1695,1711,1728,1744,1761,1778,1794,1811,1828,1846,1863,1880,1897,1915,1933,1950,1968,1986,2004,2022,2040}; + + __device__ __forceinline__ int LabCbrt_b(int i) + { + float x = i * (1.f / (255.f * (1 << gamma_shift))); + return (1 << lab_shift2) * (x < 0.008856f ? x * 7.787f + 0.13793103448275862f : ::cbrtf(x)); + } + + template <bool srgb, int blueIdx, typename T, typename D> + __device__ __forceinline__ void RGB2LabConvert_b(const T& src, D& dst) + { + const int Lscale = (116 * 255 + 50) / 100; + const int Lshift = -((16 * 255 * (1 << lab_shift2) + 50) / 100); + + int B = blueIdx == 0 ? src.x : src.z; + int G = src.y; + int R = blueIdx == 0 ? src.z : src.x; + + if (srgb) + { + B = c_sRGBGammaTab_b[B]; + G = c_sRGBGammaTab_b[G]; + R = c_sRGBGammaTab_b[R]; + } + else + { + B <<= 3; + G <<= 3; + R <<= 3; + } + + int fX = LabCbrt_b(CV_DESCALE(B * 778 + G * 1541 + R * 1777, lab_shift)); + int fY = LabCbrt_b(CV_DESCALE(B * 296 + G * 2929 + R * 871, lab_shift)); + int fZ = LabCbrt_b(CV_DESCALE(B * 3575 + G * 448 + R * 73, lab_shift)); + + int L = CV_DESCALE(Lscale * fY + Lshift, lab_shift2); + int a = CV_DESCALE(500 * (fX - fY) + 128 * (1 << lab_shift2), lab_shift2); + int b = CV_DESCALE(200 * (fY - fZ) + 128 * (1 << lab_shift2), lab_shift2); + + dst.x = saturate_cast<uchar>(L); + dst.y = saturate_cast<uchar>(a); + dst.z = saturate_cast<uchar>(b); + } + + __device__ __forceinline__ float splineInterpolate(float x, const float* tab, int n) + { + int ix = ::min(::max(int(x), 0), n-1); + x -= ix; + tab += ix * 4; + return ((tab[3] * x + tab[2]) * x + tab[1]) * x + tab[0]; + }
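+ // Each of the GAMMA_TAB_SIZE spline knots stores four cubic coefficients + // (c0..c3), evaluated with Horner's rule above; the table below linearizes + // sRGB over [0, 1] for the float Lab path. + + __constant__ float c_sRGBGammaTab[] = 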
{0,7.55853e-05,0.,-7.51331e-13,7.55853e-05,7.55853e-05,-2.25399e-12,3.75665e-12,0.000151171,7.55853e-05,9.01597e-12,-6.99932e-12,0.000226756,7.55853e-05,-1.1982e-11,2.41277e-12,0.000302341,7.55853e-05,-4.74369e-12,1.19001e-11,0.000377927,7.55853e-05,3.09568e-11,-2.09095e-11,0.000453512,7.55853e-05,-3.17718e-11,1.35303e-11,0.000529097,7.55853e-05,8.81905e-12,-4.10782e-12,0.000604683,7.55853e-05,-3.50439e-12,2.90097e-12,0.000680268,7.55853e-05,5.19852e-12,-7.49607e-12,0.000755853,7.55853e-05,-1.72897e-11,2.70833e-11,0.000831439,7.55854e-05,6.39602e-11,-4.26295e-11,0.000907024,7.55854e-05,-6.39282e-11,2.70193e-11,0.000982609,7.55853e-05,1.71298e-11,-7.24017e-12,0.00105819,7.55853e-05,-4.59077e-12,1.94137e-12,0.00113378,7.55853e-05,1.23333e-12,-5.25291e-13,0.00120937,7.55853e-05,-3.42545e-13,1.59799e-13,0.00128495,7.55853e-05,1.36852e-13,-1.13904e-13,0.00136054,7.55853e-05,-2.04861e-13,2.95818e-13,0.00143612,7.55853e-05,6.82594e-13,-1.06937e-12,0.00151171,7.55853e-05,-2.52551e-12,3.98166e-12,0.00158729,7.55853e-05,9.41946e-12,-1.48573e-11,0.00166288,7.55853e-05,-3.51523e-11,5.54474e-11,0.00173846,7.55854e-05,1.3119e-10,-9.0517e-11,0.00181405,7.55854e-05,-1.40361e-10,7.37899e-11,0.00188963,7.55853e-05,8.10085e-11,-8.82272e-11,0.00196522,7.55852e-05,-1.83673e-10,1.62704e-10,0.0020408,7.55853e-05,3.04438e-10,-2.13341e-10,0.00211639,7.55853e-05,-3.35586e-10,2.25e-10,0.00219197,7.55853e-05,3.39414e-10,-2.20997e-10,0.00226756,7.55853e-05,-3.23576e-10,1.93326e-10,0.00234315,7.55853e-05,2.564e-10,-8.66446e-11,0.00241873,7.55855e-05,-3.53328e-12,-7.9578e-11,0.00249432,7.55853e-05,-2.42267e-10,1.72126e-10,0.0025699,7.55853e-05,2.74111e-10,-1.43265e-10,0.00264549,7.55854e-05,-1.55683e-10,-6.47292e-11,0.00272107,7.55849e-05,-3.4987e-10,8.67842e-10,0.00279666,7.55868e-05,2.25366e-09,-3.8723e-09,0.00287224,7.55797e-05,-9.36325e-09,1.5087e-08,0.00294783,7.56063e-05,3.58978e-08,-5.69415e-08,0.00302341,7.55072e-05,-1.34927e-07,2.13144e-07,0.003099,7.58768e-05,5.04507e-07,1.38713e-07,0.00317552,7.7302e-05,9.20646e-07,-1.55186e-07,0.00325359,7.86777e-05,4.55087e-07,4.26813e-08,0.00333276,7.97159e-05,5.83131e-07,-1.06495e-08,0.00341305,8.08502e-05,5.51182e-07,3.87467e-09,0.00349446,8.19642e-05,5.62806e-07,-1.92586e-10,0.00357698,8.30892e-05,5.62228e-07,1.0866e-09,0.00366063,8.4217e-05,5.65488e-07,5.02818e-10,0.00374542,8.53494e-05,5.66997e-07,8.60211e-10,0.00383133,8.6486e-05,5.69577e-07,7.13044e-10,0.00391839,8.76273e-05,5.71716e-07,4.78527e-10,0.00400659,8.87722e-05,5.73152e-07,1.09818e-09,0.00409594,8.99218e-05,5.76447e-07,2.50964e-10,0.00418644,9.10754e-05,5.772e-07,1.15762e-09,0.00427809,9.22333e-05,5.80672e-07,2.40865e-10,0.0043709,9.33954e-05,5.81395e-07,1.13854e-09,0.00446488,9.45616e-05,5.84811e-07,3.27267e-10,0.00456003,9.57322e-05,5.85792e-07,8.1197e-10,0.00465635,9.69062e-05,5.88228e-07,6.15823e-10,0.00475384,9.80845e-05,5.90076e-07,9.15747e-10,0.00485252,9.92674e-05,5.92823e-07,3.778e-10,0.00495238,0.000100454,5.93956e-07,8.32623e-10,0.00505343,0.000101645,5.96454e-07,4.82695e-10,0.00515567,0.000102839,5.97902e-07,9.61904e-10,0.00525911,0.000104038,6.00788e-07,3.26281e-10,0.00536375,0.00010524,6.01767e-07,9.926e-10,0.00546959,0.000106447,6.04745e-07,3.59933e-10,0.00557664,0.000107657,6.05824e-07,8.2728e-10,0.0056849,0.000108871,6.08306e-07,5.21898e-10,0.00579438,0.00011009,6.09872e-07,8.10492e-10,0.00590508,0.000111312,6.12303e-07,4.27046e-10,0.00601701,0.000112538,6.13585e-07,7.40878e-10,0.00613016,0.000113767,6.15807e-07,8.00469e-10,0.00624454,0.000115001,6.18209e-07,2.48178e-10,0.00636016,0.0001162
38,6.18953e-07,1.00073e-09,0.00647702,0.000117479,6.21955e-07,4.05654e-10,0.00659512,0.000118724,6.23172e-07,6.36192e-10,0.00671447,0.000119973,6.25081e-07,7.74927e-10,0.00683507,0.000121225,6.27406e-07,4.54975e-10,0.00695692,0.000122481,6.28771e-07,6.64841e-10,0.00708003,0.000123741,6.30765e-07,6.10972e-10,0.00720441,0.000125004,6.32598e-07,6.16543e-10,0.00733004,0.000126271,6.34448e-07,6.48204e-10,0.00745695,0.000127542,6.36392e-07,5.15835e-10,0.00758513,0.000128816,6.3794e-07,5.48103e-10,0.00771458,0.000130094,6.39584e-07,1.01706e-09,0.00784532,0.000131376,6.42635e-07,4.0283e-11,0.00797734,0.000132661,6.42756e-07,6.84471e-10,0.00811064,0.000133949,6.4481e-07,9.47144e-10,0.00824524,0.000135241,6.47651e-07,1.83472e-10,0.00838112,0.000136537,6.48201e-07,1.11296e-09,0.00851831,0.000137837,6.5154e-07,2.13163e-11,0.0086568,0.00013914,6.51604e-07,6.64462e-10,0.00879659,0.000140445,6.53598e-07,1.04613e-09,0.00893769,0.000141756,6.56736e-07,-1.92377e-10,0.0090801,0.000143069,6.56159e-07,1.58601e-09,0.00922383,0.000144386,6.60917e-07,-5.63754e-10,0.00936888,0.000145706,6.59226e-07,1.60033e-09,0.00951524,0.000147029,6.64027e-07,-2.49543e-10,0.00966294,0.000148356,6.63278e-07,1.26043e-09,0.00981196,0.000149687,6.67059e-07,-1.35572e-10,0.00996231,0.00015102,6.66653e-07,1.14458e-09,0.010114,0.000152357,6.70086e-07,2.13864e-10,0.010267,0.000153698,6.70728e-07,7.93856e-10,0.0104214,0.000155042,6.73109e-07,3.36077e-10,0.0105771,0.000156389,6.74118e-07,6.55765e-10,0.0107342,0.000157739,6.76085e-07,7.66211e-10,0.0108926,0.000159094,6.78384e-07,4.66116e-12,0.0110524,0.000160451,6.78398e-07,1.07775e-09,0.0112135,0.000161811,6.81631e-07,3.41023e-10,0.011376,0.000163175,6.82654e-07,3.5205e-10,0.0115398,0.000164541,6.8371e-07,1.04473e-09,0.0117051,0.000165912,6.86844e-07,1.25757e-10,0.0118717,0.000167286,6.87222e-07,3.14818e-10,0.0120396,0.000168661,6.88166e-07,1.40886e-09,0.012209,0.000170042,6.92393e-07,-3.62244e-10,0.0123797,0.000171425,6.91306e-07,9.71397e-10,0.0125518,0.000172811,6.9422e-07,2.02003e-10,0.0127253,0.0001742,6.94826e-07,1.01448e-09,0.0129002,0.000175593,6.97869e-07,3.96653e-10,0.0130765,0.00017699,6.99059e-07,1.92927e-10,0.0132542,0.000178388,6.99638e-07,6.94305e-10,0.0134333,0.00017979,7.01721e-07,7.55108e-10,0.0136138,0.000181195,7.03986e-07,1.05918e-11,0.0137957,0.000182603,7.04018e-07,1.06513e-09,0.013979,0.000184015,7.07214e-07,3.85512e-10,0.0141637,0.00018543,7.0837e-07,1.86769e-10,0.0143499,0.000186848,7.0893e-07,7.30116e-10,0.0145374,0.000188268,7.11121e-07,6.17983e-10,0.0147264,0.000189692,7.12975e-07,5.23282e-10,0.0149168,0.000191119,7.14545e-07,8.28398e-11,0.0151087,0.000192549,7.14793e-07,1.0081e-09,0.0153019,0.000193981,7.17817e-07,5.41244e-10,0.0154966,0.000195418,7.19441e-07,-3.7907e-10,0.0156928,0.000196856,7.18304e-07,1.90641e-09,0.0158903,0.000198298,7.24023e-07,-7.27387e-10,0.0160893,0.000199744,7.21841e-07,1.00317e-09,0.0162898,0.000201191,7.24851e-07,4.39949e-10,0.0164917,0.000202642,7.2617e-07,9.6234e-10,0.0166951,0.000204097,7.29057e-07,-5.64019e-10,0.0168999,0.000205554,7.27365e-07,1.29374e-09,0.0171062,0.000207012,7.31247e-07,9.77025e-10,0.017314,0.000208478,7.34178e-07,-1.47651e-09,0.0175232,0.000209942,7.29748e-07,3.06636e-09,0.0177338,0.00021141,7.38947e-07,-1.47573e-09,0.017946,0.000212884,7.3452e-07,9.7386e-10,0.0181596,0.000214356,7.37442e-07,1.30562e-09,0.0183747,0.000215835,7.41358e-07,-6.08376e-10,0.0185913,0.000217315,7.39533e-07,1.12785e-09,0.0188093,0.000218798,7.42917e-07,-1.77711e-10,0.0190289,0.000220283,7.42384e-07,1.44562e-09,0.0192499,0.000221772,7.4
6721e-07,-1.68825e-11,0.0194724,0.000223266,7.4667e-07,4.84533e-10,0.0196964,0.000224761,7.48124e-07,-5.85298e-11,0.0199219,0.000226257,7.47948e-07,1.61217e-09,0.0201489,0.000227757,7.52785e-07,-8.02136e-10,0.0203775,0.00022926,7.50378e-07,1.59637e-09,0.0206075,0.000230766,7.55167e-07,4.47168e-12,0.020839,0.000232276,7.55181e-07,2.48387e-10,0.021072,0.000233787,7.55926e-07,8.6474e-10,0.0213066,0.000235302,7.5852e-07,1.78299e-11,0.0215426,0.000236819,7.58573e-07,9.26567e-10,0.0217802,0.000238339,7.61353e-07,1.34529e-12,0.0220193,0.000239862,7.61357e-07,9.30659e-10,0.0222599,0.000241387,7.64149e-07,1.34529e-12,0.0225021,0.000242915,7.64153e-07,9.26567e-10,0.0227458,0.000244447,7.66933e-07,1.76215e-11,0.022991,0.00024598,7.66986e-07,8.65536e-10,0.0232377,0.000247517,7.69582e-07,2.45677e-10,0.023486,0.000249057,7.70319e-07,1.44193e-11,0.0237358,0.000250598,7.70363e-07,1.55918e-09,0.0239872,0.000252143,7.7504e-07,-6.63173e-10,0.0242401,0.000253691,7.73051e-07,1.09357e-09,0.0244946,0.000255241,7.76331e-07,1.41919e-11,0.0247506,0.000256793,7.76374e-07,7.12248e-10,0.0250082,0.000258348,7.78511e-07,8.62049e-10,0.0252673,0.000259908,7.81097e-07,-4.35061e-10,0.025528,0.000261469,7.79792e-07,8.7825e-10,0.0257902,0.000263031,7.82426e-07,6.47181e-10,0.0260541,0.000264598,7.84368e-07,2.58448e-10,0.0263194,0.000266167,7.85143e-07,1.81558e-10,0.0265864,0.000267738,7.85688e-07,8.78041e-10,0.0268549,0.000269312,7.88322e-07,3.15102e-11,0.027125,0.000270889,7.88417e-07,8.58525e-10,0.0273967,0.000272468,7.90992e-07,2.59812e-10,0.02767,0.000274051,7.91772e-07,-3.5224e-11,0.0279448,0.000275634,7.91666e-07,1.74377e-09,0.0282212,0.000277223,7.96897e-07,-1.35196e-09,0.0284992,0.000278813,7.92841e-07,1.80141e-09,0.0287788,0.000280404,7.98246e-07,-2.65629e-10,0.0290601,0.000281999,7.97449e-07,1.12374e-09,0.0293428,0.000283598,8.0082e-07,-5.04106e-10,0.0296272,0.000285198,7.99308e-07,8.92764e-10,0.0299132,0.000286799,8.01986e-07,6.58379e-10,0.0302008,0.000288405,8.03961e-07,1.98971e-10,0.0304901,0.000290014,8.04558e-07,4.08382e-10,0.0307809,0.000291624,8.05783e-07,3.01839e-11,0.0310733,0.000293236,8.05874e-07,1.33343e-09,0.0313673,0.000294851,8.09874e-07,2.2419e-10,0.031663,0.000296472,8.10547e-07,-3.67606e-10,0.0319603,0.000298092,8.09444e-07,1.24624e-09,0.0322592,0.000299714,8.13182e-07,-8.92025e-10,0.0325597,0.000301338,8.10506e-07,2.32183e-09,0.0328619,0.000302966,8.17472e-07,-9.44719e-10,0.0331657,0.000304598,8.14638e-07,1.45703e-09,0.0334711,0.000306232,8.19009e-07,-1.15805e-09,0.0337781,0.000307866,8.15535e-07,3.17507e-09,0.0340868,0.000309507,8.2506e-07,-4.09161e-09,0.0343971,0.000311145,8.12785e-07,5.74079e-09,0.0347091,0.000312788,8.30007e-07,-3.97034e-09,0.0350227,0.000314436,8.18096e-07,2.68985e-09,0.035338,0.00031608,8.26166e-07,6.61676e-10,0.0356549,0.000317734,8.28151e-07,-1.61123e-09,0.0359734,0.000319386,8.23317e-07,2.05786e-09,0.0362936,0.000321038,8.29491e-07,8.30388e-10,0.0366155,0.0003227,8.31982e-07,-1.65424e-09,0.036939,0.000324359,8.27019e-07,2.06129e-09,0.0372642,0.000326019,8.33203e-07,8.59719e-10,0.0375911,0.000327688,8.35782e-07,-1.77488e-09,0.0379196,0.000329354,8.30458e-07,2.51464e-09,0.0382498,0.000331023,8.38002e-07,-8.33135e-10,0.0385817,0.000332696,8.35502e-07,8.17825e-10,0.0389152,0.00033437,8.37956e-07,1.28718e-09,0.0392504,0.00033605,8.41817e-07,-2.2413e-09,0.0395873,0.000337727,8.35093e-07,3.95265e-09,0.0399258,0.000339409,8.46951e-07,-2.39332e-09,0.0402661,0.000341095,8.39771e-07,1.89533e-09,0.040608,0.000342781,8.45457e-07,-1.46271e-09,0.0409517,0.000344467,8.41069e-07,3.95554e-09
,0.041297,0.000346161,8.52936e-07,-3.18369e-09,0.041644,0.000347857,8.43385e-07,1.32873e-09,0.0419927,0.000349548,8.47371e-07,1.59402e-09,0.0423431,0.000351248,8.52153e-07,-2.54336e-10,0.0426952,0.000352951,8.5139e-07,-5.76676e-10,0.043049,0.000354652,8.4966e-07,2.56114e-09,0.0434045,0.000356359,8.57343e-07,-2.21744e-09,0.0437617,0.000358067,8.50691e-07,2.58344e-09,0.0441206,0.000359776,8.58441e-07,-6.65826e-10,0.0444813,0.000361491,8.56444e-07,7.99218e-11,0.0448436,0.000363204,8.56684e-07,3.46063e-10,0.0452077,0.000364919,8.57722e-07,2.26116e-09,0.0455734,0.000366641,8.64505e-07,-1.94005e-09,0.045941,0.000368364,8.58685e-07,1.77384e-09,0.0463102,0.000370087,8.64007e-07,-1.43005e-09,0.0466811,0.000371811,8.59717e-07,3.94634e-09,0.0470538,0.000373542,8.71556e-07,-3.17946e-09,0.0474282,0.000375276,8.62017e-07,1.32104e-09,0.0478043,0.000377003,8.6598e-07,1.62045e-09,0.0481822,0.00037874,8.70842e-07,-3.52297e-10,0.0485618,0.000380481,8.69785e-07,-2.11211e-10,0.0489432,0.00038222,8.69151e-07,1.19716e-09,0.0493263,0.000383962,8.72743e-07,-8.52026e-10,0.0497111,0.000385705,8.70187e-07,2.21092e-09,0.0500977,0.000387452,8.76819e-07,-5.41339e-10,0.050486,0.000389204,8.75195e-07,-4.5361e-11,0.0508761,0.000390954,8.75059e-07,7.22669e-10,0.0512679,0.000392706,8.77227e-07,8.79936e-10,0.0516615,0.000394463,8.79867e-07,-5.17048e-10,0.0520568,0.000396222,8.78316e-07,1.18833e-09,0.0524539,0.000397982,8.81881e-07,-5.11022e-10,0.0528528,0.000399744,8.80348e-07,8.55683e-10,0.0532534,0.000401507,8.82915e-07,8.13562e-10,0.0536558,0.000403276,8.85356e-07,-3.84603e-10,0.05406,0.000405045,8.84202e-07,7.24962e-10,0.0544659,0.000406816,8.86377e-07,1.20986e-09,0.0548736,0.000408592,8.90006e-07,-1.83896e-09,0.0552831,0.000410367,8.84489e-07,2.42071e-09,0.0556944,0.000412143,8.91751e-07,-3.93413e-10,0.0561074,0.000413925,8.90571e-07,-8.46967e-10,0.0565222,0.000415704,8.8803e-07,3.78122e-09,0.0569388,0.000417491,8.99374e-07,-3.1021e-09,0.0573572,0.000419281,8.90068e-07,1.17658e-09,0.0577774,0.000421064,8.93597e-07,2.12117e-09,0.0581993,0.000422858,8.99961e-07,-2.21068e-09,0.0586231,0.000424651,8.93329e-07,2.9961e-09,0.0590486,0.000426447,9.02317e-07,-2.32311e-09,0.059476,0.000428244,8.95348e-07,2.57122e-09,0.0599051,0.000430043,9.03062e-07,-5.11098e-10,0.0603361,0.000431847,9.01528e-07,-5.27166e-10,0.0607688,0.000433649,8.99947e-07,2.61984e-09,0.0612034,0.000435457,9.07806e-07,-2.50141e-09,0.0616397,0.000437265,9.00302e-07,3.66045e-09,0.0620779,0.000439076,9.11283e-07,-4.68977e-09,0.0625179,0.000440885,8.97214e-07,7.64783e-09,0.0629597,0.000442702,9.20158e-07,-7.27499e-09,0.0634033,0.000444521,8.98333e-07,6.55113e-09,0.0638487,0.000446337,9.17986e-07,-4.02844e-09,0.0642959,0.000448161,9.05901e-07,2.11196e-09,0.064745,0.000449979,9.12236e-07,3.03125e-09,0.0651959,0.000451813,9.2133e-07,-6.78648e-09,0.0656486,0.000453635,9.00971e-07,9.21375e-09,0.0661032,0.000455464,9.28612e-07,-7.71684e-09,0.0665596,0.000457299,9.05462e-07,6.7522e-09,0.0670178,0.00045913,9.25718e-07,-4.3907e-09,0.0674778,0.000460968,9.12546e-07,3.36e-09,0.0679397,0.000462803,9.22626e-07,-1.59876e-09,0.0684034,0.000464644,9.1783e-07,3.0351e-09,0.068869,0.000466488,9.26935e-07,-3.09101e-09,0.0693364,0.000468333,9.17662e-07,1.8785e-09,0.0698057,0.000470174,9.23298e-07,3.02733e-09,0.0702768,0.00047203,9.3238e-07,-6.53722e-09,0.0707497,0.000473875,9.12768e-07,8.22054e-09,0.0712245,0.000475725,9.37429e-07,-3.99325e-09,0.0717012,0.000477588,9.2545e-07,3.01839e-10,0.0721797,0.00047944,9.26355e-07,2.78597e-09,0.0726601,0.000481301,9.34713e-07,-3.99507e-09,0.0731423
,0.000483158,9.22728e-07,5.7435e-09,0.0736264,0.000485021,9.39958e-07,-4.07776e-09,0.0741123,0.000486888,9.27725e-07,3.11695e-09,0.0746002,0.000488753,9.37076e-07,-9.39394e-10,0.0750898,0.000490625,9.34258e-07,6.4055e-10,0.0755814,0.000492495,9.3618e-07,-1.62265e-09,0.0760748,0.000494363,9.31312e-07,5.84995e-09,0.0765701,0.000496243,9.48861e-07,-6.87601e-09,0.0770673,0.00049812,9.28233e-07,6.75296e-09,0.0775664,0.000499997,9.48492e-07,-5.23467e-09,0.0780673,0.000501878,9.32788e-07,6.73523e-09,0.0785701,0.000503764,9.52994e-07,-6.80514e-09,0.0790748,0.000505649,9.32578e-07,5.5842e-09,0.0795814,0.000507531,9.49331e-07,-6.30583e-10,0.0800899,0.000509428,9.47439e-07,-3.0618e-09,0.0806003,0.000511314,9.38254e-07,5.4273e-09,0.0811125,0.000513206,9.54536e-07,-3.74627e-09,0.0816267,0.000515104,9.43297e-07,2.10713e-09,0.0821427,0.000516997,9.49618e-07,2.76839e-09,0.0826607,0.000518905,9.57924e-07,-5.73006e-09,0.0831805,0.000520803,9.40733e-07,5.25072e-09,0.0837023,0.0005227,9.56486e-07,-3.71718e-10,0.084226,0.000524612,9.5537e-07,-3.76404e-09,0.0847515,0.000526512,9.44078e-07,7.97735e-09,0.085279,0.000528424,9.6801e-07,-5.79367e-09,0.0858084,0.000530343,9.50629e-07,2.96268e-10,0.0863397,0.000532245,9.51518e-07,4.6086e-09,0.0868729,0.000534162,9.65344e-07,-3.82947e-09,0.087408,0.000536081,9.53856e-07,3.25861e-09,0.087945,0.000537998,9.63631e-07,-1.7543e-09,0.088484,0.00053992,9.58368e-07,3.75849e-09,0.0890249,0.000541848,9.69644e-07,-5.82891e-09,0.0895677,0.00054377,9.52157e-07,4.65593e-09,0.0901124,0.000545688,9.66125e-07,2.10643e-09,0.0906591,0.000547627,9.72444e-07,-5.63099e-09,0.0912077,0.000549555,9.55551e-07,5.51627e-09,0.0917582,0.000551483,9.721e-07,-1.53292e-09,0.0923106,0.000553422,9.67501e-07,6.15311e-10,0.092865,0.000555359,9.69347e-07,-9.28291e-10,0.0934213,0.000557295,9.66562e-07,3.09774e-09,0.0939796,0.000559237,9.75856e-07,-4.01186e-09,0.0945398,0.000561177,9.6382e-07,5.49892e-09,0.095102,0.000563121,9.80317e-07,-3.08258e-09,0.0956661,0.000565073,9.71069e-07,-6.19176e-10,0.0962321,0.000567013,9.69212e-07,5.55932e-09,0.0968001,0.000568968,9.8589e-07,-6.71704e-09,0.09737,0.00057092,9.65738e-07,6.40762e-09,0.0979419,0.00057287,9.84961e-07,-4.0122e-09,0.0985158,0.000574828,9.72925e-07,2.19059e-09,0.0990916,0.000576781,9.79496e-07,2.70048e-09,0.0996693,0.000578748,9.87598e-07,-5.54193e-09,0.100249,0.000580706,9.70972e-07,4.56597e-09,0.100831,0.000582662,9.8467e-07,2.17923e-09,0.101414,0.000584638,9.91208e-07,-5.83232e-09,0.102,0.000586603,9.73711e-07,6.24884e-09,0.102588,0.000588569,9.92457e-07,-4.26178e-09,0.103177,0.000590541,9.79672e-07,3.34781e-09,0.103769,0.00059251,9.89715e-07,-1.67904e-09,0.104362,0.000594485,9.84678e-07,3.36839e-09,0.104958,0.000596464,9.94783e-07,-4.34397e-09,0.105555,0.000598441,9.81751e-07,6.55696e-09,0.106155,0.000600424,1.00142e-06,-6.98272e-09,0.106756,0.000602406,9.80474e-07,6.4728e-09,0.107359,0.000604386,9.99893e-07,-4.00742e-09,0.107965,0.000606374,9.8787e-07,2.10654e-09,0.108572,0.000608356,9.9419e-07,3.0318e-09,0.109181,0.000610353,1.00329e-06,-6.7832e-09,0.109793,0.00061234,9.82936e-07,9.1998e-09,0.110406,0.000614333,1.01054e-06,-7.6642e-09,0.111021,0.000616331,9.87543e-07,6.55579e-09,0.111639,0.000618326,1.00721e-06,-3.65791e-09,0.112258,0.000620329,9.96236e-07,6.25467e-10,0.112879,0.000622324,9.98113e-07,1.15593e-09,0.113503,0.000624323,1.00158e-06,2.20158e-09,0.114128,0.000626333,1.00819e-06,-2.51191e-09,0.114755,0.000628342,1.00065e-06,3.95517e-10,0.115385,0.000630345,1.00184e-06,9.29807e-10,0.116016,0.000632351,1.00463e-06,3.33599e-09,0.116649,0.0
0063437,1.01463e-06,-6.82329e-09,0.117285,0.000636379,9.94163e-07,9.05595e-09,0.117922,0.000638395,1.02133e-06,-7.04862e-09,0.118562,0.000640416,1.00019e-06,4.23737e-09,0.119203,0.000642429,1.0129e-06,-2.45033e-09,0.119847,0.000644448,1.00555e-06,5.56395e-09,0.120492,0.000646475,1.02224e-06,-4.9043e-09,0.121139,0.000648505,1.00753e-06,-8.47952e-10,0.121789,0.000650518,1.00498e-06,8.29622e-09,0.122441,0.000652553,1.02987e-06,-9.98538e-09,0.123094,0.000654582,9.99914e-07,9.2936e-09,0.12375,0.00065661,1.02779e-06,-4.83707e-09,0.124407,0.000658651,1.01328e-06,2.60411e-09,0.125067,0.000660685,1.0211e-06,-5.57945e-09,0.125729,0.000662711,1.00436e-06,1.22631e-08,0.126392,0.000664756,1.04115e-06,-1.36704e-08,0.127058,0.000666798,1.00014e-06,1.26161e-08,0.127726,0.000668836,1.03798e-06,-6.99155e-09,0.128396,0.000670891,1.01701e-06,4.48836e-10,0.129068,0.000672926,1.01836e-06,5.19606e-09,0.129742,0.000674978,1.03394e-06,-6.3319e-09,0.130418,0.000677027,1.01495e-06,5.2305e-09,0.131096,0.000679073,1.03064e-06,3.11123e-10,0.131776,0.000681135,1.03157e-06,-6.47511e-09,0.132458,0.000683179,1.01215e-06,1.06882e-08,0.133142,0.000685235,1.04421e-06,-6.47519e-09,0.133829,0.000687304,1.02479e-06,3.11237e-10,0.134517,0.000689355,1.02572e-06,5.23035e-09,0.135207,0.000691422,1.04141e-06,-6.3316e-09,0.1359,0.000693486,1.02242e-06,5.19484e-09,0.136594,0.000695546,1.038e-06,4.53497e-10,0.137291,0.000697623,1.03936e-06,-7.00891e-09,0.137989,0.000699681,1.01834e-06,1.2681e-08,0.13869,0.000701756,1.05638e-06,-1.39128e-08,0.139393,0.000703827,1.01464e-06,1.31679e-08,0.140098,0.000705896,1.05414e-06,-8.95659e-09,0.140805,0.000707977,1.02727e-06,7.75742e-09,0.141514,0.000710055,1.05055e-06,-7.17182e-09,0.142225,0.000712135,1.02903e-06,6.02862e-09,0.142938,0.000714211,1.04712e-06,-2.04163e-09,0.143653,0.000716299,1.04099e-06,2.13792e-09,0.144371,0.000718387,1.04741e-06,-6.51009e-09,0.14509,0.000720462,1.02787e-06,9.00123e-09,0.145812,0.000722545,1.05488e-06,3.07523e-10,0.146535,0.000724656,1.0558e-06,-1.02312e-08,0.147261,0.000726737,1.02511e-06,1.0815e-08,0.147989,0.000728819,1.05755e-06,-3.22681e-09,0.148719,0.000730925,1.04787e-06,2.09244e-09,0.14945,0.000733027,1.05415e-06,-5.143e-09,0.150185,0.00073512,1.03872e-06,3.57844e-09,0.150921,0.000737208,1.04946e-06,5.73027e-09,0.151659,0.000739324,1.06665e-06,-1.15983e-08,0.152399,0.000741423,1.03185e-06,1.08605e-08,0.153142,0.000743519,1.06443e-06,-2.04106e-09,0.153886,0.000745642,1.05831e-06,-2.69642e-09,0.154633,0.00074775,1.05022e-06,-2.07425e-09,0.155382,0.000749844,1.044e-06,1.09934e-08,0.156133,0.000751965,1.07698e-06,-1.20972e-08,0.156886,0.000754083,1.04069e-06,7.59288e-09,0.157641,0.000756187,1.06347e-06,-3.37305e-09,0.158398,0.000758304,1.05335e-06,5.89921e-09,0.159158,0.000760428,1.07104e-06,-5.32248e-09,0.159919,0.000762554,1.05508e-06,4.8927e-10,0.160683,0.000764666,1.05654e-06,3.36547e-09,0.161448,0.000766789,1.06664e-06,9.50081e-10,0.162216,0.000768925,1.06949e-06,-7.16568e-09,0.162986,0.000771043,1.04799e-06,1.28114e-08,0.163758,0.000773177,1.08643e-06,-1.42774e-08,0.164533,0.000775307,1.0436e-06,1.44956e-08,0.165309,0.000777438,1.08708e-06,-1.39025e-08,0.166087,0.00077957,1.04538e-06,1.13118e-08,0.166868,0.000781695,1.07931e-06,-1.54224e-09,0.167651,0.000783849,1.07468e-06,-5.14312e-09,0.168436,0.000785983,1.05925e-06,7.21381e-09,0.169223,0.000788123,1.0809e-06,-8.81096e-09,0.170012,0.000790259,1.05446e-06,1.31289e-08,0.170803,0.000792407,1.09385e-06,-1.39022e-08,0.171597,0.000794553,1.05214e-06,1.26775e-08,0.172392,0.000796695,1.09018e-06,-7.00557e-09,0.17
319,0.000798855,1.06916e-06,4.43796e-10,0.17399,0.000800994,1.07049e-06,5.23031e-09,0.174792,0.000803151,1.08618e-06,-6.46397e-09,0.175596,0.000805304,1.06679e-06,5.72444e-09,0.176403,0.000807455,1.08396e-06,-1.53254e-09,0.177211,0.000809618,1.07937e-06,4.05673e-10,0.178022,0.000811778,1.08058e-06,-9.01916e-11,0.178835,0.000813939,1.08031e-06,-4.49821e-11,0.17965,0.000816099,1.08018e-06,2.70234e-10,0.180467,0.00081826,1.08099e-06,-1.03603e-09,0.181286,0.000820419,1.07788e-06,3.87392e-09,0.182108,0.000822587,1.0895e-06,4.41522e-10,0.182932,0.000824767,1.09083e-06,-5.63997e-09,0.183758,0.000826932,1.07391e-06,7.21707e-09,0.184586,0.000829101,1.09556e-06,-8.32718e-09,0.185416,0.000831267,1.07058e-06,1.11907e-08,0.186248,0.000833442,1.10415e-06,-6.63336e-09,0.187083,0.00083563,1.08425e-06,4.41484e-10,0.187919,0.0008378,1.08557e-06,4.86754e-09,0.188758,0.000839986,1.10017e-06,-5.01041e-09,0.189599,0.000842171,1.08514e-06,2.72811e-10,0.190443,0.000844342,1.08596e-06,3.91916e-09,0.191288,0.000846526,1.09772e-06,-1.04819e-09,0.192136,0.000848718,1.09457e-06,2.73531e-10,0.192985,0.000850908,1.0954e-06,-4.58916e-11,0.193837,0.000853099,1.09526e-06,-9.01158e-11,0.194692,0.000855289,1.09499e-06,4.06506e-10,0.195548,0.00085748,1.09621e-06,-1.53595e-09,0.196407,0.000859668,1.0916e-06,5.73717e-09,0.197267,0.000861869,1.10881e-06,-6.51164e-09,0.19813,0.000864067,1.08928e-06,5.40831e-09,0.198995,0.000866261,1.1055e-06,-2.20401e-10,0.199863,0.000868472,1.10484e-06,-4.52652e-09,0.200732,0.000870668,1.09126e-06,3.42508e-09,0.201604,0.000872861,1.10153e-06,5.72762e-09,0.202478,0.000875081,1.11872e-06,-1.14344e-08,0.203354,0.000877284,1.08441e-06,1.02076e-08,0.204233,0.000879484,1.11504e-06,4.06355e-10,0.205113,0.000881715,1.11626e-06,-1.18329e-08,0.205996,0.000883912,1.08076e-06,1.71227e-08,0.206881,0.000886125,1.13213e-06,-1.19546e-08,0.207768,0.000888353,1.09626e-06,8.93465e-10,0.208658,0.000890548,1.09894e-06,8.38062e-09,0.209549,0.000892771,1.12408e-06,-4.61353e-09,0.210443,0.000895006,1.11024e-06,-4.82756e-09,0.211339,0.000897212,1.09576e-06,9.02245e-09,0.212238,0.00089943,1.12283e-06,-1.45997e-09,0.213138,0.000901672,1.11845e-06,-3.18255e-09,0.214041,0.000903899,1.1089e-06,-7.11073e-10,0.214946,0.000906115,1.10677e-06,6.02692e-09,0.215853,0.000908346,1.12485e-06,-8.49548e-09,0.216763,0.00091057,1.09936e-06,1.30537e-08,0.217675,0.000912808,1.13852e-06,-1.3917e-08,0.218588,0.000915044,1.09677e-06,1.28121e-08,0.219505,0.000917276,1.13521e-06,-7.5288e-09,0.220423,0.000919523,1.11262e-06,2.40205e-09,0.221344,0.000921756,1.11983e-06,-2.07941e-09,0.222267,0.000923989,1.11359e-06,5.91551e-09,0.223192,0.000926234,1.13134e-06,-6.68149e-09,0.224119,0.000928477,1.11129e-06,5.90929e-09,0.225049,0.000930717,1.12902e-06,-2.05436e-09,0.22598,0.000932969,1.12286e-06,2.30807e-09,0.226915,0.000935222,1.12978e-06,-7.17796e-09,0.227851,0.00093746,1.10825e-06,1.15028e-08,0.228789,0.000939711,1.14276e-06,-9.03083e-09,0.22973,0.000941969,1.11566e-06,9.71932e-09,0.230673,0.00094423,1.14482e-06,-1.49452e-08,0.231619,0.000946474,1.09998e-06,2.02591e-08,0.232566,0.000948735,1.16076e-06,-2.13879e-08,0.233516,0.000950993,1.0966e-06,2.05888e-08,0.234468,0.000953247,1.15837e-06,-1.62642e-08,0.235423,0.000955515,1.10957e-06,1.46658e-08,0.236379,0.000957779,1.15357e-06,-1.25966e-08,0.237338,0.000960048,1.11578e-06,5.91793e-09,0.238299,0.000962297,1.13353e-06,3.82602e-09,0.239263,0.000964576,1.14501e-06,-6.3208e-09,0.240229,0.000966847,1.12605e-06,6.55613e-09,0.241197,0.000969119,1.14572e-06,-5.00268e-09,0.242167,0.000971395,1.13071e-06,-1
.44659e-09,0.243139,0.000973652,1.12637e-06,1.07891e-08,0.244114,0.000975937,1.15874e-06,-1.19073e-08,0.245091,0.000978219,1.12302e-06,7.03782e-09,0.246071,0.000980486,1.14413e-06,-1.34276e-09,0.247052,0.00098277,1.1401e-06,-1.66669e-09,0.248036,0.000985046,1.1351e-06,8.00935e-09,0.249022,0.00098734,1.15913e-06,-1.54694e-08,0.250011,0.000989612,1.11272e-06,2.4066e-08,0.251002,0.000991909,1.18492e-06,-2.11901e-08,0.251995,0.000994215,1.12135e-06,1.08973e-09,0.25299,0.000996461,1.12462e-06,1.68311e-08,0.253988,0.000998761,1.17511e-06,-8.8094e-09,0.254987,0.00100109,1.14868e-06,-1.13958e-08,0.25599,0.00100335,1.1145e-06,2.45902e-08,0.256994,0.00100565,1.18827e-06,-2.73603e-08,0.258001,0.00100795,1.10618e-06,2.52464e-08,0.25901,0.00101023,1.18192e-06,-1.40207e-08,0.260021,0.00101256,1.13986e-06,1.03387e-09,0.261035,0.00101484,1.14296e-06,9.8853e-09,0.262051,0.00101715,1.17262e-06,-1.07726e-08,0.263069,0.00101947,1.1403e-06,3.40272e-09,0.26409,0.00102176,1.15051e-06,-2.83827e-09,0.265113,0.00102405,1.142e-06,7.95039e-09,0.266138,0.00102636,1.16585e-06,8.39047e-10,0.267166,0.00102869,1.16836e-06,-1.13066e-08,0.268196,0.00103099,1.13444e-06,1.4585e-08,0.269228,0.00103331,1.1782e-06,-1.72314e-08,0.270262,0.00103561,1.1265e-06,2.45382e-08,0.271299,0.00103794,1.20012e-06,-2.13166e-08,0.272338,0.00104028,1.13617e-06,1.12364e-09,0.273379,0.00104255,1.13954e-06,1.68221e-08,0.274423,0.00104488,1.19001e-06,-8.80736e-09,0.275469,0.00104723,1.16358e-06,-1.13948e-08,0.276518,0.00104953,1.1294e-06,2.45839e-08,0.277568,0.00105186,1.20315e-06,-2.73361e-08,0.278621,0.00105418,1.12114e-06,2.51559e-08,0.279677,0.0010565,1.19661e-06,-1.36832e-08,0.280734,0.00105885,1.15556e-06,-2.25706e-10,0.281794,0.00106116,1.15488e-06,1.45862e-08,0.282857,0.00106352,1.19864e-06,-2.83167e-08,0.283921,0.00106583,1.11369e-06,3.90759e-08,0.284988,0.00106817,1.23092e-06,-3.85801e-08,0.286058,0.00107052,1.11518e-06,2.58375e-08,0.287129,0.00107283,1.19269e-06,-5.16498e-09,0.288203,0.0010752,1.1772e-06,-5.17768e-09,0.28928,0.00107754,1.16167e-06,-3.92671e-09,0.290358,0.00107985,1.14988e-06,2.08846e-08,0.29144,0.00108221,1.21254e-06,-2.00072e-08,0.292523,0.00108458,1.15252e-06,-4.60659e-10,0.293609,0.00108688,1.15114e-06,2.18499e-08,0.294697,0.00108925,1.21669e-06,-2.73343e-08,0.295787,0.0010916,1.13468e-06,2.78826e-08,0.29688,0.00109395,1.21833e-06,-2.45915e-08,0.297975,0.00109632,1.14456e-06,1.08787e-08,0.299073,0.00109864,1.17719e-06,1.08788e-08,0.300172,0.00110102,1.20983e-06,-2.45915e-08,0.301275,0.00110337,1.13605e-06,2.78828e-08,0.302379,0.00110573,1.2197e-06,-2.73348e-08,0.303486,0.00110808,1.1377e-06,2.18518e-08,0.304595,0.00111042,1.20325e-06,-4.67556e-10,0.305707,0.00111283,1.20185e-06,-1.99816e-08,0.306821,0.00111517,1.14191e-06,2.07891e-08,0.307937,0.00111752,1.20427e-06,-3.57026e-09,0.309056,0.00111992,1.19356e-06,-6.50797e-09,0.310177,0.00112228,1.17404e-06,-2.00165e-10,0.3113,0.00112463,1.17344e-06,7.30874e-09,0.312426,0.001127,1.19536e-06,7.67424e-10,0.313554,0.00112939,1.19767e-06,-1.03784e-08,0.314685,0.00113176,1.16653e-06,1.09437e-08,0.315818,0.00113412,1.19936e-06,-3.59406e-09,0.316953,0.00113651,1.18858e-06,3.43251e-09,0.318091,0.0011389,1.19888e-06,-1.0136e-08,0.319231,0.00114127,1.16847e-06,7.30915e-09,0.320374,0.00114363,1.1904e-06,1.07018e-08,0.321518,0.00114604,1.2225e-06,-2.03137e-08,0.322666,0.00114842,1.16156e-06,1.09484e-08,0.323815,0.00115078,1.19441e-06,6.32224e-09,0.324967,0.00115319,1.21337e-06,-6.43509e-09,0.326122,0.00115559,1.19407e-06,-1.03842e-08,0.327278,0.00115795,1.16291e-06,1.81697e-08,0.32843
8,0.00116033,1.21742e-06,-2.6901e-09,0.329599,0.00116276,1.20935e-06,-7.40939e-09,0.330763,0.00116515,1.18713e-06,2.52533e-09,0.331929,0.00116754,1.1947e-06,-2.69191e-09,0.333098,0.00116992,1.18663e-06,8.24218e-09,0.334269,0.00117232,1.21135e-06,-4.74377e-10,0.335443,0.00117474,1.20993e-06,-6.34471e-09,0.336619,0.00117714,1.1909e-06,-3.94922e-09,0.337797,0.00117951,1.17905e-06,2.21417e-08,0.338978,0.00118193,1.24547e-06,-2.50128e-08,0.340161,0.00118435,1.17043e-06,1.8305e-08,0.341346,0.00118674,1.22535e-06,-1.84048e-08,0.342534,0.00118914,1.17013e-06,2.55121e-08,0.343725,0.00119156,1.24667e-06,-2.40389e-08,0.344917,0.00119398,1.17455e-06,1.10389e-08,0.346113,0.00119636,1.20767e-06,9.68574e-09,0.34731,0.0011988,1.23673e-06,-1.99797e-08,0.34851,0.00120122,1.17679e-06,1.06284e-08,0.349713,0.0012036,1.20867e-06,7.26868e-09,0.350917,0.00120604,1.23048e-06,-9.90072e-09,0.352125,0.00120847,1.20078e-06,2.53177e-09,0.353334,0.00121088,1.20837e-06,-2.26199e-10,0.354546,0.0012133,1.20769e-06,-1.62705e-09,0.355761,0.00121571,1.20281e-06,6.73435e-09,0.356978,0.00121813,1.22302e-06,4.49207e-09,0.358197,0.00122059,1.23649e-06,-2.47027e-08,0.359419,0.00122299,1.16238e-06,3.47142e-08,0.360643,0.00122542,1.26653e-06,-2.47472e-08,0.36187,0.00122788,1.19229e-06,4.66965e-09,0.363099,0.00123028,1.20629e-06,6.06872e-09,0.36433,0.00123271,1.2245e-06,8.57729e-10,0.365564,0.00123516,1.22707e-06,-9.49952e-09,0.366801,0.00123759,1.19858e-06,7.33792e-09,0.36804,0.00124001,1.22059e-06,9.95025e-09,0.369281,0.00124248,1.25044e-06,-1.73366e-08,0.370525,0.00124493,1.19843e-06,-2.08464e-10,0.371771,0.00124732,1.1978e-06,1.81704e-08,0.373019,0.00124977,1.25232e-06,-1.28683e-08,0.37427,0.00125224,1.21371e-06,3.50042e-09,0.375524,0.00125468,1.22421e-06,-1.1335e-09,0.37678,0.00125712,1.22081e-06,1.03345e-09,0.378038,0.00125957,1.22391e-06,-3.00023e-09,0.379299,0.00126201,1.21491e-06,1.09676e-08,0.380562,0.00126447,1.24781e-06,-1.10676e-08,0.381828,0.00126693,1.21461e-06,3.50042e-09,0.383096,0.00126937,1.22511e-06,-2.93403e-09,0.384366,0.00127181,1.21631e-06,8.23574e-09,0.385639,0.00127427,1.24102e-06,-2.06607e-10,0.386915,0.00127675,1.2404e-06,-7.40935e-09,0.388193,0.00127921,1.21817e-06,4.1761e-11,0.389473,0.00128165,1.21829e-06,7.24223e-09,0.390756,0.0012841,1.24002e-06,7.91564e-10,0.392042,0.00128659,1.2424e-06,-1.04086e-08,0.393329,0.00128904,1.21117e-06,1.10405e-08,0.39462,0.0012915,1.24429e-06,-3.951e-09,0.395912,0.00129397,1.23244e-06,4.7634e-09,0.397208,0.00129645,1.24673e-06,-1.51025e-08,0.398505,0.0012989,1.20142e-06,2.58443e-08,0.399805,0.00130138,1.27895e-06,-2.86702e-08,0.401108,0.00130385,1.19294e-06,2.92318e-08,0.402413,0.00130632,1.28064e-06,-2.86524e-08,0.403721,0.0013088,1.19468e-06,2.57731e-08,0.405031,0.00131127,1.272e-06,-1.48355e-08,0.406343,0.00131377,1.2275e-06,3.76652e-09,0.407658,0.00131623,1.23879e-06,-2.30784e-10,0.408976,0.00131871,1.2381e-06,-2.84331e-09,0.410296,0.00132118,1.22957e-06,1.16041e-08,0.411618,0.00132367,1.26438e-06,-1.37708e-08,0.412943,0.00132616,1.22307e-06,1.36768e-08,0.41427,0.00132865,1.2641e-06,-1.1134e-08,0.4156,0.00133114,1.2307e-06,1.05714e-09,0.416933,0.00133361,1.23387e-06,6.90538e-09,0.418267,0.00133609,1.25459e-06,1.12372e-09,0.419605,0.00133861,1.25796e-06,-1.14002e-08,0.420945,0.00134109,1.22376e-06,1.46747e-08,0.422287,0.00134358,1.26778e-06,-1.7496e-08,0.423632,0.00134606,1.21529e-06,2.5507e-08,0.424979,0.00134857,1.29182e-06,-2.49272e-08,0.426329,0.00135108,1.21703e-06,1.45972e-08,0.427681,0.00135356,1.26083e-06,-3.65935e-09,0.429036,0.00135607,1.24985e-06,4.00178e-11
,0.430393,0.00135857,1.24997e-06,3.49917e-09,0.431753,0.00136108,1.26047e-06,-1.40366e-08,0.433116,0.00136356,1.21836e-06,2.28448e-08,0.43448,0.00136606,1.28689e-06,-1.77378e-08,0.435848,0.00136858,1.23368e-06,1.83043e-08,0.437218,0.0013711,1.28859e-06,-2.56769e-08,0.43859,0.0013736,1.21156e-06,2.47987e-08,0.439965,0.0013761,1.28595e-06,-1.39133e-08,0.441342,0.00137863,1.24421e-06,1.05202e-09,0.442722,0.00138112,1.24737e-06,9.70507e-09,0.444104,0.00138365,1.27649e-06,-1.00698e-08,0.445489,0.00138617,1.24628e-06,7.72123e-10,0.446877,0.00138867,1.24859e-06,6.98132e-09,0.448267,0.00139118,1.26954e-06,1.10477e-09,0.449659,0.00139373,1.27285e-06,-1.14003e-08,0.451054,0.00139624,1.23865e-06,1.4694e-08,0.452452,0.00139876,1.28273e-06,-1.75734e-08,0.453852,0.00140127,1.23001e-06,2.5797e-08,0.455254,0.00140381,1.3074e-06,-2.60097e-08,0.456659,0.00140635,1.22937e-06,1.86371e-08,0.458067,0.00140886,1.28529e-06,-1.8736e-08,0.459477,0.00141137,1.22908e-06,2.65048e-08,0.46089,0.00141391,1.30859e-06,-2.76784e-08,0.462305,0.00141645,1.22556e-06,2.46043e-08,0.463722,0.00141897,1.29937e-06,-1.11341e-08,0.465143,0.00142154,1.26597e-06,-9.87033e-09,0.466565,0.00142404,1.23636e-06,2.08131e-08,0.467991,0.00142657,1.2988e-06,-1.37773e-08,0.469419,0.00142913,1.25746e-06,4.49378e-09,0.470849,0.00143166,1.27094e-06,-4.19781e-09,0.472282,0.00143419,1.25835e-06,1.22975e-08,0.473717,0.00143674,1.29524e-06,-1.51902e-08,0.475155,0.00143929,1.24967e-06,1.86608e-08,0.476596,0.00144184,1.30566e-06,-2.96506e-08,0.478039,0.00144436,1.2167e-06,4.03368e-08,0.479485,0.00144692,1.33771e-06,-4.22896e-08,0.480933,0.00144947,1.21085e-06,3.94148e-08,0.482384,0.00145201,1.32909e-06,-2.59626e-08,0.483837,0.00145459,1.2512e-06,4.83124e-09,0.485293,0.0014571,1.2657e-06,6.63757e-09,0.486751,0.00145966,1.28561e-06,-1.57911e-09,0.488212,0.00146222,1.28087e-06,-3.21468e-10,0.489676,0.00146478,1.27991e-06,2.86517e-09,0.491142,0.00146735,1.2885e-06,-1.11392e-08,0.49261,0.00146989,1.25508e-06,1.18893e-08,0.494081,0.00147244,1.29075e-06,-6.61574e-09,0.495555,0.001475,1.27091e-06,1.45736e-08,0.497031,0.00147759,1.31463e-06,-2.18759e-08,0.49851,0.00148015,1.249e-06,1.33252e-08,0.499992,0.00148269,1.28897e-06,-1.62277e-09,0.501476,0.00148526,1.28411e-06,-6.83421e-09,0.502962,0.00148781,1.2636e-06,2.89596e-08,0.504451,0.00149042,1.35048e-06,-4.93997e-08,0.505943,0.00149298,1.20228e-06,4.94299e-08,0.507437,0.00149553,1.35057e-06,-2.91107e-08,0.508934,0.00149814,1.26324e-06,7.40848e-09,0.510434,0.00150069,1.28547e-06,-5.23187e-10,0.511936,0.00150326,1.2839e-06,-5.31585e-09,0.51344,0.00150581,1.26795e-06,2.17866e-08,0.514947,0.00150841,1.33331e-06,-2.22257e-08,0.516457,0.00151101,1.26663e-06,7.51178e-09,0.517969,0.00151357,1.28917e-06,-7.82128e-09,0.519484,0.00151613,1.2657e-06,2.37733e-08,0.521002,0.00151873,1.33702e-06,-2.76674e-08,0.522522,0.00152132,1.25402e-06,2.72917e-08,0.524044,0.00152391,1.3359e-06,-2.18949e-08,0.525569,0.00152652,1.27021e-06,6.83372e-10,0.527097,0.00152906,1.27226e-06,1.91613e-08,0.528628,0.00153166,1.32974e-06,-1.77241e-08,0.53016,0.00153427,1.27657e-06,-7.86963e-09,0.531696,0.0015368,1.25296e-06,4.92027e-08,0.533234,0.00153945,1.40057e-06,-6.9732e-08,0.534775,0.00154204,1.19138e-06,5.09114e-08,0.536318,0.00154458,1.34411e-06,-1.4704e-08,0.537864,0.00154722,1.3e-06,7.9048e-09,0.539413,0.00154984,1.32371e-06,-1.69152e-08,0.540964,0.00155244,1.27297e-06,1.51355e-10,0.542517,0.00155499,1.27342e-06,1.63099e-08,0.544074,0.00155758,1.32235e-06,-5.78647e-09,0.545633,0.00156021,1.30499e-06,6.83599e-09,0.547194,0.00156284,1.3255e-06,
-2.15575e-08,0.548758,0.00156543,1.26083e-06,1.97892e-08,0.550325,0.00156801,1.32019e-06,2.00525e-09,0.551894,0.00157065,1.32621e-06,-2.78103e-08,0.553466,0.00157322,1.24278e-06,4.96314e-08,0.555041,0.00157586,1.39167e-06,-5.1506e-08,0.556618,0.00157849,1.23716e-06,3.71835e-08,0.558198,0.00158107,1.34871e-06,-3.76233e-08,0.55978,0.00158366,1.23584e-06,5.37052e-08,0.561365,0.00158629,1.39695e-06,-5.79884e-08,0.562953,0.00158891,1.22299e-06,5.90392e-08,0.564543,0.00159153,1.4001e-06,-5.89592e-08,0.566136,0.00159416,1.22323e-06,5.7588e-08,0.567731,0.00159678,1.39599e-06,-5.21835e-08,0.569329,0.00159941,1.23944e-06,3.19369e-08,0.57093,0.00160199,1.33525e-06,-1.59594e-08,0.572533,0.00160461,1.28737e-06,3.19006e-08,0.574139,0.00160728,1.38307e-06,-5.20383e-08,0.575748,0.00160989,1.22696e-06,5.70431e-08,0.577359,0.00161251,1.39809e-06,-5.69247e-08,0.578973,0.00161514,1.22731e-06,5.14463e-08,0.580589,0.00161775,1.38165e-06,-2.9651e-08,0.582208,0.00162042,1.2927e-06,7.55339e-09,0.58383,0.00162303,1.31536e-06,-5.62636e-10,0.585455,0.00162566,1.31367e-06,-5.30281e-09,0.587081,0.00162827,1.29776e-06,2.17738e-08,0.588711,0.00163093,1.36309e-06,-2.21875e-08,0.590343,0.00163359,1.29652e-06,7.37164e-09,0.591978,0.00163621,1.31864e-06,-7.29907e-09,0.593616,0.00163882,1.29674e-06,2.18247e-08,0.595256,0.00164148,1.36221e-06,-2.03952e-08,0.596899,0.00164414,1.30103e-06,1.51241e-10,0.598544,0.00164675,1.30148e-06,1.97902e-08,0.600192,0.00164941,1.36085e-06,-1.97074e-08,0.601843,0.00165207,1.30173e-06,-5.65175e-10,0.603496,0.00165467,1.30004e-06,2.1968e-08,0.605152,0.00165734,1.36594e-06,-2.77024e-08,0.606811,0.00165999,1.28283e-06,2.92369e-08,0.608472,0.00166264,1.37054e-06,-2.96407e-08,0.610136,0.00166529,1.28162e-06,2.97215e-08,0.611803,0.00166795,1.37079e-06,-2.96408e-08,0.613472,0.0016706,1.28186e-06,2.92371e-08,0.615144,0.00167325,1.36957e-06,-2.77031e-08,0.616819,0.00167591,1.28647e-06,2.19708e-08,0.618496,0.00167855,1.35238e-06,-5.75407e-10,0.620176,0.00168125,1.35065e-06,-1.9669e-08,0.621858,0.00168389,1.29164e-06,1.96468e-08,0.623544,0.00168653,1.35058e-06,6.86403e-10,0.625232,0.00168924,1.35264e-06,-2.23924e-08,0.626922,0.00169187,1.28547e-06,2.92788e-08,0.628615,0.00169453,1.3733e-06,-3.51181e-08,0.630311,0.00169717,1.26795e-06,5.15889e-08,0.63201,0.00169987,1.42272e-06,-5.2028e-08,0.633711,0.00170255,1.26663e-06,3.73139e-08,0.635415,0.0017052,1.37857e-06,-3.76227e-08,0.637121,0.00170784,1.2657e-06,5.35722e-08,0.63883,0.00171054,1.42642e-06,-5.74567e-08,0.640542,0.00171322,1.25405e-06,5.70456e-08,0.642257,0.0017159,1.42519e-06,-5.15163e-08,0.643974,0.00171859,1.27064e-06,2.98103e-08,0.645694,0.00172122,1.36007e-06,-8.12016e-09,0.647417,0.00172392,1.33571e-06,2.67039e-09,0.649142,0.0017266,1.34372e-06,-2.56152e-09,0.65087,0.00172928,1.33604e-06,7.57571e-09,0.6526,0.00173197,1.35876e-06,-2.77413e-08,0.654334,0.00173461,1.27554e-06,4.3785e-08,0.65607,0.00173729,1.40689e-06,-2.81896e-08,0.657808,0.00174002,1.32233e-06,9.36893e-09,0.65955,0.00174269,1.35043e-06,-9.28617e-09,0.661294,0.00174536,1.32257e-06,2.77757e-08,0.66304,0.00174809,1.4059e-06,-4.2212e-08,0.66479,0.00175078,1.27926e-06,2.1863e-08,0.666542,0.0017534,1.34485e-06,1.43648e-08,0.668297,0.00175613,1.38795e-06,-1.97177e-08,0.670054,0.00175885,1.3288e-06,4.90115e-09,0.671814,0.00176152,1.3435e-06,1.13232e-10,0.673577,0.00176421,1.34384e-06,-5.3542e-09,0.675343,0.00176688,1.32778e-06,2.13035e-08,0.677111,0.0017696,1.39169e-06,-2.02553e-08,0.678882,0.00177232,1.33092e-06,1.13005e-10,0.680656,0.00177499,1.33126e-06,1.98031e-08,0.682432,0.00177771
,1.39067e-06,-1.97211e-08,0.684211,0.00178043,1.33151e-06,-5.2349e-10,0.685993,0.00178309,1.32994e-06,2.18151e-08,0.687777,0.00178582,1.39538e-06,-2.71325e-08,0.689564,0.00178853,1.31398e-06,2.71101e-08,0.691354,0.00179124,1.39531e-06,-2.17035e-08,0.693147,0.00179396,1.3302e-06,9.92865e-11,0.694942,0.00179662,1.3305e-06,2.13063e-08,0.69674,0.00179935,1.39442e-06,-2.57198e-08,0.698541,0.00180206,1.31726e-06,2.19682e-08,0.700344,0.00180476,1.38317e-06,-2.54852e-09,0.70215,0.00180752,1.37552e-06,-1.17741e-08,0.703959,0.00181023,1.3402e-06,-9.95999e-09,0.705771,0.00181288,1.31032e-06,5.16141e-08,0.707585,0.00181566,1.46516e-06,-7.72869e-08,0.709402,0.00181836,1.2333e-06,7.87197e-08,0.711222,0.00182106,1.46946e-06,-5.87781e-08,0.713044,0.00182382,1.29312e-06,3.71834e-08,0.714869,0.00182652,1.40467e-06,-3.03511e-08,0.716697,0.00182924,1.31362e-06,2.46161e-08,0.718528,0.00183194,1.38747e-06,-8.5087e-09,0.720361,0.00183469,1.36194e-06,9.41892e-09,0.722197,0.00183744,1.3902e-06,-2.91671e-08,0.724036,0.00184014,1.3027e-06,4.76448e-08,0.725878,0.00184288,1.44563e-06,-4.22028e-08,0.727722,0.00184565,1.31902e-06,1.95682e-09,0.729569,0.00184829,1.3249e-06,3.43754e-08,0.731419,0.00185104,1.42802e-06,-2.0249e-08,0.733271,0.00185384,1.36727e-06,-1.29838e-08,0.735126,0.00185654,1.32832e-06,1.25794e-08,0.736984,0.00185923,1.36606e-06,2.22711e-08,0.738845,0.00186203,1.43287e-06,-4.20594e-08,0.740708,0.00186477,1.3067e-06,2.67571e-08,0.742574,0.00186746,1.38697e-06,-5.36424e-09,0.744443,0.00187022,1.37087e-06,-5.30023e-09,0.746315,0.00187295,1.35497e-06,2.65653e-08,0.748189,0.00187574,1.43467e-06,-4.13564e-08,0.750066,0.00187848,1.3106e-06,1.9651e-08,0.751946,0.00188116,1.36955e-06,2.23572e-08,0.753828,0.00188397,1.43663e-06,-4.9475e-08,0.755714,0.00188669,1.2882e-06,5.63335e-08,0.757602,0.00188944,1.4572e-06,-5.66499e-08,0.759493,0.00189218,1.28725e-06,5.10567e-08,0.761386,0.00189491,1.44042e-06,-2.83677e-08,0.763283,0.00189771,1.35532e-06,2.80962e-09,0.765182,0.00190042,1.36375e-06,1.71293e-08,0.767083,0.0019032,1.41513e-06,-1.17221e-08,0.768988,0.001906,1.37997e-06,-2.98453e-08,0.770895,0.00190867,1.29043e-06,7.14987e-08,0.772805,0.00191146,1.50493e-06,-7.73354e-08,0.774718,0.00191424,1.27292e-06,5.90292e-08,0.776634,0.00191697,1.45001e-06,-3.9572e-08,0.778552,0.00191975,1.33129e-06,3.9654e-08,0.780473,0.00192253,1.45026e-06,-5.94395e-08,0.782397,0.00192525,1.27194e-06,7.88945e-08,0.784324,0.00192803,1.50862e-06,-7.73249e-08,0.786253,0.00193082,1.27665e-06,5.15913e-08,0.788185,0.00193352,1.43142e-06,-9.83099e-09,0.79012,0.00193636,1.40193e-06,-1.22672e-08,0.792058,0.00193912,1.36513e-06,-7.05275e-10,0.793999,0.00194185,1.36301e-06,1.50883e-08,0.795942,0.00194462,1.40828e-06,-4.33147e-11,0.797888,0.00194744,1.40815e-06,-1.49151e-08,0.799837,0.00195021,1.3634e-06,9.93244e-11,0.801788,0.00195294,1.3637e-06,1.45179e-08,0.803743,0.00195571,1.40725e-06,1.43363e-09,0.8057,0.00195853,1.41155e-06,-2.02525e-08,0.80766,0.00196129,1.35079e-06,1.99718e-08,0.809622,0.00196405,1.41071e-06,-3.01649e-11,0.811588,0.00196687,1.41062e-06,-1.9851e-08,0.813556,0.00196964,1.35107e-06,1.98296e-08,0.815527,0.0019724,1.41056e-06,1.37485e-10,0.817501,0.00197522,1.41097e-06,-2.03796e-08,0.819477,0.00197798,1.34983e-06,2.17763e-08,0.821457,0.00198074,1.41516e-06,-7.12085e-09,0.823439,0.00198355,1.3938e-06,6.70707e-09,0.825424,0.00198636,1.41392e-06,-1.97074e-08,0.827412,0.00198913,1.35479e-06,1.25179e-08,0.829402,0.00199188,1.39235e-06,2.92405e-08,0.831396,0.00199475,1.48007e-06,-6.98755e-08,0.833392,0.0019975,1.27044e-06,7.14477e-08,0.
835391,0.00200026,1.48479e-06,-3.71014e-08,0.837392,0.00200311,1.37348e-06,1.73533e-08,0.839397,0.00200591,1.42554e-06,-3.23118e-08,0.841404,0.00200867,1.32861e-06,5.2289e-08,0.843414,0.00201148,1.48547e-06,-5.76348e-08,0.845427,0.00201428,1.31257e-06,5.9041e-08,0.847443,0.00201708,1.48969e-06,-5.93197e-08,0.849461,0.00201988,1.31173e-06,5.90289e-08,0.851482,0.00202268,1.48882e-06,-5.75864e-08,0.853507,0.00202549,1.31606e-06,5.21075e-08,0.855533,0.00202828,1.47238e-06,-3.16344e-08,0.857563,0.00203113,1.37748e-06,1.48257e-08,0.859596,0.00203393,1.42196e-06,-2.76684e-08,0.861631,0.00203669,1.33895e-06,3.62433e-08,0.863669,0.00203947,1.44768e-06,1.90463e-09,0.86571,0.00204237,1.45339e-06,-4.38617e-08,0.867754,0.00204515,1.32181e-06,5.43328e-08,0.8698,0.00204796,1.48481e-06,-5.42603e-08,0.87185,0.00205076,1.32203e-06,4.34989e-08,0.873902,0.00205354,1.45252e-06,-5.26029e-10,0.875957,0.00205644,1.45095e-06,-4.13949e-08,0.878015,0.00205922,1.32676e-06,4.68962e-08,0.880075,0.00206201,1.46745e-06,-2.69807e-08,0.882139,0.00206487,1.38651e-06,1.42181e-09,0.884205,0.00206764,1.39077e-06,2.12935e-08,0.886274,0.00207049,1.45465e-06,-2.69912e-08,0.888346,0.00207332,1.37368e-06,2.70664e-08,0.890421,0.00207615,1.45488e-06,-2.16698e-08,0.892498,0.00207899,1.38987e-06,8.14756e-12,0.894579,0.00208177,1.38989e-06,2.16371e-08,0.896662,0.00208462,1.45481e-06,-2.6952e-08,0.898748,0.00208744,1.37395e-06,2.65663e-08,0.900837,0.00209027,1.45365e-06,-1.97084e-08,0.902928,0.00209312,1.39452e-06,-7.33731e-09,0.905023,0.00209589,1.37251e-06,4.90578e-08,0.90712,0.00209878,1.51968e-06,-6.96845e-08,0.90922,0.00210161,1.31063e-06,5.08664e-08,0.911323,0.00210438,1.46323e-06,-1.45717e-08,0.913429,0.00210727,1.41952e-06,7.42038e-09,0.915538,0.00211013,1.44178e-06,-1.51097e-08,0.917649,0.00211297,1.39645e-06,-6.58618e-09,0.919764,0.00211574,1.37669e-06,4.14545e-08,0.921881,0.00211862,1.50105e-06,-4.00222e-08,0.924001,0.0021215,1.38099e-06,-5.7518e-10,0.926124,0.00212426,1.37926e-06,4.23229e-08,0.92825,0.00212714,1.50623e-06,-4.9507e-08,0.930378,0.00213001,1.35771e-06,3.64958e-08,0.93251,0.00213283,1.4672e-06,-3.68713e-08,0.934644,0.00213566,1.35658e-06,5.13848e-08,0.936781,0.00213852,1.51074e-06,-4.94585e-08,0.938921,0.0021414,1.36236e-06,2.72399e-08,0.941064,0.0021442,1.44408e-06,1.0372e-10,0.943209,0.00214709,1.44439e-06,-2.76547e-08,0.945358,0.0021499,1.36143e-06,5.09106e-08,0.947509,0.00215277,1.51416e-06,-5.67784e-08,0.949663,0.00215563,1.34382e-06,5.69935e-08,0.95182,0.00215849,1.5148e-06,-5.19861e-08,0.95398,0.00216136,1.35885e-06,3.17417e-08,0.956143,0.00216418,1.45407e-06,-1.53758e-08,0.958309,0.00216704,1.40794e-06,2.97615e-08,0.960477,0.00216994,1.49723e-06,-4.40657e-08,0.962649,0.00217281,1.36503e-06,2.72919e-08,0.964823,0.00217562,1.44691e-06,-5.49729e-09,0.967,0.0021785,1.43041e-06,-5.30273e-09,0.96918,0.00218134,1.41451e-06,2.67084e-08,0.971363,0.00218425,1.49463e-06,-4.19265e-08,0.973548,0.00218711,1.36885e-06,2.17881e-08,0.975737,0.00218992,1.43422e-06,1.43789e-08,0.977928,0.00219283,1.47735e-06,-1.96989e-08,0.980122,0.00219572,1.41826e-06,4.81221e-09,0.98232,0.00219857,1.43269e-06,4.50048e-10,0.98452,0.00220144,1.43404e-06,-6.61237e-09,0.986722,0.00220429,1.41421e-06,2.59993e-08,0.988928,0.0022072,1.4922e-06,-3.77803e-08,0.991137,0.00221007,1.37886e-06,5.9127e-09,0.993348,0.00221284,1.3966e-06,1.33339e-07,0.995563,0.00221604,1.79662e-06,-5.98872e-07,0.99778,0.00222015,0.,0.};
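+ // Float path: RGB2LabConvert_f below works directly in 32-bit float, using + // splineInterpolate over c_sRGBGammaTab only for the optional sRGB + // linearization step.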
+ template <bool srgb, int blueIdx, typename T, typename D> + __device__ __forceinline__ void RGB2LabConvert_f(const T& src, D& dst) + { + const float _1_3 = 1.0f / 3.0f; + const float _a = 16.0f / 116.0f; + + float B = blueIdx == 0 ? src.x : src.z; + float G = src.y; + float R = blueIdx == 0 ? src.z : src.x; + + if (srgb) + { + B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE); + } + + float X = B * 0.189828f + G * 0.376219f + R * 0.433953f; + float Y = B * 0.072169f + G * 0.715160f + R * 0.212671f; + float Z = B * 0.872766f + G * 0.109477f + R * 0.017758f; + + float FX = X > 0.008856f ? ::powf(X, _1_3) : (7.787f * X + _a); + float FY = Y > 0.008856f ? ::powf(Y, _1_3) : (7.787f * Y + _a); + float FZ = Z > 0.008856f ? ::powf(Z, _1_3) : (7.787f * Z + _a); + + float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y); + float a = 500.f * (FX - FY); + float b = 200.f * (FY - FZ); + + dst.x = L; + dst.y = a; + dst.z = b; + } + + template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Lab; + template <int scn, int dcn, bool srgb, int blueIdx> + struct RGB2Lab<uchar, scn, dcn, srgb, blueIdx> + : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type> + { + __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const + { + typename TypeVec<uchar, dcn>::vec_type dst; + + RGB2LabConvert_b<srgb, blueIdx>(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} + }; + template <int scn, int dcn, bool srgb, int blueIdx> + struct RGB2Lab<float, scn, dcn, srgb, blueIdx> + : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type> + { + __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const + { + typename TypeVec<float, dcn>::vec_type dst; + + RGB2LabConvert_f<srgb, blueIdx>(src, dst); + + return dst; + } + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} + }; + } + +#define OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(name, scn, dcn, srgb, blueIdx) \ + template <typename T> struct name ## _traits \ + { \ + typedef ::cv::cuda::device::color_detail::RGB2Lab<T, scn, dcn, srgb, blueIdx> functor_type; \ + static __host__ __device__ __forceinline__ functor_type create_functor() \ + { \ + return functor_type(); \ + } \ + };
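+ // A typical instantiation of this macro (an assumption here, mirroring the + // pattern of the HLS traits above) would look like: + // OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2) + // so that rgb_to_lab_traits<T>::functor_type is RGB2Lab<T, 3, 3, true, 2>. + + namespace color_detail + { + __constant__ float c_sRGBInvGammaTab[] = 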
{0,0.0126255,0.,-8.33961e-06,0.0126172,0.0126005,-2.50188e-05,4.1698e-05,0.0252344,0.0126756,0.000100075,-0.000158451,0.0378516,0.0124004,-0.000375277,-0.000207393,0.0496693,0.0110276,-0.000997456,0.00016837,0.0598678,0.00953783,-0.000492346,2.07235e-05,0.068934,0.00861531,-0.000430176,3.62876e-05,0.0771554,0.00786382,-0.000321313,1.87625e-05,0.0847167,0.00727748,-0.000265025,1.53594e-05,0.0917445,0.00679351,-0.000218947,1.10545e-05,0.0983301,0.00638877,-0.000185784,8.66984e-06,0.104542,0.00604322,-0.000159774,6.82996e-06,0.110432,0.00574416,-0.000139284,5.51008e-06,0.116042,0.00548212,-0.000122754,4.52322e-06,0.121406,0.00525018,-0.000109184,3.75557e-06,0.126551,0.00504308,-9.79177e-05,3.17134e-06,0.131499,0.00485676,-8.84037e-05,2.68469e-06,0.13627,0.004688,-8.03496e-05,2.31725e-06,0.14088,0.00453426,-7.33978e-05,2.00868e-06,0.145343,0.00439349,-6.73718e-05,1.74775e-06,0.149671,0.00426399,-6.21286e-05,1.53547e-06,0.153875,0.00414434,-5.75222e-05,1.364e-06,0.157963,0.00403338,-5.34301e-05,1.20416e-06,0.161944,0.00393014,-4.98177e-05,1.09114e-06,0.165825,0.00383377,-4.65443e-05,9.57987e-07,0.169613,0.00374356,-4.36703e-05,8.88359e-07,0.173314,0.00365888,-4.10052e-05,7.7849e-07,0.176933,0.00357921,-3.86697e-05,7.36254e-07,0.180474,0.00350408,-3.6461e-05,6.42534e-07,0.183942,0.00343308,-3.45334e-05,6.12614e-07,0.187342,0.00336586,-3.26955e-05,5.42894e-07,0.190675,0.00330209,-3.10669e-05,5.08967e-07,0.193947,0.00324149,-2.954e-05,4.75977e-07,0.197159,0.00318383,-2.8112e-05,4.18343e-07,0.200315,0.00312887,-2.6857e-05,4.13651e-07,0.203418,0.00307639,-2.5616e-05,3.70847e-07,0.206469,0.00302627,-2.45035e-05,3.3813e-07,0.209471,0.00297828,-2.34891e-05,3.32999e-07,0.212426,0.0029323,-2.24901e-05,2.96826e-07,0.215336,0.00288821,-2.15996e-05,2.82736e-07,0.218203,0.00284586,-2.07514e-05,2.70961e-07,0.221029,0.00280517,-1.99385e-05,2.42744e-07,0.223814,0.00276602,-1.92103e-05,2.33277e-07,0.226561,0.0027283,-1.85105e-05,2.2486e-07,0.229271,0.00269195,-1.78359e-05,2.08383e-07,0.231945,0.00265691,-1.72108e-05,1.93305e-07,0.234585,0.00262307,-1.66308e-05,1.80687e-07,0.237192,0.00259035,-1.60888e-05,1.86632e-07,0.239766,0.00255873,-1.55289e-05,1.60569e-07,0.24231,0.00252815,-1.50472e-05,1.54566e-07,0.244823,0.00249852,-1.45835e-05,1.59939e-07,0.247307,0.00246983,-1.41037e-05,1.29549e-07,0.249763,0.00244202,-1.3715e-05,1.41429e-07,0.252191,0.00241501,-1.32907e-05,1.39198e-07,0.254593,0.00238885,-1.28731e-05,1.06444e-07,0.256969,0.00236342,-1.25538e-05,1.2048e-07,0.25932,0.00233867,-1.21924e-05,1.26892e-07,0.261647,0.00231467,-1.18117e-05,8.72084e-08,0.26395,0.00229131,-1.15501e-05,1.20323e-07,0.26623,0.00226857,-1.11891e-05,8.71514e-08,0.268487,0.00224645,-1.09276e-05,9.73165e-08,0.270723,0.00222489,-1.06357e-05,8.98259e-08,0.272937,0.00220389,-1.03662e-05,7.98218e-08,0.275131,0.00218339,-1.01267e-05,9.75254e-08,0.277304,0.00216343,-9.83416e-06,6.65195e-08,0.279458,0.00214396,-9.63461e-06,8.34313e-08,0.281592,0.00212494,-9.38431e-06,7.65919e-08,0.283708,0.00210641,-9.15454e-06,5.7236e-08,0.285805,0.00208827,-8.98283e-06,8.18939e-08,0.287885,0.00207055,-8.73715e-06,6.2224e-08,0.289946,0.00205326,-8.55047e-06,5.66388e-08,0.291991,0.00203633,-8.38056e-06,6.88491e-08,0.294019,0.00201978,-8.17401e-06,5.53955e-08,0.296031,0.00200359,-8.00782e-06,6.71971e-08,0.298027,0.00198778,-7.80623e-06,3.34439e-08,0.300007,0.00197227,-7.7059e-06,6.7248e-08,0.301971,0.00195706,-7.50416e-06,5.51915e-08,0.303921,0.00194221,-7.33858e-06,3.98124e-08,0.305856,0.00192766,-7.21915e-06,5.37795e-08,0.307776,0.00191338,-7.05781e-06,4.3091
9e-08,0.309683,0.00189939,-6.92853e-06,4.20744e-08,0.311575,0.00188566,-6.80231e-06,5.68321e-08,0.313454,0.00187223,-6.63181e-06,2.86195e-08,0.31532,0.00185905,-6.54595e-06,3.73075e-08,0.317172,0.00184607,-6.43403e-06,6.05684e-08,0.319012,0.00183338,-6.25233e-06,1.84426e-08,0.320839,0.00182094,-6.197e-06,4.44757e-08,0.322654,0.00180867,-6.06357e-06,4.20729e-08,0.324456,0.00179667,-5.93735e-06,2.56511e-08,0.326247,0.00178488,-5.8604e-06,3.41368e-08,0.328026,0.00177326,-5.75799e-06,4.64177e-08,0.329794,0.00176188,-5.61874e-06,1.86107e-08,0.33155,0.0017507,-5.5629e-06,2.81511e-08,0.333295,0.00173966,-5.47845e-06,4.75987e-08,0.335029,0.00172884,-5.33565e-06,1.98726e-08,0.336753,0.00171823,-5.27604e-06,2.19226e-08,0.338466,0.00170775,-5.21027e-06,4.14483e-08,0.340169,0.00169745,-5.08592e-06,2.09017e-08,0.341861,0.00168734,-5.02322e-06,2.39561e-08,0.343543,0.00167737,-4.95135e-06,3.22852e-08,0.345216,0.00166756,-4.85449e-06,2.57173e-08,0.346878,0.00165793,-4.77734e-06,1.38569e-08,0.348532,0.00164841,-4.73577e-06,3.80634e-08,0.350175,0.00163906,-4.62158e-06,1.27043e-08,0.35181,0.00162985,-4.58347e-06,3.03279e-08,0.353435,0.00162078,-4.49249e-06,1.49961e-08,0.355051,0.00161184,-4.4475e-06,2.88977e-08,0.356659,0.00160303,-4.3608e-06,1.84241e-08,0.358257,0.00159436,-4.30553e-06,1.6616e-08,0.359848,0.0015858,-4.25568e-06,3.43218e-08,0.361429,0.00157739,-4.15272e-06,-4.89172e-09,0.363002,0.00156907,-4.16739e-06,4.48498e-08,0.364567,0.00156087,-4.03284e-06,4.30676e-09,0.366124,0.00155282,-4.01992e-06,2.73303e-08,0.367673,0.00154486,-3.93793e-06,5.58036e-09,0.369214,0.001537,-3.92119e-06,3.97554e-08,0.370747,0.00152928,-3.80193e-06,-1.55904e-08,0.372272,0.00152163,-3.8487e-06,5.24081e-08,0.37379,0.00151409,-3.69147e-06,-1.52272e-08,0.375301,0.00150666,-3.73715e-06,3.83028e-08,0.376804,0.0014993,-3.62225e-06,1.10278e-08,0.378299,0.00149209,-3.58916e-06,6.99326e-09,0.379788,0.00148493,-3.56818e-06,2.06038e-08,0.381269,0.00147786,-3.50637e-06,2.98009e-08,0.382744,0.00147093,-3.41697e-06,-2.05978e-08,0.384211,0.00146404,-3.47876e-06,5.25899e-08,0.385672,0.00145724,-3.32099e-06,-1.09471e-08,0.387126,0.00145056,-3.35383e-06,2.10009e-08,0.388573,0.00144392,-3.29083e-06,1.63501e-08,0.390014,0.00143739,-3.24178e-06,3.00641e-09,0.391448,0.00143091,-3.23276e-06,3.12282e-08,0.392875,0.00142454,-3.13908e-06,-8.70932e-09,0.394297,0.00141824,-3.16521e-06,3.34114e-08,0.395712,0.00141201,-3.06497e-06,-5.72754e-09,0.397121,0.00140586,-3.08215e-06,1.9301e-08,0.398524,0.00139975,-3.02425e-06,1.7931e-08,0.39992,0.00139376,-2.97046e-06,-1.61822e-09,0.401311,0.00138781,-2.97531e-06,1.83442e-08,0.402696,0.00138192,-2.92028e-06,1.76485e-08,0.404075,0.00137613,-2.86733e-06,4.68617e-10,0.405448,0.00137039,-2.86593e-06,1.02794e-08,0.406816,0.00136469,-2.83509e-06,1.80179e-08,0.408178,0.00135908,-2.78104e-06,7.05594e-09,0.409534,0.00135354,-2.75987e-06,1.33633e-08,0.410885,0.00134806,-2.71978e-06,-9.04568e-10,0.41223,0.00134261,-2.72249e-06,2.0057e-08,0.41357,0.00133723,-2.66232e-06,1.00841e-08,0.414905,0.00133194,-2.63207e-06,-7.88835e-10,0.416234,0.00132667,-2.63444e-06,2.28734e-08,0.417558,0.00132147,-2.56582e-06,-1.29785e-09,0.418877,0.00131633,-2.56971e-06,1.21205e-08,0.420191,0.00131123,-2.53335e-06,1.24202e-08,0.421499,0.0013062,-2.49609e-06,-2.19681e-09,0.422803,0.0013012,-2.50268e-06,2.61696e-08,0.424102,0.00129628,-2.42417e-06,-1.30747e-08,0.425396,0.00129139,-2.46339e-06,2.6129e-08,0.426685,0.00128654,-2.38501e-06,-2.03454e-09,0.427969,0.00128176,-2.39111e-06,1.18115e-08,0.429248,0.00127702,-2.35567e-06,1.43932e-08,0.4305
23,0.00127235,-2.31249e-06,-9.77965e-09,0.431793,0.00126769,-2.34183e-06,2.47253e-08,0.433058,0.00126308,-2.26766e-06,2.85278e-10,0.434319,0.00125855,-2.2668e-06,3.93614e-09,0.435575,0.00125403,-2.25499e-06,1.37722e-08,0.436827,0.00124956,-2.21368e-06,5.79803e-10,0.438074,0.00124513,-2.21194e-06,1.37112e-08,0.439317,0.00124075,-2.1708e-06,4.17973e-09,0.440556,0.00123642,-2.15826e-06,-6.27703e-10,0.44179,0.0012321,-2.16015e-06,2.81332e-08,0.44302,0.00122787,-2.07575e-06,-2.24985e-08,0.444246,0.00122365,-2.14324e-06,3.20586e-08,0.445467,0.00121946,-2.04707e-06,-1.6329e-08,0.446685,0.00121532,-2.09605e-06,3.32573e-08,0.447898,0.00121122,-1.99628e-06,-2.72927e-08,0.449107,0.00120715,-2.07816e-06,4.6111e-08,0.450312,0.00120313,-1.93983e-06,-3.79416e-08,0.451514,0.00119914,-2.05365e-06,4.60507e-08,0.452711,0.00119517,-1.9155e-06,-2.7052e-08,0.453904,0.00119126,-1.99666e-06,3.23551e-08,0.455093,0.00118736,-1.89959e-06,-1.29613e-08,0.456279,0.00118352,-1.93848e-06,1.94905e-08,0.45746,0.0011797,-1.88e-06,-5.39588e-09,0.458638,0.00117593,-1.89619e-06,2.09282e-09,0.459812,0.00117214,-1.88991e-06,2.68267e-08,0.460982,0.00116844,-1.80943e-06,-1.99925e-08,0.462149,0.00116476,-1.86941e-06,2.3341e-08,0.463312,0.00116109,-1.79939e-06,-1.37674e-08,0.464471,0.00115745,-1.84069e-06,3.17287e-08,0.465627,0.00115387,-1.7455e-06,-2.37407e-08,0.466779,0.00115031,-1.81673e-06,3.34315e-08,0.467927,0.00114677,-1.71643e-06,-2.05786e-08,0.469073,0.00114328,-1.77817e-06,1.90802e-08,0.470214,0.00113978,-1.72093e-06,3.86247e-09,0.471352,0.00113635,-1.70934e-06,-4.72759e-09,0.472487,0.00113292,-1.72352e-06,1.50478e-08,0.473618,0.00112951,-1.67838e-06,4.14108e-09,0.474746,0.00112617,-1.66595e-06,-1.80986e-09,0.47587,0.00112283,-1.67138e-06,3.09816e-09,0.476991,0.0011195,-1.66209e-06,1.92198e-08,0.478109,0.00111623,-1.60443e-06,-2.03726e-08,0.479224,0.00111296,-1.66555e-06,3.2468e-08,0.480335,0.00110973,-1.56814e-06,-2.00922e-08,0.481443,0.00110653,-1.62842e-06,1.80983e-08,0.482548,0.00110333,-1.57413e-06,7.30362e-09,0.48365,0.0011002,-1.55221e-06,-1.75107e-08,0.484749,0.00109705,-1.60475e-06,3.29373e-08,0.485844,0.00109393,-1.50594e-06,-2.48315e-08,0.486937,0.00109085,-1.58043e-06,3.65865e-08,0.488026,0.0010878,-1.47067e-06,-3.21078e-08,0.489112,0.00108476,-1.56699e-06,3.22397e-08,0.490195,0.00108172,-1.47027e-06,-7.44391e-09,0.491276,0.00107876,-1.49261e-06,-2.46428e-09,0.492353,0.00107577,-1.5e-06,1.73011e-08,0.493427,0.00107282,-1.4481e-06,-7.13552e-09,0.494499,0.0010699,-1.4695e-06,1.1241e-08,0.495567,0.001067,-1.43578e-06,-8.02637e-09,0.496633,0.0010641,-1.45986e-06,2.08645e-08,0.497695,0.00106124,-1.39726e-06,-1.58271e-08,0.498755,0.0010584,-1.44475e-06,1.26415e-08,0.499812,0.00105555,-1.40682e-06,2.48655e-08,0.500866,0.00105281,-1.33222e-06,-5.24988e-08,0.501918,0.00104999,-1.48972e-06,6.59206e-08,0.502966,0.00104721,-1.29196e-06,-3.237e-08,0.504012,0.00104453,-1.38907e-06,3.95479e-09,0.505055,0.00104176,-1.3772e-06,1.65509e-08,0.506096,0.00103905,-1.32755e-06,-1.05539e-08,0.507133,0.00103637,-1.35921e-06,2.56648e-08,0.508168,0.00103373,-1.28222e-06,-3.25007e-08,0.509201,0.00103106,-1.37972e-06,4.47336e-08,0.51023,0.00102844,-1.24552e-06,-2.72245e-08,0.511258,0.00102587,-1.32719e-06,4.55952e-09,0.512282,0.00102323,-1.31352e-06,8.98645e-09,0.513304,0.00102063,-1.28656e-06,1.90992e-08,0.514323,0.00101811,-1.22926e-06,-2.57786e-08,0.51534,0.00101557,-1.30659e-06,2.44104e-08,0.516355,0.00101303,-1.23336e-06,-1.22581e-08,0.517366,0.00101053,-1.27014e-06,2.4622e-08,0.518376,0.00100806,-1.19627e-06,-2.66253e-08,0.519383,0.00
100559,-1.27615e-06,2.22744e-08,0.520387,0.00100311,-1.20932e-06,-2.8679e-09,0.521389,0.00100068,-1.21793e-06,-1.08029e-08,0.522388,0.000998211,-1.25034e-06,4.60795e-08,0.523385,0.000995849,-1.1121e-06,-5.4306e-08,0.52438,0.000993462,-1.27502e-06,5.19354e-08,0.525372,0.000991067,-1.11921e-06,-3.42262e-08,0.526362,0.000988726,-1.22189e-06,2.53646e-08,0.52735,0.000986359,-1.14579e-06,-7.62782e-09,0.528335,0.000984044,-1.16868e-06,5.14668e-09,0.529318,0.000981722,-1.15324e-06,-1.29589e-08,0.530298,0.000979377,-1.19211e-06,4.66888e-08,0.531276,0.000977133,-1.05205e-06,-5.45868e-08,0.532252,0.000974865,-1.21581e-06,5.24495e-08,0.533226,0.000972591,-1.05846e-06,-3.60019e-08,0.534198,0.000970366,-1.16647e-06,3.19537e-08,0.535167,0.000968129,-1.07061e-06,-3.2208e-08,0.536134,0.000965891,-1.16723e-06,3.72738e-08,0.537099,0.000963668,-1.05541e-06,2.32205e-09,0.538061,0.000961564,-1.04844e-06,-4.65618e-08,0.539022,0.000959328,-1.18813e-06,6.47159e-08,0.53998,0.000957146,-9.93979e-07,-3.3488e-08,0.540936,0.000955057,-1.09444e-06,9.63166e-09,0.54189,0.000952897,-1.06555e-06,-5.03871e-09,0.542842,0.000950751,-1.08066e-06,1.05232e-08,0.543792,0.000948621,-1.04909e-06,2.25503e-08,0.544739,0.000946591,-9.81444e-07,-4.11195e-08,0.545685,0.000944504,-1.1048e-06,2.27182e-08,0.546628,0.000942363,-1.03665e-06,9.85146e-09,0.54757,0.000940319,-1.00709e-06,-2.51938e-09,0.548509,0.000938297,-1.01465e-06,2.25858e-10,0.549446,0.000936269,-1.01397e-06,1.61598e-09,0.550381,0.000934246,-1.00913e-06,-6.68983e-09,0.551315,0.000932207,-1.0292e-06,2.51434e-08,0.552246,0.000930224,-9.53765e-07,-3.42793e-08,0.553175,0.000928214,-1.0566e-06,5.23688e-08,0.554102,0.000926258,-8.99497e-07,-5.59865e-08,0.555028,0.000924291,-1.06746e-06,5.23679e-08,0.555951,0.000922313,-9.10352e-07,-3.42763e-08,0.556872,0.00092039,-1.01318e-06,2.51326e-08,0.557792,0.000918439,-9.37783e-07,-6.64954e-09,0.558709,0.000916543,-9.57732e-07,1.46554e-09,0.559625,0.000914632,-9.53335e-07,7.87281e-10,0.560538,0.000912728,-9.50973e-07,-4.61466e-09,0.56145,0.000910812,-9.64817e-07,1.76713e-08,0.56236,0.000908935,-9.11804e-07,-6.46564e-09,0.563268,0.000907092,-9.312e-07,8.19121e-09,0.564174,0.000905255,-9.06627e-07,-2.62992e-08,0.565078,0.000903362,-9.85524e-07,3.74007e-08,0.565981,0.000901504,-8.73322e-07,-4.0942e-09,0.566882,0.000899745,-8.85605e-07,-2.1024e-08,0.56778,0.00089791,-9.48677e-07,2.85854e-08,0.568677,0.000896099,-8.62921e-07,-3.3713e-08,0.569573,0.000894272,-9.64059e-07,4.6662e-08,0.570466,0.000892484,-8.24073e-07,-3.37258e-08,0.571358,0.000890734,-9.25251e-07,2.86365e-08,0.572247,0.00088897,-8.39341e-07,-2.12155e-08,0.573135,0.000887227,-9.02988e-07,-3.37913e-09,0.574022,0.000885411,-9.13125e-07,3.47319e-08,0.574906,0.000883689,-8.08929e-07,-1.63394e-08,0.575789,0.000882022,-8.57947e-07,-2.8979e-08,0.57667,0.00088022,-9.44885e-07,7.26509e-08,0.57755,0.000878548,-7.26932e-07,-8.28106e-08,0.578427,0.000876845,-9.75364e-07,7.97774e-08,0.579303,0.000875134,-7.36032e-07,-5.74849e-08,0.580178,0.00087349,-9.08486e-07,3.09529e-08,0.58105,0.000871765,-8.15628e-07,-6.72206e-09,0.581921,0.000870114,-8.35794e-07,-4.06451e-09,0.582791,0.00086843,-8.47987e-07,2.29799e-08,0.583658,0.000866803,-7.79048e-07,-2.82503e-08,0.584524,0.00086516,-8.63799e-07,3.04167e-08,0.585388,0.000863524,-7.72548e-07,-3.38119e-08,0.586251,0.000861877,-8.73984e-07,4.52264e-08,0.587112,0.000860265,-7.38305e-07,-2.78842e-08,0.587972,0.000858705,-8.21958e-07,6.70567e-09,0.58883,0.000857081,-8.01841e-07,1.06161e-09,0.589686,0.000855481,-7.98656e-07,-1.09521e-08,0.590541,0.00085385,-8.31
512e-07,4.27468e-08,0.591394,0.000852316,-7.03272e-07,-4.08257e-08,0.592245,0.000850787,-8.25749e-07,1.34677e-09,0.593095,0.000849139,-8.21709e-07,3.54387e-08,0.593944,0.000847602,-7.15393e-07,-2.38924e-08,0.59479,0.0008461,-7.8707e-07,5.26143e-10,0.595636,0.000844527,-7.85491e-07,2.17879e-08,0.596479,0.000843021,-7.20127e-07,-2.80733e-08,0.597322,0.000841497,-8.04347e-07,3.09005e-08,0.598162,0.000839981,-7.11646e-07,-3.5924e-08,0.599002,0.00083845,-8.19418e-07,5.3191e-08,0.599839,0.000836971,-6.59845e-07,-5.76307e-08,0.600676,0.000835478,-8.32737e-07,5.81227e-08,0.60151,0.000833987,-6.58369e-07,-5.56507e-08,0.602344,0.000832503,-8.25321e-07,4.52706e-08,0.603175,0.000830988,-6.89509e-07,-6.22236e-09,0.604006,0.000829591,-7.08176e-07,-2.03811e-08,0.604834,0.000828113,-7.6932e-07,2.8142e-08,0.605662,0.000826659,-6.84894e-07,-3.25822e-08,0.606488,0.000825191,-7.8264e-07,4.25823e-08,0.607312,0.000823754,-6.54893e-07,-1.85376e-08,0.608135,0.000822389,-7.10506e-07,-2.80365e-08,0.608957,0.000820883,-7.94616e-07,7.1079e-08,0.609777,0.000819507,-5.81379e-07,-7.74655e-08,0.610596,0.000818112,-8.13775e-07,5.9969e-08,0.611413,0.000816665,-6.33868e-07,-4.32013e-08,0.612229,0.000815267,-7.63472e-07,5.32313e-08,0.613044,0.0008139,-6.03778e-07,-5.05148e-08,0.613857,0.000812541,-7.55323e-07,2.96187e-08,0.614669,0.000811119,-6.66466e-07,-8.35545e-09,0.615479,0.000809761,-6.91533e-07,3.80301e-09,0.616288,0.00080839,-6.80124e-07,-6.85666e-09,0.617096,0.000807009,-7.00694e-07,2.36237e-08,0.617903,0.000805678,-6.29822e-07,-2.80336e-08,0.618708,0.000804334,-7.13923e-07,2.8906e-08,0.619511,0.000802993,-6.27205e-07,-2.79859e-08,0.620314,0.000801655,-7.11163e-07,2.34329e-08,0.621114,0.000800303,-6.40864e-07,-6.14108e-09,0.621914,0.000799003,-6.59287e-07,1.13151e-09,0.622712,0.000797688,-6.55893e-07,1.61507e-09,0.62351,0.000796381,-6.51048e-07,-7.59186e-09,0.624305,0.000795056,-6.73823e-07,2.87524e-08,0.6251,0.000793794,-5.87566e-07,-4.7813e-08,0.625893,0.000792476,-7.31005e-07,4.32901e-08,0.626685,0.000791144,-6.01135e-07,-6.13814e-09,0.627475,0.000789923,-6.19549e-07,-1.87376e-08,0.628264,0.000788628,-6.75762e-07,2.14837e-08,0.629052,0.000787341,-6.11311e-07,-7.59265e-09,0.629839,0.000786095,-6.34089e-07,8.88692e-09,0.630625,0.000784854,-6.07428e-07,-2.7955e-08,0.631409,0.000783555,-6.91293e-07,4.33285e-08,0.632192,0.000782302,-5.61307e-07,-2.61497e-08,0.632973,0.000781101,-6.39757e-07,1.6658e-09,0.633754,0.000779827,-6.34759e-07,1.94866e-08,0.634533,0.000778616,-5.76299e-07,-2.00076e-08,0.635311,0.000777403,-6.36322e-07,9.39091e-10,0.636088,0.000776133,-6.33505e-07,1.62512e-08,0.636863,0.000774915,-5.84751e-07,-6.33937e-09,0.637638,0.000773726,-6.03769e-07,9.10609e-09,0.638411,0.000772546,-5.76451e-07,-3.00849e-08,0.639183,0.000771303,-6.66706e-07,5.1629e-08,0.639953,0.000770125,-5.11819e-07,-5.7222e-08,0.640723,0.000768929,-6.83485e-07,5.80497e-08,0.641491,0.000767736,-5.09336e-07,-5.57674e-08,0.642259,0.000766551,-6.76638e-07,4.58105e-08,0.643024,0.000765335,-5.39206e-07,-8.26541e-09,0.643789,0.000764231,-5.64002e-07,-1.27488e-08,0.644553,0.000763065,-6.02249e-07,-3.44168e-10,0.645315,0.00076186,-6.03281e-07,1.41254e-08,0.646077,0.000760695,-5.60905e-07,3.44727e-09,0.646837,0.000759584,-5.50563e-07,-2.79144e-08,0.647596,0.000758399,-6.34307e-07,4.86057e-08,0.648354,0.000757276,-4.88489e-07,-4.72989e-08,0.64911,0.000756158,-6.30386e-07,2.13807e-08,0.649866,0.000754961,-5.66244e-07,2.13808e-08,0.65062,0.000753893,-5.02102e-07,-4.7299e-08,0.651374,0.000752746,-6.43999e-07,4.86059e-08,0.652126,0.000751604,-4.98181e-
07,-2.79154e-08,0.652877,0.000750524,-5.81927e-07,3.45089e-09,0.653627,0.000749371,-5.71575e-07,1.41119e-08,0.654376,0.00074827,-5.29239e-07,-2.93748e-10,0.655123,0.00074721,-5.3012e-07,-1.29368e-08,0.65587,0.000746111,-5.68931e-07,-7.56355e-09,0.656616,0.000744951,-5.91621e-07,4.3191e-08,0.65736,0.000743897,-4.62048e-07,-4.59911e-08,0.658103,0.000742835,-6.00022e-07,2.15642e-08,0.658846,0.0007417,-5.35329e-07,1.93389e-08,0.659587,0.000740687,-4.77312e-07,-3.93152e-08,0.660327,0.000739615,-5.95258e-07,1.87126e-08,0.661066,0.00073848,-5.3912e-07,2.40695e-08,0.661804,0.000737474,-4.66912e-07,-5.53859e-08,0.662541,0.000736374,-6.33069e-07,7.82648e-08,0.663277,0.000735343,-3.98275e-07,-7.88593e-08,0.664012,0.00073431,-6.34853e-07,5.83585e-08,0.664745,0.000733215,-4.59777e-07,-3.53656e-08,0.665478,0.000732189,-5.65874e-07,2.34994e-08,0.66621,0.000731128,-4.95376e-07,9.72743e-10,0.66694,0.00073014,-4.92458e-07,-2.73903e-08,0.66767,0.000729073,-5.74629e-07,4.89839e-08,0.668398,0.000728071,-4.27677e-07,-4.93359e-08,0.669126,0.000727068,-5.75685e-07,2.91504e-08,0.669853,0.000726004,-4.88234e-07,-7.66109e-09,0.670578,0.000725004,-5.11217e-07,1.49392e-09,0.671303,0.000723986,-5.06735e-07,1.68533e-09,0.672026,0.000722978,-5.01679e-07,-8.23525e-09,0.672749,0.00072195,-5.26385e-07,3.12556e-08,0.67347,0.000720991,-4.32618e-07,-5.71825e-08,0.674191,0.000719954,-6.04166e-07,7.8265e-08,0.67491,0.00071898,-3.69371e-07,-7.70634e-08,0.675628,0.00071801,-6.00561e-07,5.11747e-08,0.676346,0.000716963,-4.47037e-07,-8.42615e-09,0.677062,0.000716044,-4.72315e-07,-1.747e-08,0.677778,0.000715046,-5.24725e-07,1.87015e-08,0.678493,0.000714053,-4.68621e-07,2.26856e-09,0.679206,0.000713123,-4.61815e-07,-2.77758e-08,0.679919,0.000712116,-5.45142e-07,4.92298e-08,0.68063,0.000711173,-3.97453e-07,-4.99339e-08,0.681341,0.000710228,-5.47255e-07,3.12967e-08,0.682051,0.000709228,-4.53365e-07,-1.56481e-08,0.68276,0.000708274,-5.00309e-07,3.12958e-08,0.683467,0.000707367,-4.06422e-07,-4.99303e-08,0.684174,0.000706405,-5.56213e-07,4.9216e-08,0.68488,0.00070544,-4.08565e-07,-2.77245e-08,0.685585,0.00070454,-4.91738e-07,2.07748e-09,0.686289,0.000703562,-4.85506e-07,1.94146e-08,0.686992,0.00070265,-4.27262e-07,-2.01314e-08,0.687695,0.000701735,-4.87656e-07,1.50616e-09,0.688396,0.000700764,-4.83137e-07,1.41067e-08,0.689096,0.00069984,-4.40817e-07,1.67168e-09,0.689795,0.000698963,-4.35802e-07,-2.07934e-08,0.690494,0.000698029,-4.98182e-07,2.18972e-08,0.691192,0.000697099,-4.32491e-07,-7.19092e-09,0.691888,0.000696212,-4.54064e-07,6.86642e-09,0.692584,0.000695325,-4.33464e-07,-2.02747e-08,0.693279,0.000694397,-4.94288e-07,1.46279e-08,0.693973,0.000693452,-4.50405e-07,2.13678e-08,0.694666,0.000692616,-3.86301e-07,-4.04945e-08,0.695358,0.000691721,-5.07785e-07,2.14009e-08,0.696049,0.00069077,-4.43582e-07,1.44955e-08,0.69674,0.000689926,-4.00096e-07,-1.97783e-08,0.697429,0.000689067,-4.5943e-07,5.01296e-09,0.698118,0.000688163,-4.44392e-07,-2.73521e-10,0.698805,0.000687273,-4.45212e-07,-3.91893e-09,0.699492,0.000686371,-4.56969e-07,1.59493e-08,0.700178,0.000685505,-4.09121e-07,-2.73351e-10,0.700863,0.000684686,-4.09941e-07,-1.4856e-08,0.701548,0.000683822,-4.54509e-07,9.25979e-11,0.702231,0.000682913,-4.54231e-07,1.44855e-08,0.702913,0.000682048,-4.10775e-07,1.56992e-09,0.703595,0.000681231,-4.06065e-07,-2.07652e-08,0.704276,0.000680357,-4.68361e-07,2.18864e-08,0.704956,0.000679486,-4.02701e-07,-7.17595e-09,0.705635,0.000678659,-4.24229e-07,6.81748e-09,0.706313,0.000677831,-4.03777e-07,-2.0094e-08,0.70699,0.000676963,-4.64059e-07,1.39538e-08,
0.707667,0.000676077,-4.22197e-07,2.38835e-08,0.708343,0.000675304,-3.50547e-07,-4.98831e-08,0.709018,0.000674453,-5.00196e-07,5.64395e-08,0.709692,0.000673622,-3.30878e-07,-5.66657e-08,0.710365,0.00067279,-5.00875e-07,5.1014e-08,0.711037,0.000671942,-3.47833e-07,-2.81809e-08,0.711709,0.000671161,-4.32376e-07,2.10513e-09,0.712379,0.000670303,-4.2606e-07,1.97604e-08,0.713049,0.00066951,-3.66779e-07,-2.15422e-08,0.713718,0.000668712,-4.31406e-07,6.8038e-09,0.714387,0.000667869,-4.10994e-07,-5.67295e-09,0.715054,0.00066703,-4.28013e-07,1.5888e-08,0.715721,0.000666222,-3.80349e-07,1.72576e-09,0.716387,0.000665467,-3.75172e-07,-2.27911e-08,0.717052,0.000664648,-4.43545e-07,2.9834e-08,0.717716,0.00066385,-3.54043e-07,-3.69401e-08,0.718379,0.000663031,-4.64864e-07,5.83219e-08,0.719042,0.000662277,-2.89898e-07,-7.71382e-08,0.719704,0.000661465,-5.21313e-07,7.14171e-08,0.720365,0.000660637,-3.07061e-07,-2.97161e-08,0.721025,0.000659934,-3.96209e-07,-1.21575e-08,0.721685,0.000659105,-4.32682e-07,1.87412e-08,0.722343,0.000658296,-3.76458e-07,-3.2029e-09,0.723001,0.000657533,-3.86067e-07,-5.9296e-09,0.723659,0.000656743,-4.03856e-07,2.69213e-08,0.724315,0.000656016,-3.23092e-07,-4.21511e-08,0.724971,0.000655244,-4.49545e-07,2.24737e-08,0.725625,0.000654412,-3.82124e-07,1.18611e-08,0.726279,0.000653683,-3.46541e-07,-1.03132e-08,0.726933,0.000652959,-3.7748e-07,-3.02128e-08,0.727585,0.000652114,-4.68119e-07,7.15597e-08,0.728237,0.000651392,-2.5344e-07,-7.72119e-08,0.728888,0.000650654,-4.85075e-07,5.8474e-08,0.729538,0.000649859,-3.09654e-07,-3.74746e-08,0.730188,0.000649127,-4.22077e-07,3.18197e-08,0.730837,0.000648379,-3.26618e-07,-3.01997e-08,0.731485,0.000647635,-4.17217e-07,2.93747e-08,0.732132,0.000646888,-3.29093e-07,-2.76943e-08,0.732778,0.000646147,-4.12176e-07,2.17979e-08,0.733424,0.000645388,-3.46783e-07,1.07292e-10,0.734069,0.000644695,-3.46461e-07,-2.22271e-08,0.734713,0.000643935,-4.13142e-07,2.91963e-08,0.735357,0.000643197,-3.25553e-07,-3.49536e-08,0.736,0.000642441,-4.30414e-07,5.10133e-08,0.736642,0.000641733,-2.77374e-07,-4.98904e-08,0.737283,0.000641028,-4.27045e-07,2.93392e-08,0.737924,0.000640262,-3.39028e-07,-7.86156e-09,0.738564,0.000639561,-3.62612e-07,2.10703e-09,0.739203,0.000638842,-3.56291e-07,-5.6653e-10,0.739842,0.000638128,-3.57991e-07,1.59086e-10,0.740479,0.000637412,-3.57513e-07,-6.98321e-11,0.741116,0.000636697,-3.57723e-07,1.20214e-10,0.741753,0.000635982,-3.57362e-07,-4.10987e-10,0.742388,0.000635266,-3.58595e-07,1.5237e-09,0.743023,0.000634553,-3.54024e-07,-5.68376e-09,0.743657,0.000633828,-3.71075e-07,2.12113e-08,0.744291,0.00063315,-3.07441e-07,-1.95569e-08,0.744924,0.000632476,-3.66112e-07,-2.58816e-09,0.745556,0.000631736,-3.73877e-07,2.99096e-08,0.746187,0.000631078,-2.84148e-07,-5.74454e-08,0.746818,0.000630337,-4.56484e-07,8.06629e-08,0.747448,0.000629666,-2.14496e-07,-8.63922e-08,0.748077,0.000628978,-4.73672e-07,8.60918e-08,0.748706,0.000628289,-2.15397e-07,-7.91613e-08,0.749334,0.000627621,-4.5288e-07,5.17393e-08,0.749961,0.00062687,-2.97663e-07,-8.58662e-09,0.750588,0.000626249,-3.23422e-07,-1.73928e-08,0.751214,0.00062555,-3.75601e-07,1.85532e-08,0.751839,0.000624855,-3.19941e-07,2.78479e-09,0.752463,0.000624223,-3.11587e-07,-2.96923e-08,0.753087,0.000623511,-4.00664e-07,5.63799e-08,0.75371,0.000622879,-2.31524e-07,-7.66179e-08,0.754333,0.000622186,-4.61378e-07,7.12778e-08,0.754955,0.000621477,-2.47545e-07,-2.96794e-08,0.755576,0.000620893,-3.36583e-07,-1.21648e-08,0.756196,0.000620183,-3.73077e-07,1.87339e-08,0.756816,0.000619493,-3.16875e-07,-3.16622e-0
9,0.757435,0.00061885,-3.26374e-07,-6.0691e-09,0.758054,0.000618179,-3.44581e-07,2.74426e-08,0.758672,0.000617572,-2.62254e-07,-4.40968e-08,0.759289,0.000616915,-3.94544e-07,2.97352e-08,0.759906,0.000616215,-3.05338e-07,-1.52393e-08,0.760522,0.000615559,-3.51056e-07,3.12221e-08,0.761137,0.000614951,-2.5739e-07,-5.00443e-08,0.761751,0.000614286,-4.07523e-07,4.9746e-08,0.762365,0.00061362,-2.58285e-07,-2.97303e-08,0.762979,0.000613014,-3.47476e-07,9.57079e-09,0.763591,0.000612348,-3.18764e-07,-8.55287e-09,0.764203,0.000611685,-3.44422e-07,2.46407e-08,0.764815,0.00061107,-2.705e-07,-3.04053e-08,0.765426,0.000610437,-3.61716e-07,3.73759e-08,0.766036,0.000609826,-2.49589e-07,-5.94935e-08,0.766645,0.000609149,-4.28069e-07,8.13889e-08,0.767254,0.000608537,-1.83902e-07,-8.72483e-08,0.767862,0.000607907,-4.45647e-07,8.87901e-08,0.76847,0.000607282,-1.79277e-07,-8.90983e-08,0.769077,0.000606656,-4.46572e-07,8.87892e-08,0.769683,0.000606029,-1.80204e-07,-8.72446e-08,0.770289,0.000605407,-4.41938e-07,8.13752e-08,0.770894,0.000604768,-1.97812e-07,-5.94423e-08,0.771498,0.000604194,-3.76139e-07,3.71848e-08,0.772102,0.000603553,-2.64585e-07,-2.96922e-08,0.772705,0.000602935,-3.53661e-07,2.19793e-08,0.773308,0.000602293,-2.87723e-07,1.37955e-09,0.77391,0.000601722,-2.83585e-07,-2.74976e-08,0.774512,0.000601072,-3.66077e-07,4.9006e-08,0.775112,0.000600487,-2.19059e-07,-4.93171e-08,0.775712,0.000599901,-3.67011e-07,2.90531e-08,0.776312,0.000599254,-2.79851e-07,-7.29081e-09,0.776911,0.000598673,-3.01724e-07,1.10077e-10,0.777509,0.00059807,-3.01393e-07,6.85053e-09,0.778107,0.000597487,-2.80842e-07,-2.75123e-08,0.778704,0.000596843,-3.63379e-07,4.35939e-08,0.779301,0.000596247,-2.32597e-07,-2.7654e-08,0.779897,0.000595699,-3.15559e-07,7.41741e-09,0.780492,0.00059509,-2.93307e-07,-2.01562e-09,0.781087,0.000594497,-2.99354e-07,6.45059e-10,0.781681,0.000593901,-2.97418e-07,-5.64635e-10,0.782275,0.000593304,-2.99112e-07,1.61347e-09,0.782868,0.000592711,-2.94272e-07,-5.88926e-09,0.78346,0.000592105,-3.1194e-07,2.19436e-08,0.784052,0.000591546,-2.46109e-07,-2.22805e-08,0.784643,0.000590987,-3.1295e-07,7.57368e-09,0.785234,0.000590384,-2.90229e-07,-8.01428e-09,0.785824,0.00058978,-3.14272e-07,2.44834e-08,0.786414,0.000589225,-2.40822e-07,-3.03148e-08,0.787003,0.000588652,-3.31766e-07,3.7171e-08,0.787591,0.0005881,-2.20253e-07,-5.87646e-08,0.788179,0.000587483,-3.96547e-07,7.86782e-08,0.788766,0.000586926,-1.60512e-07,-7.71342e-08,0.789353,0.000586374,-3.91915e-07,5.10444e-08,0.789939,0.000585743,-2.38782e-07,-7.83422e-09,0.790524,0.000585242,-2.62284e-07,-1.97076e-08,0.791109,0.000584658,-3.21407e-07,2.70598e-08,0.791693,0.000584097,-2.40228e-07,-2.89269e-08,0.792277,0.000583529,-3.27008e-07,2.90431e-08,0.792861,0.000582963,-2.39879e-07,-2.76409e-08,0.793443,0.0005824,-3.22802e-07,2.1916e-08,0.794025,0.00058182,-2.57054e-07,-4.18368e-10,0.794607,0.000581305,-2.58309e-07,-2.02425e-08,0.795188,0.000580727,-3.19036e-07,2.17838e-08,0.795768,0.000580155,-2.53685e-07,-7.28814e-09,0.796348,0.000579625,-2.75549e-07,7.36871e-09,0.796928,0.000579096,-2.53443e-07,-2.21867e-08,0.797506,0.000578523,-3.20003e-07,2.17736e-08,0.798085,0.000577948,-2.54683e-07,-5.30296e-09,0.798662,0.000577423,-2.70592e-07,-5.61698e-10,0.799239,0.00057688,-2.72277e-07,7.54977e-09,0.799816,0.000576358,-2.49627e-07,-2.96374e-08,0.800392,0.00057577,-3.38539e-07,5.1395e-08,0.800968,0.000575247,-1.84354e-07,-5.67335e-08,0.801543,0.000574708,-3.54555e-07,5.63297e-08,0.802117,0.000574168,-1.85566e-07,-4.93759e-08,0.802691,0.000573649,-3.33693e-07,2.19646e-08,
0.803264,0.000573047,-2.678e-07,2.1122e-08,0.803837,0.000572575,-2.04433e-07,-4.68482e-08,0.804409,0.000572026,-3.44978e-07,4.70613e-08,0.804981,0.000571477,-2.03794e-07,-2.21877e-08,0.805552,0.000571003,-2.70357e-07,-1.79153e-08,0.806123,0.000570408,-3.24103e-07,3.42443e-08,0.806693,0.000569863,-2.2137e-07,1.47556e-10,0.807263,0.000569421,-2.20928e-07,-3.48345e-08,0.807832,0.000568874,-3.25431e-07,1.99812e-08,0.808401,0.000568283,-2.65487e-07,1.45143e-08,0.808969,0.000567796,-2.21945e-07,-1.84338e-08,0.809536,0.000567297,-2.77246e-07,-3.83608e-10,0.810103,0.000566741,-2.78397e-07,1.99683e-08,0.81067,0.000566244,-2.18492e-07,-1.98848e-08,0.811236,0.000565747,-2.78146e-07,-3.38976e-11,0.811801,0.000565191,-2.78248e-07,2.00204e-08,0.812366,0.000564695,-2.18187e-07,-2.04429e-08,0.812931,0.000564197,-2.79516e-07,2.1467e-09,0.813495,0.000563644,-2.73076e-07,1.18561e-08,0.814058,0.000563134,-2.37507e-07,1.00334e-08,0.814621,0.000562689,-2.07407e-07,-5.19898e-08,0.815183,0.000562118,-3.63376e-07,7.87163e-08,0.815745,0.000561627,-1.27227e-07,-8.40616e-08,0.816306,0.000561121,-3.79412e-07,7.87163e-08,0.816867,0.000560598,-1.43263e-07,-5.19898e-08,0.817428,0.000560156,-2.99233e-07,1.00335e-08,0.817988,0.000559587,-2.69132e-07,1.18559e-08,0.818547,0.000559085,-2.33564e-07,2.14764e-09,0.819106,0.000558624,-2.27122e-07,-2.04464e-08,0.819664,0.000558108,-2.88461e-07,2.00334e-08,0.820222,0.000557591,-2.28361e-07,-8.24277e-11,0.820779,0.000557135,-2.28608e-07,-1.97037e-08,0.821336,0.000556618,-2.87719e-07,1.92925e-08,0.821893,0.000556101,-2.29841e-07,2.13831e-09,0.822448,0.000555647,-2.23427e-07,-2.78458e-08,0.823004,0.000555117,-3.06964e-07,4.96402e-08,0.823559,0.000554652,-1.58043e-07,-5.15058e-08,0.824113,0.000554181,-3.12561e-07,3.71737e-08,0.824667,0.000553668,-2.0104e-07,-3.75844e-08,0.82522,0.000553153,-3.13793e-07,5.35592e-08,0.825773,0.000552686,-1.53115e-07,-5.74431e-08,0.826326,0.000552207,-3.25444e-07,5.7004e-08,0.826878,0.000551728,-1.54433e-07,-5.13635e-08,0.827429,0.000551265,-3.08523e-07,2.92406e-08,0.82798,0.000550735,-2.20801e-07,-5.99424e-09,0.828531,0.000550276,-2.38784e-07,-5.26363e-09,0.829081,0.000549782,-2.54575e-07,2.70488e-08,0.82963,0.000549354,-1.73429e-07,-4.33268e-08,0.83018,0.000548878,-3.03409e-07,2.7049e-08,0.830728,0.000548352,-2.22262e-07,-5.26461e-09,0.831276,0.000547892,-2.38056e-07,-5.99057e-09,0.831824,0.000547397,-2.56027e-07,2.92269e-08,0.832371,0.000546973,-1.68347e-07,-5.13125e-08,0.832918,0.000546482,-3.22284e-07,5.68139e-08,0.833464,0.000546008,-1.51843e-07,-5.67336e-08,0.83401,0.000545534,-3.22043e-07,5.09113e-08,0.834555,0.000545043,-1.6931e-07,-2.77022e-08,0.8351,0.000544621,-2.52416e-07,2.92924e-10,0.835644,0.000544117,-2.51537e-07,2.65305e-08,0.836188,0.000543694,-1.71946e-07,-4.68105e-08,0.836732,0.00054321,-3.12377e-07,4.15021e-08,0.837275,0.000542709,-1.87871e-07,1.13355e-11,0.837817,0.000542334,-1.87837e-07,-4.15474e-08,0.838359,0.000541833,-3.12479e-07,4.69691e-08,0.838901,0.000541349,-1.71572e-07,-2.71196e-08,0.839442,0.000540925,-2.52931e-07,1.90462e-09,0.839983,0.000540425,-2.47217e-07,1.95011e-08,0.840523,0.000539989,-1.88713e-07,-2.03045e-08,0.841063,0.00053955,-2.49627e-07,2.11216e-09,0.841602,0.000539057,-2.4329e-07,1.18558e-08,0.842141,0.000538606,-2.07723e-07,1.00691e-08,0.842679,0.000538221,-1.77516e-07,-5.21324e-08,0.843217,0.00053771,-3.33913e-07,7.92513e-08,0.843755,0.00053728,-9.6159e-08,-8.60587e-08,0.844292,0.000536829,-3.54335e-07,8.61696e-08,0.844828,0.000536379,-9.58263e-08,-7.98057e-08,0.845364,0.000535948,-3.35243e-07,5.42394e-08,0
.8459,0.00053544,-1.72525e-07,-1.79426e-08,0.846435,0.000535041,-2.26353e-07,1.75308e-08,0.84697,0.000534641,-1.73761e-07,-5.21806e-08,0.847505,0.000534137,-3.30302e-07,7.19824e-08,0.848038,0.000533692,-1.14355e-07,-5.69349e-08,0.848572,0.000533293,-2.8516e-07,3.65479e-08,0.849105,0.000532832,-1.75516e-07,-2.96519e-08,0.849638,0.000532392,-2.64472e-07,2.2455e-08,0.85017,0.000531931,-1.97107e-07,-5.63451e-10,0.850702,0.000531535,-1.98797e-07,-2.02011e-08,0.851233,0.000531077,-2.59401e-07,2.17634e-08,0.851764,0.000530623,-1.94111e-07,-7.24794e-09,0.852294,0.000530213,-2.15854e-07,7.22832e-09,0.852824,0.000529803,-1.94169e-07,-2.16653e-08,0.853354,0.00052935,-2.59165e-07,1.98283e-08,0.853883,0.000528891,-1.9968e-07,1.95678e-09,0.854412,0.000528497,-1.9381e-07,-2.76554e-08,0.85494,0.000528027,-2.76776e-07,4.90603e-08,0.855468,0.00052762,-1.29596e-07,-4.93764e-08,0.855995,0.000527213,-2.77725e-07,2.92361e-08,0.856522,0.000526745,-1.90016e-07,-7.96341e-09,0.857049,0.000526341,-2.13907e-07,2.61752e-09,0.857575,0.000525922,-2.06054e-07,-2.50665e-09,0.8581,0.000525502,-2.13574e-07,7.40906e-09,0.858626,0.000525097,-1.91347e-07,-2.71296e-08,0.859151,0.000524633,-2.72736e-07,4.15048e-08,0.859675,0.000524212,-1.48221e-07,-1.96802e-08,0.860199,0.000523856,-2.07262e-07,-2.23886e-08,0.860723,0.000523375,-2.74428e-07,4.96299e-08,0.861246,0.000522975,-1.25538e-07,-5.69216e-08,0.861769,0.000522553,-2.96303e-07,5.88473e-08,0.862291,0.000522137,-1.19761e-07,-5.92584e-08,0.862813,0.00052172,-2.97536e-07,5.8977e-08,0.863334,0.000521301,-1.20605e-07,-5.74403e-08,0.863855,0.000520888,-2.92926e-07,5.15751e-08,0.864376,0.000520457,-1.38201e-07,-2.96506e-08,0.864896,0.000520091,-2.27153e-07,7.42277e-09,0.865416,0.000519659,-2.04885e-07,-4.05057e-11,0.865936,0.00051925,-2.05006e-07,-7.26074e-09,0.866455,0.000518818,-2.26788e-07,2.90835e-08,0.866973,0.000518451,-1.39538e-07,-4.94686e-08,0.867492,0.000518024,-2.87944e-07,4.95814e-08,0.868009,0.000517597,-1.39199e-07,-2.96479e-08,0.868527,0.000517229,-2.28143e-07,9.40539e-09,0.869044,0.000516801,-1.99927e-07,-7.9737e-09,0.86956,0.000516378,-2.23848e-07,2.24894e-08,0.870077,0.000515997,-1.5638e-07,-2.23793e-08,0.870592,0.000515617,-2.23517e-07,7.42302e-09,0.871108,0.000515193,-2.01248e-07,-7.31283e-09,0.871623,0.000514768,-2.23187e-07,2.18283e-08,0.872137,0.000514387,-1.57702e-07,-2.03959e-08,0.872652,0.000514011,-2.1889e-07,1.50711e-10,0.873165,0.000513573,-2.18437e-07,1.97931e-08,0.873679,0.000513196,-1.59058e-07,-1.97183e-08,0.874192,0.000512819,-2.18213e-07,-5.24324e-10,0.874704,0.000512381,-2.19786e-07,2.18156e-08,0.875217,0.000512007,-1.54339e-07,-2.71336e-08,0.875728,0.000511616,-2.3574e-07,2.71141e-08,0.87624,0.000511226,-1.54398e-07,-2.17182e-08,0.876751,0.000510852,-2.19552e-07,1.54131e-10,0.877262,0.000510414,-2.1909e-07,2.11017e-08,0.877772,0.000510039,-1.55785e-07,-2.49562e-08,0.878282,0.000509652,-2.30654e-07,1.91183e-08,0.878791,0.000509248,-1.73299e-07,8.08751e-09,0.8793,0.000508926,-1.49036e-07,-5.14684e-08,0.879809,0.000508474,-3.03441e-07,7.85766e-08,0.880317,0.000508103,-6.77112e-08,-8.40242e-08,0.880825,0.000507715,-3.19784e-07,7.87063e-08,0.881333,0.000507312,-8.36649e-08,-5.19871e-08,0.88184,0.000506988,-2.39626e-07,1.00327e-08,0.882346,0.000506539,-2.09528e-07,1.18562e-08,0.882853,0.000506156,-1.73959e-07,2.14703e-09,0.883359,0.000505814,-1.67518e-07,-2.04444e-08,0.883864,0.000505418,-2.28851e-07,2.00258e-08,0.88437,0.00050502,-1.68774e-07,-5.42855e-11,0.884874,0.000504682,-1.68937e-07,-1.98087e-08,0.885379,0.000504285,-2.28363e-07,1.96842e-08,0.885
883,0.000503887,-1.6931e-07,6.76342e-10,0.886387,0.000503551,-1.67281e-07,-2.23896e-08,0.88689,0.000503149,-2.3445e-07,2.92774e-08,0.887393,0.000502768,-1.46618e-07,-3.51152e-08,0.887896,0.00050237,-2.51963e-07,5.15787e-08,0.888398,0.00050202,-9.72271e-08,-5.19903e-08,0.8889,0.00050167,-2.53198e-07,3.71732e-08,0.889401,0.000501275,-1.41678e-07,-3.70978e-08,0.889902,0.00050088,-2.52972e-07,5.16132e-08,0.890403,0.000500529,-9.81321e-08,-5.01459e-08,0.890903,0.000500183,-2.4857e-07,2.9761e-08,0.891403,0.000499775,-1.59287e-07,-9.29351e-09,0.891903,0.000499428,-1.87167e-07,7.41301e-09,0.892402,0.000499076,-1.64928e-07,-2.03585e-08,0.892901,0.000498685,-2.26004e-07,1.44165e-08,0.893399,0.000498276,-1.82754e-07,2.22974e-08,0.893898,0.000497978,-1.15862e-07,-4.40013e-08,0.894395,0.000497614,-2.47866e-07,3.44985e-08,0.894893,0.000497222,-1.44371e-07,-3.43882e-08,0.89539,0.00049683,-2.47535e-07,4.34497e-08,0.895886,0.000496465,-1.17186e-07,-2.02012e-08,0.896383,0.00049617,-1.7779e-07,-2.22497e-08,0.896879,0.000495748,-2.44539e-07,4.95952e-08,0.897374,0.000495408,-9.57532e-08,-5.69217e-08,0.89787,0.000495045,-2.66518e-07,5.88823e-08,0.898364,0.000494689,-8.98713e-08,-5.93983e-08,0.898859,0.000494331,-2.68066e-07,5.95017e-08,0.899353,0.000493973,-8.95613e-08,-5.9399e-08,0.899847,0.000493616,-2.67758e-07,5.8885e-08,0.90034,0.000493257,-9.11033e-08,-5.69317e-08,0.900833,0.000492904,-2.61898e-07,4.96326e-08,0.901326,0.000492529,-1.13001e-07,-2.23893e-08,0.901819,0.000492236,-1.80169e-07,-1.968e-08,0.902311,0.000491817,-2.39209e-07,4.15047e-08,0.902802,0.000491463,-1.14694e-07,-2.71296e-08,0.903293,0.000491152,-1.96083e-07,7.409e-09,0.903784,0.000490782,-1.73856e-07,-2.50645e-09,0.904275,0.000490427,-1.81376e-07,2.61679e-09,0.904765,0.000490072,-1.73525e-07,-7.96072e-09,0.905255,0.000489701,-1.97407e-07,2.92261e-08,0.905745,0.000489394,-1.09729e-07,-4.93389e-08,0.906234,0.000489027,-2.57746e-07,4.89204e-08,0.906723,0.000488658,-1.10985e-07,-2.71333e-08,0.907211,0.000488354,-1.92385e-07,8.30861e-12,0.907699,0.00048797,-1.9236e-07,2.71001e-08,0.908187,0.000487666,-1.1106e-07,-4.88041e-08,0.908675,0.000487298,-2.57472e-07,4.89069e-08,0.909162,0.000486929,-1.10751e-07,-2.76143e-08,0.909649,0.000486625,-1.93594e-07,1.9457e-09,0.910135,0.000486244,-1.87757e-07,1.98315e-08,0.910621,0.000485928,-1.28262e-07,-2.16671e-08,0.911107,0.000485606,-1.93264e-07,7.23216e-09,0.911592,0.000485241,-1.71567e-07,-7.26152e-09,0.912077,0.000484877,-1.93352e-07,2.18139e-08,0.912562,0.000484555,-1.2791e-07,-2.03895e-08,0.913047,0.000484238,-1.89078e-07,1.39494e-10,0.913531,0.000483861,-1.8866e-07,1.98315e-08,0.914014,0.000483543,-1.29165e-07,-1.98609e-08,0.914498,0.000483225,-1.88748e-07,7.39912e-12,0.914981,0.000482847,-1.88726e-07,1.98313e-08,0.915463,0.000482529,-1.29232e-07,-1.9728e-08,0.915946,0.000482212,-1.88416e-07,-5.24035e-10,0.916428,0.000481833,-1.89988e-07,2.18241e-08,0.916909,0.000481519,-1.24516e-07,-2.71679e-08,0.917391,0.000481188,-2.06019e-07,2.72427e-08,0.917872,0.000480858,-1.24291e-07,-2.21985e-08,0.918353,0.000480543,-1.90886e-07,1.94644e-09,0.918833,0.000480167,-1.85047e-07,1.44127e-08,0.919313,0.00047984,-1.41809e-07,7.39438e-12,0.919793,0.000479556,-1.41787e-07,-1.44423e-08,0.920272,0.000479229,-1.85114e-07,-1.84291e-09,0.920751,0.000478854,-1.90642e-07,2.18139e-08,0.92123,0.000478538,-1.25201e-07,-2.58081e-08,0.921708,0.00047821,-2.02625e-07,2.18139e-08,0.922186,0.00047787,-1.37183e-07,-1.84291e-09,0.922664,0.00047759,-1.42712e-07,-1.44423e-08,0.923141,0.000477262,-1.86039e-07,7.34701e-12,0.923618,0.00047
689,-1.86017e-07,1.44129e-08,0.924095,0.000476561,-1.42778e-07,1.94572e-09,0.924572,0.000476281,-1.36941e-07,-2.21958e-08,0.925048,0.000475941,-2.03528e-07,2.72327e-08,0.925523,0.000475615,-1.2183e-07,-2.71304e-08,0.925999,0.00047529,-2.03221e-07,2.16843e-08,0.926474,0.000474949,-1.38168e-07,-2.16005e-12,0.926949,0.000474672,-1.38175e-07,-2.16756e-08,0.927423,0.000474331,-2.03202e-07,2.71001e-08,0.927897,0.000474006,-1.21902e-07,-2.71201e-08,0.928371,0.000473681,-2.03262e-07,2.17757e-08,0.928845,0.00047334,-1.37935e-07,-3.78028e-10,0.929318,0.000473063,-1.39069e-07,-2.02636e-08,0.929791,0.000472724,-1.9986e-07,2.18276e-08,0.930263,0.000472389,-1.34377e-07,-7.44231e-09,0.930736,0.000472098,-1.56704e-07,7.94165e-09,0.931208,0.000471809,-1.32879e-07,-2.43243e-08,0.931679,0.00047147,-2.05851e-07,2.97508e-08,0.932151,0.000471148,-1.16599e-07,-3.50742e-08,0.932622,0.000470809,-2.21822e-07,5.09414e-08,0.933092,0.000470518,-6.89976e-08,-4.94821e-08,0.933563,0.000470232,-2.17444e-07,2.77775e-08,0.934033,0.00046988,-1.34111e-07,-2.02351e-09,0.934502,0.000469606,-1.40182e-07,-1.96835e-08,0.934972,0.000469267,-1.99232e-07,2.11529e-08,0.935441,0.000468932,-1.35774e-07,-5.32332e-09,0.93591,0.000468644,-1.51743e-07,1.40413e-10,0.936378,0.000468341,-1.51322e-07,4.76166e-09,0.936846,0.000468053,-1.37037e-07,-1.9187e-08,0.937314,0.000467721,-1.94598e-07,1.23819e-08,0.937782,0.000467369,-1.57453e-07,2.92642e-08,0.938249,0.000467142,-6.96601e-08,-6.98342e-08,0.938716,0.000466793,-2.79163e-07,7.12586e-08,0.939183,0.000466449,-6.53869e-08,-3.63863e-08,0.939649,0.000466209,-1.74546e-07,1.46818e-08,0.940115,0.000465904,-1.305e-07,-2.2341e-08,0.940581,0.000465576,-1.97523e-07,1.50774e-08,0.941046,0.000465226,-1.52291e-07,2.16359e-08,0.941511,0.000464986,-8.73832e-08,-4.20162e-08,0.941976,0.000464685,-2.13432e-07,2.72198e-08,0.942441,0.00046434,-1.31773e-07,-7.2581e-09,0.942905,0.000464055,-1.53547e-07,1.81263e-09,0.943369,0.000463753,-1.48109e-07,7.58386e-12,0.943832,0.000463457,-1.48086e-07,-1.84298e-09,0.944296,0.000463155,-1.53615e-07,7.36433e-09,0.944759,0.00046287,-1.31522e-07,-2.76143e-08,0.945221,0.000462524,-2.14365e-07,4.34883e-08,0.945684,0.000462226,-8.39003e-08,-2.71297e-08,0.946146,0.000461977,-1.65289e-07,5.42595e-09,0.946608,0.000461662,-1.49012e-07,5.42593e-09,0.947069,0.000461381,-1.32734e-07,-2.71297e-08,0.94753,0.000461034,-2.14123e-07,4.34881e-08,0.947991,0.000460736,-8.36585e-08,-2.76134e-08,0.948452,0.000460486,-1.66499e-07,7.36083e-09,0.948912,0.000460175,-1.44416e-07,-1.82993e-09,0.949372,0.000459881,-1.49906e-07,-4.11073e-11,0.949832,0.000459581,-1.50029e-07,1.99434e-09,0.950291,0.000459287,-1.44046e-07,-7.93627e-09,0.950751,0.000458975,-1.67855e-07,2.97507e-08,0.951209,0.000458728,-7.86029e-08,-5.1462e-08,0.951668,0.000458417,-2.32989e-07,5.6888e-08,0.952126,0.000458121,-6.2325e-08,-5.68806e-08,0.952584,0.000457826,-2.32967e-07,5.14251e-08,0.953042,0.000457514,-7.86914e-08,-2.96107e-08,0.953499,0.000457268,-1.67523e-07,7.41296e-09,0.953956,0.000456955,-1.45285e-07,-4.11262e-11,0.954413,0.000456665,-1.45408e-07,-7.24847e-09,0.95487,0.000456352,-1.67153e-07,2.9035e-08,0.955326,0.000456105,-8.00484e-08,-4.92869e-08,0.955782,0.000455797,-2.27909e-07,4.89032e-08,0.956238,0.000455488,-8.11994e-08,-2.71166e-08,0.956693,0.000455244,-1.62549e-07,-4.13678e-11,0.957148,0.000454919,-1.62673e-07,2.72821e-08,0.957603,0.000454675,-8.0827e-08,-4.94824e-08,0.958057,0.000454365,-2.29274e-07,5.14382e-08,0.958512,0.000454061,-7.49597e-08,-3.7061e-08,0.958965,0.0004538,-1.86143e-07,3.72013e-08,0.959419,0.00045
3539,-7.45389e-08,-5.21396e-08,0.959873,0.000453234,-2.30958e-07,5.21476e-08,0.960326,0.000452928,-7.45146e-08,-3.72416e-08,0.960778,0.000452667,-1.8624e-07,3.72143e-08,0.961231,0.000452407,-7.45967e-08,-5.20109e-08,0.961683,0.000452101,-2.30629e-07,5.16199e-08,0.962135,0.000451795,-7.57696e-08,-3.52595e-08,0.962587,0.000451538,-1.81548e-07,2.98133e-08,0.963038,0.000451264,-9.2108e-08,-2.43892e-08,0.963489,0.000451007,-1.65276e-07,8.13892e-09,0.96394,0.000450701,-1.40859e-07,-8.16647e-09,0.964391,0.000450394,-1.65358e-07,2.45269e-08,0.964841,0.000450137,-9.17775e-08,-3.03367e-08,0.965291,0.000449863,-1.82787e-07,3.7215e-08,0.965741,0.000449609,-7.11424e-08,-5.89188e-08,0.96619,0.00044929,-2.47899e-07,7.92509e-08,0.966639,0.000449032,-1.01462e-08,-7.92707e-08,0.967088,0.000448773,-2.47958e-07,5.90181e-08,0.967537,0.000448455,-7.0904e-08,-3.75925e-08,0.967985,0.0004482,-1.83681e-07,3.17471e-08,0.968433,0.000447928,-8.84401e-08,-2.97913e-08,0.968881,0.000447662,-1.77814e-07,2.78133e-08,0.969329,0.000447389,-9.4374e-08,-2.18572e-08,0.969776,0.000447135,-1.59946e-07,1.10134e-11,0.970223,0.000446815,-1.59913e-07,2.18132e-08,0.97067,0.000446561,-9.44732e-08,-2.76591e-08,0.971116,0.000446289,-1.7745e-07,2.92185e-08,0.971562,0.000446022,-8.97948e-08,-2.96104e-08,0.972008,0.000445753,-1.78626e-07,2.96185e-08,0.972454,0.000445485,-8.97706e-08,-2.92588e-08,0.972899,0.000445218,-1.77547e-07,2.78123e-08,0.973344,0.000444946,-9.41103e-08,-2.23856e-08,0.973789,0.000444691,-1.61267e-07,2.12559e-09,0.974233,0.000444374,-1.5489e-07,1.38833e-08,0.974678,0.000444106,-1.13241e-07,1.94591e-09,0.975122,0.000443886,-1.07403e-07,-2.16669e-08,0.975565,0.000443606,-1.72404e-07,2.5117e-08,0.976009,0.000443336,-9.70526e-08,-1.91963e-08,0.976452,0.000443085,-1.54642e-07,-7.93627e-09,0.976895,0.000442752,-1.7845e-07,5.09414e-08,0.977338,0.000442548,-2.56262e-08,-7.66201e-08,0.97778,0.000442266,-2.55486e-07,7.67249e-08,0.978222,0.000441986,-2.53118e-08,-5.14655e-08,0.978664,0.000441781,-1.79708e-07,9.92773e-09,0.979106,0.000441451,-1.49925e-07,1.17546e-08,0.979547,0.000441186,-1.14661e-07,2.65868e-09,0.979988,0.000440965,-1.06685e-07,-2.23893e-08,0.980429,0.000440684,-1.73853e-07,2.72939e-08,0.980869,0.000440419,-9.19716e-08,-2.71816e-08,0.98131,0.000440153,-1.73516e-07,2.18278e-08,0.98175,0.000439872,-1.08033e-07,-5.24833e-10,0.982189,0.000439654,-1.09607e-07,-1.97284e-08,0.982629,0.000439376,-1.68793e-07,1.98339e-08,0.983068,0.000439097,-1.09291e-07,-2.62901e-12,0.983507,0.000438879,-1.09299e-07,-1.98234e-08,0.983946,0.000438601,-1.68769e-07,1.96916e-08,0.984384,0.000438322,-1.09694e-07,6.6157e-10,0.984823,0.000438105,-1.0771e-07,-2.23379e-08,0.985261,0.000437823,-1.74723e-07,2.90855e-08,0.985698,0.00043756,-8.74669e-08,-3.43992e-08,0.986136,0.000437282,-1.90665e-07,4.89068e-08,0.986573,0.000437048,-4.39442e-08,-4.20188e-08,0.98701,0.000436834,-1.7e-07,-4.11073e-11,0.987446,0.000436494,-1.70124e-07,4.21832e-08,0.987883,0.00043628,-4.35742e-08,-4.94824e-08,0.988319,0.000436044,-1.92021e-07,3.6537e-08,0.988755,0.00043577,-8.24102e-08,-3.70611e-08,0.989191,0.000435494,-1.93593e-07,5.21026e-08,0.989626,0.000435263,-3.72855e-08,-5.21402e-08,0.990061,0.000435032,-1.93706e-07,3.7249e-08,0.990496,0.000434756,-8.19592e-08,-3.72512e-08,0.990931,0.000434481,-1.93713e-07,5.21511e-08,0.991365,0.00043425,-3.72595e-08,-5.21439e-08,0.991799,0.000434019,-1.93691e-07,3.72152e-08,0.992233,0.000433743,-8.20456e-08,-3.71123e-08,0.992667,0.000433468,-1.93382e-07,5.16292e-08,0.9931,0.000433236,-3.84947e-08,-5.01953e-08,0.993533,0.000433008,-1.
89081e-07,2.99427e-08,0.993966,0.00043272,-9.92525e-08,-9.9708e-09,0.994399,0.000432491,-1.29165e-07,9.94051e-09,0.994831,0.000432263,-9.93434e-08,-2.97912e-08,0.995263,0.000431975,-1.88717e-07,4.96198e-08,0.995695,0.000431746,-3.98578e-08,-4.94785e-08,0.996127,0.000431518,-1.88293e-07,2.9085e-08,0.996558,0.000431229,-1.01038e-07,-7.25675e-09,0.996989,0.000431005,-1.22809e-07,-5.79945e-11,0.99742,0.000430759,-1.22983e-07,7.48873e-09,0.997851,0.000430536,-1.00516e-07,-2.98969e-08,0.998281,0.000430245,-1.90207e-07,5.24942e-08,0.998711,0.000430022,-3.27246e-08,-6.08706e-08,0.999141,0.000429774,-2.15336e-07,7.17788e-08,0.999571,0.000429392,0.,0.};
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Lab2RGBConvert_f(const T& src, D& dst)
+        {
+            const float lThresh = 0.008856f * 903.3f;
+            const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
+
+            float Y, fy;
+
+            // Invert the piecewise Lab lightness mapping: linear below lThresh,
+            // cubic above it.
+            if (src.x <= lThresh)
+            {
+                Y = src.x / 903.3f;
+                fy = 7.787f * Y + 16.0f / 116.0f;
+            }
+            else
+            {
+                fy = (src.x + 16.0f) / 116.0f;
+                Y = fy * fy * fy;
+            }
+
+            float X = src.y / 500.0f + fy;
+            float Z = fy - src.z / 200.0f;
+
+            if (X <= fThresh)
+                X = (X - 16.0f / 116.0f) / 7.787f;
+            else
+                X = X * X * X;
+
+            if (Z <= fThresh)
+                Z = (Z - 16.0f / 116.0f) / 7.787f;
+            else
+                Z = Z * Z * Z;
+
+            // XYZ -> linear RGB.
+            float B = 0.052891f * X - 0.204043f * Y + 1.151152f * Z;
+            float G = -0.921235f * X + 1.875991f * Y + 0.045244f * Z;
+            float R = 3.079933f * X - 1.537150f * Y - 0.542782f * Z;
+
+            // Optional linear -> sRGB gamma encoding via the spline LUT above.
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            // blueIdx selects BGR vs RGB channel ordering in the output.
+            dst.x = blueIdx == 0 ? B : R;
+            dst.y = G;
+            dst.z = blueIdx == 0 ? R : B;
+            setAlpha(dst, ColorChannel<float>::max());
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Lab2RGBConvert_b(const T& src, D& dst)
+        {
+            float3 srcf, dstf;
+
+            // Rescale 8-bit Lab to float ranges: L in [0, 100], a/b offset by 128.
+            srcf.x = src.x * (100.f / 255.f);
+            srcf.y = src.y - 128;
+            srcf.z = src.z - 128;
+
+            Lab2RGBConvert_f<srgb, blueIdx>(srcf, dstf);
+
+            dst.x = saturate_cast<uchar>(dstf.x * 255.f);
+            dst.y = saturate_cast<uchar>(dstf.y * 255.f);
+            dst.z = saturate_cast<uchar>(dstf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct Lab2RGB;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Lab2RGB<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                Lab2RGBConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Lab2RGB() {}
+            __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                Lab2RGBConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Lab2RGB() {}
+            __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::Lab2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+///////////////////////////////////// RGB <-> Luv /////////////////////////////////////
+
+    namespace color_detail
+    {
+        __constant__ float c_LabCbrtTab[] =
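+        // Added commentary, not in the upstream source: like c_sRGBInvGammaTab
+        // above, this table appears to pack four cubic-spline coefficients per
+        // integer sample; splineInterpolate(x, tab, n) selects the group at
+        // 4*floor(x) and evaluates the cubic in the fractional part of x. The
+        // curve tabulated here approximates the cube-root mapping used by the
+        // Lab/Luv forward transforms.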
{0.137931,0.0114066,0.,1.18859e-07,0.149338,0.011407,3.56578e-07,-5.79396e-07,0.160745,0.0114059,-1.38161e-06,2.16892e-06,0.172151,0.0114097,5.12516e-06,-8.0814e-06,0.183558,0.0113957,-1.9119e-05,3.01567e-05,0.194965,0.0114479,7.13509e-05,-0.000112545,0.206371,0.011253,-0.000266285,-0.000106493,0.217252,0.0104009,-0.000585765,7.32149e-05,0.22714,0.00944906,-0.00036612,1.21917e-05,0.236235,0.0087534,-0.000329545,2.01753e-05,0.244679,0.00815483,-0.000269019,1.24435e-05,0.252577,0.00765412,-0.000231689,1.05618e-05,0.26001,0.00722243,-0.000200003,8.26662e-06,0.267041,0.00684723,-0.000175203,6.76746e-06,0.27372,0.00651712,-0.000154901,5.61192e-06,0.280088,0.00622416,-0.000138065,4.67009e-06,0.286179,0.00596204,-0.000124055,3.99012e-06,0.292021,0.0057259,-0.000112085,3.36032e-06,0.297638,0.00551181,-0.000102004,2.95338e-06,0.30305,0.00531666,-9.31435e-05,2.52875e-06,0.308277,0.00513796,-8.55572e-05,2.22022e-06,0.313331,0.00497351,-7.88966e-05,1.97163e-06,0.318228,0.00482163,-7.29817e-05,1.7248e-06,0.322978,0.00468084,-6.78073e-05,1.55998e-06,0.327593,0.0045499,-6.31274e-05,1.36343e-06,0.332081,0.00442774,-5.90371e-05,1.27136e-06,0.336451,0.00431348,-5.5223e-05,1.09111e-06,0.34071,0.00420631,-5.19496e-05,1.0399e-06,0.344866,0.00410553,-4.88299e-05,9.18347e-07,0.348923,0.00401062,-4.60749e-05,8.29942e-07,0.352889,0.00392096,-4.35851e-05,7.98478e-07,0.356767,0.00383619,-4.11896e-05,6.84917e-07,0.360562,0.00375586,-3.91349e-05,6.63976e-07,0.36428,0.00367959,-3.7143e-05,5.93086e-07,0.367923,0.00360708,-3.53637e-05,5.6976e-07,0.371495,0.00353806,-3.36544e-05,4.95533e-07,0.375,0.00347224,-3.21678e-05,4.87951e-07,0.378441,0.00340937,-3.0704e-05,4.4349e-07,0.38182,0.00334929,-2.93735e-05,4.20297e-07,0.38514,0.0032918,-2.81126e-05,3.7872e-07,0.388404,0.00323671,-2.69764e-05,3.596e-07,0.391614,0.00318384,-2.58976e-05,3.5845e-07,0.394772,0.00313312,-2.48223e-05,2.92765e-07,0.397881,0.00308435,-2.3944e-05,3.18232e-07,0.400942,0.00303742,-2.29893e-05,2.82046e-07,0.403957,0.00299229,-2.21432e-05,2.52315e-07,0.406927,0.00294876,-2.13862e-05,2.58416e-07,0.409855,0.00290676,-2.0611e-05,2.33939e-07,0.412741,0.00286624,-1.99092e-05,2.36342e-07,0.415587,0.00282713,-1.92001e-05,1.916e-07,0.418396,0.00278931,-1.86253e-05,2.1915e-07,0.421167,0.00275271,-1.79679e-05,1.83498e-07,0.423901,0.00271733,-1.74174e-05,1.79343e-07,0.426602,0.00268303,-1.68794e-05,1.72013e-07,0.429268,0.00264979,-1.63633e-05,1.75686e-07,0.431901,0.00261759,-1.58363e-05,1.3852e-07,0.434503,0.00258633,-1.54207e-05,1.64304e-07,0.437074,0.00255598,-1.49278e-05,1.28136e-07,0.439616,0.00252651,-1.45434e-05,1.57618e-07,0.442128,0.0024979,-1.40705e-05,1.0566e-07,0.444612,0.00247007,-1.37535e-05,1.34998e-07,0.447068,0.00244297,-1.33485e-05,1.29207e-07,0.449498,0.00241666,-1.29609e-05,9.32347e-08,0.451902,0.00239102,-1.26812e-05,1.23703e-07,0.45428,0.00236603,-1.23101e-05,9.74072e-08,0.456634,0.0023417,-1.20179e-05,1.12518e-07,0.458964,0.002318,-1.16803e-05,7.83681e-08,0.46127,0.00229488,-1.14452e-05,1.10452e-07,0.463554,0.00227232,-1.11139e-05,7.58719e-08,0.465815,0.00225032,-1.08863e-05,9.2699e-08,0.468055,0.00222882,-1.06082e-05,8.97738e-08,0.470273,0.00220788,-1.03388e-05,5.4845e-08,0.47247,0.00218736,-1.01743e-05,1.0808e-07,0.474648,0.00216734,-9.85007e-06,4.9277e-08,0.476805,0.00214779,-9.70224e-06,8.22408e-08,0.478943,0.00212863,-9.45551e-06,6.87942e-08,0.481063,0.00210993,-9.24913e-06,5.98144e-08,0.483163,0.00209161,-9.06969e-06,7.93789e-08,0.485246,0.00207371,-8.83155e-06,3.99032e-08,0.487311,0.00205616,-8.71184e-06,8.88325e-08,0.489358,0.002039,-8
.44534e-06,2.20004e-08,0.491389,0.00202218,-8.37934e-06,9.13872e-08,0.493403,0.0020057,-8.10518e-06,2.96829e-08,0.495401,0.00198957,-8.01613e-06,5.81028e-08,0.497382,0.00197372,-7.84183e-06,6.5731e-08,0.499348,0.00195823,-7.64463e-06,3.66019e-08,0.501299,0.00194305,-7.53483e-06,2.62811e-08,0.503234,0.00192806,-7.45598e-06,9.66907e-08,0.505155,0.00191344,-7.16591e-06,4.18928e-09,0.507061,0.00189912,-7.15334e-06,6.53665e-08,0.508953,0.00188501,-6.95724e-06,3.23686e-08,0.510831,0.00187119,-6.86014e-06,4.35774e-08,0.512696,0.0018576,-6.72941e-06,3.17406e-08,0.514547,0.00184424,-6.63418e-06,6.78785e-08,0.516384,0.00183117,-6.43055e-06,-5.23126e-09,0.518209,0.0018183,-6.44624e-06,7.22562e-08,0.520021,0.00180562,-6.22947e-06,1.42292e-08,0.52182,0.0017932,-6.18679e-06,4.9641e-08,0.523607,0.00178098,-6.03786e-06,2.56259e-08,0.525382,0.00176898,-5.96099e-06,2.66696e-08,0.527145,0.00175714,-5.88098e-06,4.65094e-08,0.528897,0.00174552,-5.74145e-06,2.57114e-08,0.530637,0.00173411,-5.66431e-06,2.94588e-08,0.532365,0.00172287,-5.57594e-06,3.52667e-08,0.534082,0.00171182,-5.47014e-06,8.28868e-09,0.535789,0.00170091,-5.44527e-06,5.07871e-08,0.537484,0.00169017,-5.29291e-06,2.69817e-08,0.539169,0.00167967,-5.21197e-06,2.01009e-08,0.540844,0.0016693,-5.15166e-06,1.18237e-08,0.542508,0.00165903,-5.11619e-06,5.18135e-08,0.544162,0.00164896,-4.96075e-06,1.9341e-08,0.545806,0.00163909,-4.90273e-06,-9.96867e-09,0.54744,0.00162926,-4.93263e-06,8.01382e-08,0.549064,0.00161963,-4.69222e-06,-1.25601e-08,0.550679,0.00161021,-4.7299e-06,2.97067e-08,0.552285,0.00160084,-4.64078e-06,1.29426e-08,0.553881,0.0015916,-4.60195e-06,3.77327e-08,0.555468,0.00158251,-4.48875e-06,1.49412e-08,0.557046,0.00157357,-4.44393e-06,2.17118e-08,0.558615,0.00156475,-4.3788e-06,1.74206e-08,0.560176,0.00155605,-4.32653e-06,2.78152e-08,0.561727,0.00154748,-4.24309e-06,-9.47239e-09,0.563271,0.00153896,-4.27151e-06,6.9679e-08,0.564805,0.00153063,-4.06247e-06,-3.08246e-08,0.566332,0.00152241,-4.15494e-06,5.36188e-08,0.56785,0.00151426,-3.99409e-06,-4.83594e-09,0.56936,0.00150626,-4.00859e-06,2.53293e-08,0.570863,0.00149832,-3.93261e-06,2.27286e-08,0.572357,0.00149052,-3.86442e-06,2.96541e-09,0.573844,0.0014828,-3.85552e-06,2.50147e-08,0.575323,0.00147516,-3.78048e-06,1.61842e-08,0.576794,0.00146765,-3.73193e-06,2.94582e-08,0.578258,0.00146028,-3.64355e-06,-1.48076e-08,0.579715,0.00145295,-3.68798e-06,2.97724e-08,0.581164,0.00144566,-3.59866e-06,1.49272e-08,0.582606,0.00143851,-3.55388e-06,2.97285e-08,0.584041,0.00143149,-3.46469e-06,-1.46323e-08,0.585469,0.00142451,-3.50859e-06,2.88004e-08,0.58689,0.00141758,-3.42219e-06,1.864e-08,0.588304,0.00141079,-3.36627e-06,1.58482e-08,0.589712,0.00140411,-3.31872e-06,-2.24279e-08,0.591112,0.00139741,-3.38601e-06,7.38639e-08,0.592507,0.00139085,-3.16441e-06,-3.46088e-08,0.593894,0.00138442,-3.26824e-06,4.96675e-09,0.595275,0.0013779,-3.25334e-06,7.4346e-08,0.59665,0.00137162,-3.0303e-06,-6.39319e-08,0.598019,0.00136536,-3.2221e-06,6.21725e-08,0.599381,0.00135911,-3.03558e-06,-5.94423e-09,0.600737,0.00135302,-3.05341e-06,2.12091e-08,0.602087,0.00134697,-2.98979e-06,-1.92876e-08,0.603431,0.00134094,-3.04765e-06,5.5941e-08,0.604769,0.00133501,-2.87983e-06,-2.56622e-08,0.606101,0.00132917,-2.95681e-06,4.67078e-08,0.607427,0.0013234,-2.81669e-06,-4.19592e-08,0.608748,0.00131764,-2.94257e-06,6.15243e-08,0.610062,0.00131194,-2.75799e-06,-2.53244e-08,0.611372,0.00130635,-2.83397e-06,3.97739e-08,0.612675,0.0013008,-2.71465e-06,-1.45618e-08,0.613973,0.00129533,-2.75833e-06,1.84733e-08,0.615266,0.00128986,-2.70291e-06,
2.73606e-10,0.616553,0.00128446,-2.70209e-06,4.00367e-08,0.617835,0.00127918,-2.58198e-06,-4.12113e-08,0.619111,0.00127389,-2.70561e-06,6.52039e-08,0.620383,0.00126867,-2.51e-06,-4.07901e-08,0.621649,0.00126353,-2.63237e-06,3.83516e-08,0.62291,0.00125838,-2.51732e-06,6.59315e-09,0.624166,0.00125337,-2.49754e-06,-5.11939e-09,0.625416,0.00124836,-2.5129e-06,1.38846e-08,0.626662,0.00124337,-2.47124e-06,9.18514e-09,0.627903,0.00123846,-2.44369e-06,8.97952e-09,0.629139,0.0012336,-2.41675e-06,1.45012e-08,0.63037,0.00122881,-2.37325e-06,-7.37949e-09,0.631597,0.00122404,-2.39538e-06,1.50169e-08,0.632818,0.00121929,-2.35033e-06,6.91648e-09,0.634035,0.00121461,-2.32958e-06,1.69219e-08,0.635248,0.00121,-2.27882e-06,-1.49997e-08,0.636455,0.0012054,-2.32382e-06,4.30769e-08,0.637659,0.00120088,-2.19459e-06,-3.80986e-08,0.638857,0.00119638,-2.30888e-06,4.97134e-08,0.640051,0.00119191,-2.15974e-06,-4.15463e-08,0.641241,0.00118747,-2.28438e-06,5.68667e-08,0.642426,0.00118307,-2.11378e-06,-7.10641e-09,0.643607,0.00117882,-2.1351e-06,-2.8441e-08,0.644784,0.00117446,-2.22042e-06,6.12658e-08,0.645956,0.00117021,-2.03663e-06,-3.78083e-08,0.647124,0.00116602,-2.15005e-06,3.03627e-08,0.648288,0.00116181,-2.05896e-06,-2.40379e-08,0.649448,0.00115762,-2.13108e-06,6.57887e-08,0.650603,0.00115356,-1.93371e-06,-6.03028e-08,0.651755,0.00114951,-2.11462e-06,5.62134e-08,0.652902,0.00114545,-1.94598e-06,-4.53417e-08,0.654046,0.00114142,-2.082e-06,6.55489e-08,0.655185,0.00113745,-1.88536e-06,-3.80396e-08,0.656321,0.00113357,-1.99948e-06,2.70049e-08,0.657452,0.00112965,-1.91846e-06,-1.03755e-08,0.65858,0.00112578,-1.94959e-06,1.44973e-08,0.659704,0.00112192,-1.9061e-06,1.1991e-08,0.660824,0.00111815,-1.87012e-06,-2.85634e-09,0.66194,0.0011144,-1.87869e-06,-5.65782e-10,0.663053,0.00111064,-1.88039e-06,5.11947e-09,0.664162,0.0011069,-1.86503e-06,3.96924e-08,0.665267,0.00110328,-1.74595e-06,-4.46795e-08,0.666368,0.00109966,-1.87999e-06,1.98161e-08,0.667466,0.00109596,-1.82054e-06,2.502e-08,0.66856,0.00109239,-1.74548e-06,-6.86593e-10,0.669651,0.0010889,-1.74754e-06,-2.22739e-08,0.670738,0.00108534,-1.81437e-06,3.01776e-08,0.671821,0.0010818,-1.72383e-06,2.07732e-08,0.672902,0.00107841,-1.66151e-06,-5.36658e-08,0.673978,0.00107493,-1.82251e-06,7.46802e-08,0.675051,0.00107151,-1.59847e-06,-6.62411e-08,0.676121,0.00106811,-1.79719e-06,7.10748e-08,0.677188,0.00106473,-1.58397e-06,-3.92441e-08,0.678251,0.00106145,-1.7017e-06,2.62973e-08,0.679311,0.00105812,-1.62281e-06,-6.34035e-09,0.680367,0.00105486,-1.64183e-06,-9.36249e-10,0.68142,0.00105157,-1.64464e-06,1.00854e-08,0.68247,0.00104831,-1.61438e-06,2.01995e-08,0.683517,0.00104514,-1.55378e-06,-3.1279e-08,0.68456,0.00104194,-1.64762e-06,4.53114e-08,0.685601,0.00103878,-1.51169e-06,-3.07573e-08,0.686638,0.00103567,-1.60396e-06,1.81133e-08,0.687672,0.00103251,-1.54962e-06,1.79085e-08,0.688703,0.00102947,-1.49589e-06,-3.01428e-08,0.689731,0.00102639,-1.58632e-06,4.30583e-08,0.690756,0.00102334,-1.45715e-06,-2.28814e-08,0.691778,0.00102036,-1.52579e-06,-1.11373e-08,0.692797,0.00101727,-1.5592e-06,6.74305e-08,0.693812,0.00101436,-1.35691e-06,-7.97709e-08,0.694825,0.0010114,-1.59622e-06,7.28391e-08,0.695835,0.00100843,-1.37771e-06,-3.27715e-08,0.696842,0.00100558,-1.47602e-06,-1.35807e-09,0.697846,0.00100262,-1.48009e-06,3.82037e-08,0.698847,0.000999775,-1.36548e-06,-3.22474e-08,0.699846,0.000996948,-1.46223e-06,3.11809e-08,0.700841,0.000994117,-1.36868e-06,-3.28714e-08,0.701834,0.000991281,-1.4673e-06,4.07001e-08,0.702824,0.000988468,-1.3452e-06,-1.07197e-08,0.703811,0.000985746,-1.377
36e-06,2.17866e-09,0.704795,0.000982998,-1.37082e-06,2.00521e-09,0.705777,0.000980262,-1.3648e-06,-1.01996e-08,0.706756,0.000977502,-1.3954e-06,3.87931e-08,0.707732,0.000974827,-1.27902e-06,-2.57632e-08,0.708706,0.000972192,-1.35631e-06,4.65513e-09,0.709676,0.000969493,-1.34235e-06,7.14257e-09,0.710645,0.00096683,-1.32092e-06,2.63791e-08,0.71161,0.000964267,-1.24178e-06,-5.30543e-08,0.712573,0.000961625,-1.40095e-06,6.66289e-08,0.713533,0.000959023,-1.20106e-06,-3.46474e-08,0.714491,0.000956517,-1.305e-06,1.23559e-08,0.715446,0.000953944,-1.26793e-06,-1.47763e-08,0.716399,0.000951364,-1.31226e-06,4.67494e-08,0.717349,0.000948879,-1.17201e-06,-5.3012e-08,0.718297,0.000946376,-1.33105e-06,4.60894e-08,0.719242,0.000943852,-1.19278e-06,-1.21366e-08,0.720185,0.00094143,-1.22919e-06,2.45673e-09,0.721125,0.000938979,-1.22182e-06,2.30966e-09,0.722063,0.000936543,-1.21489e-06,-1.16954e-08,0.722998,0.000934078,-1.24998e-06,4.44718e-08,0.723931,0.000931711,-1.11656e-06,-4.69823e-08,0.724861,0.000929337,-1.25751e-06,2.4248e-08,0.725789,0.000926895,-1.18477e-06,9.5949e-09,0.726715,0.000924554,-1.15598e-06,-3.02286e-09,0.727638,0.000922233,-1.16505e-06,2.49649e-09,0.72856,0.00091991,-1.15756e-06,-6.96321e-09,0.729478,0.000917575,-1.17845e-06,2.53564e-08,0.730395,0.000915294,-1.10238e-06,-3.48578e-08,0.731309,0.000912984,-1.20695e-06,5.44704e-08,0.732221,0.000910734,-1.04354e-06,-6.38144e-08,0.73313,0.000908455,-1.23499e-06,8.15781e-08,0.734038,0.00090623,-9.90253e-07,-8.3684e-08,0.734943,0.000903999,-1.2413e-06,7.43441e-08,0.735846,0.000901739,-1.01827e-06,-3.48787e-08,0.736746,0.000899598,-1.12291e-06,5.56596e-09,0.737645,0.000897369,-1.10621e-06,1.26148e-08,0.738541,0.000895194,-1.06837e-06,3.57935e-09,0.739435,0.000893068,-1.05763e-06,-2.69322e-08,0.740327,0.000890872,-1.13842e-06,4.45448e-08,0.741217,0.000888729,-1.00479e-06,-3.20376e-08,0.742105,0.000886623,-1.1009e-06,2.40011e-08,0.74299,0.000884493,-1.0289e-06,-4.36209e-09,0.743874,0.000882422,-1.04199e-06,-6.55268e-09,0.744755,0.000880319,-1.06164e-06,3.05728e-08,0.745634,0.000878287,-9.69926e-07,-5.61338e-08,0.746512,0.000876179,-1.13833e-06,7.4753e-08,0.747387,0.000874127,-9.14068e-07,-6.40644e-08,0.74826,0.000872106,-1.10626e-06,6.22955e-08,0.749131,0.000870081,-9.19375e-07,-6.59083e-08,0.75,0.000868044,-1.1171e-06,8.21284e-08,0.750867,0.000866056,-8.70714e-07,-8.37915e-08,0.751732,0.000864064,-1.12209e-06,7.42237e-08,0.752595,0.000862042,-8.99418e-07,-3.42894e-08,0.753456,0.00086014,-1.00229e-06,3.32955e-09,0.754315,0.000858146,-9.92297e-07,2.09712e-08,0.755173,0.000856224,-9.29384e-07,-2.76096e-08,0.756028,0.000854282,-1.01221e-06,2.98627e-08,0.756881,0.000852348,-9.22625e-07,-3.22365e-08,0.757733,0.000850406,-1.01933e-06,3.94786e-08,0.758582,0.000848485,-9.00898e-07,-6.46833e-09,0.75943,0.000846664,-9.20303e-07,-1.36052e-08,0.760275,0.000844783,-9.61119e-07,1.28447e-09,0.761119,0.000842864,-9.57266e-07,8.4674e-09,0.761961,0.000840975,-9.31864e-07,2.44506e-08,0.762801,0.000839185,-8.58512e-07,-4.6665e-08,0.763639,0.000837328,-9.98507e-07,4.30001e-08,0.764476,0.00083546,-8.69507e-07,-6.12609e-09,0.76531,0.000833703,-8.87885e-07,-1.84959e-08,0.766143,0.000831871,-9.43372e-07,2.05052e-08,0.766974,0.000830046,-8.81857e-07,-3.92026e-09,0.767803,0.000828271,-8.93618e-07,-4.82426e-09,0.768631,0.000826469,-9.0809e-07,2.32172e-08,0.769456,0.000824722,-8.38439e-07,-2.84401e-08,0.77028,0.00082296,-9.23759e-07,3.09386e-08,0.771102,0.000821205,-8.30943e-07,-3.57099e-08,0.771922,0.000819436,-9.38073e-07,5.22963e-08,0.772741,0.000817717,-7.81184e-07,-5.426
58e-08,0.773558,0.000815992,-9.43981e-07,4.55579e-08,0.774373,0.000814241,-8.07308e-07,-8.75656e-09,0.775186,0.0008126,-8.33578e-07,-1.05315e-08,0.775998,0.000810901,-8.65172e-07,-8.72188e-09,0.776808,0.000809145,-8.91338e-07,4.54191e-08,0.777616,0.000807498,-7.5508e-07,-5.37454e-08,0.778423,0.000805827,-9.16317e-07,5.03532e-08,0.779228,0.000804145,-7.65257e-07,-2.84584e-08,0.780031,0.000802529,-8.50632e-07,3.87579e-09,0.780833,0.00080084,-8.39005e-07,1.29552e-08,0.781633,0.0007992,-8.00139e-07,3.90804e-09,0.782432,0.000797612,-7.88415e-07,-2.85874e-08,0.783228,0.000795949,-8.74177e-07,5.0837e-08,0.784023,0.000794353,-7.21666e-07,-5.55513e-08,0.784817,0.000792743,-8.8832e-07,5.21587e-08,0.785609,0.000791123,-7.31844e-07,-3.38744e-08,0.786399,0.000789558,-8.33467e-07,2.37342e-08,0.787188,0.000787962,-7.62264e-07,-1.45775e-09,0.787975,0.000786433,-7.66638e-07,-1.79034e-08,0.788761,0.000784846,-8.20348e-07,1.34665e-08,0.789545,0.000783246,-7.79948e-07,2.3642e-08,0.790327,0.000781757,-7.09022e-07,-4.84297e-08,0.791108,0.000780194,-8.54311e-07,5.08674e-08,0.791888,0.000778638,-7.01709e-07,-3.58303e-08,0.792666,0.000777127,-8.092e-07,3.28493e-08,0.793442,0.000775607,-7.10652e-07,-3.59624e-08,0.794217,0.000774078,-8.1854e-07,5.13959e-08,0.79499,0.000772595,-6.64352e-07,-5.04121e-08,0.795762,0.000771115,-8.15588e-07,3.10431e-08,0.796532,0.000769577,-7.22459e-07,-1.41557e-08,0.797301,0.00076809,-7.64926e-07,2.55795e-08,0.798069,0.000766636,-6.88187e-07,-2.85578e-08,0.798835,0.000765174,-7.73861e-07,2.90472e-08,0.799599,0.000763714,-6.86719e-07,-2.80262e-08,0.800362,0.000762256,-7.70798e-07,2.34531e-08,0.801123,0.000760785,-7.00438e-07,-6.18144e-09,0.801884,0.000759366,-7.18983e-07,1.27263e-09,0.802642,0.000757931,-7.15165e-07,1.09101e-09,0.803399,0.000756504,-7.11892e-07,-5.63675e-09,0.804155,0.000755064,-7.28802e-07,2.14559e-08,0.80491,0.00075367,-6.64434e-07,-2.05821e-08,0.805663,0.00075228,-7.26181e-07,1.26812e-09,0.806414,0.000750831,-7.22377e-07,1.55097e-08,0.807164,0.000749433,-6.75848e-07,-3.70216e-09,0.807913,0.00074807,-6.86954e-07,-7.0105e-10,0.80866,0.000746694,-6.89057e-07,6.5063e-09,0.809406,0.000745336,-6.69538e-07,-2.53242e-08,0.810151,0.000743921,-7.45511e-07,3.51858e-08,0.810894,0.000742535,-6.39953e-07,3.79034e-09,0.811636,0.000741267,-6.28582e-07,-5.03471e-08,0.812377,0.000739858,-7.79624e-07,7.83886e-08,0.813116,0.000738534,-5.44458e-07,-8.43935e-08,0.813854,0.000737192,-7.97638e-07,8.03714e-08,0.81459,0.000735838,-5.56524e-07,-5.82784e-08,0.815325,0.00073455,-7.31359e-07,3.35329e-08,0.816059,0.000733188,-6.3076e-07,-1.62486e-08,0.816792,0.000731878,-6.79506e-07,3.14614e-08,0.817523,0.000730613,-5.85122e-07,-4.99925e-08,0.818253,0.000729293,-7.35099e-07,4.92994e-08,0.818982,0.000727971,-5.87201e-07,-2.79959e-08,0.819709,0.000726712,-6.71189e-07,3.07959e-09,0.820435,0.000725379,-6.6195e-07,1.56777e-08,0.82116,0.000724102,-6.14917e-07,-6.18564e-09,0.821883,0.000722854,-6.33474e-07,9.06488e-09,0.822606,0.000721614,-6.06279e-07,-3.00739e-08,0.823327,0.000720311,-6.96501e-07,5.16262e-08,0.824046,0.000719073,-5.41623e-07,-5.72214e-08,0.824765,0.000717818,-7.13287e-07,5.80503e-08,0.825482,0.000716566,-5.39136e-07,-5.57703e-08,0.826198,0.00071532,-7.06447e-07,4.58215e-08,0.826912,0.000714045,-5.68983e-07,-8.30636e-09,0.827626,0.000712882,-5.93902e-07,-1.25961e-08,0.828338,0.000711656,-6.3169e-07,-9.13985e-10,0.829049,0.00071039,-6.34432e-07,1.62519e-08,0.829759,0.00070917,-5.85676e-07,-4.48904e-09,0.830468,0.000707985,-5.99143e-07,1.70418e-09,0.831175,0.000706792,-5.9403e-07,-2.32768e-09
,0.831881,0.000705597,-6.01014e-07,7.60648e-09,0.832586,0.000704418,-5.78194e-07,-2.80982e-08,0.83329,0.000703177,-6.62489e-07,4.51817e-08,0.833993,0.000701988,-5.26944e-07,-3.34192e-08,0.834694,0.000700834,-6.27201e-07,2.88904e-08,0.835394,0.000699666,-5.4053e-07,-2.25378e-08,0.836093,0.000698517,-6.08143e-07,1.65589e-09,0.836791,0.000697306,-6.03176e-07,1.59142e-08,0.837488,0.000696147,-5.55433e-07,-5.70801e-09,0.838184,0.000695019,-5.72557e-07,6.91792e-09,0.838878,0.000693895,-5.51803e-07,-2.19637e-08,0.839571,0.000692725,-6.17694e-07,2.13321e-08,0.840263,0.000691554,-5.53698e-07,-3.75996e-09,0.840954,0.000690435,-5.64978e-07,-6.29219e-09,0.841644,0.000689287,-5.83855e-07,2.89287e-08,0.842333,0.000688206,-4.97068e-07,-4.98181e-08,0.843021,0.000687062,-6.46523e-07,5.11344e-08,0.843707,0.000685922,-4.9312e-07,-3.55102e-08,0.844393,0.00068483,-5.9965e-07,3.13019e-08,0.845077,0.000683724,-5.05745e-07,-3.00925e-08,0.84576,0.000682622,-5.96022e-07,2.94636e-08,0.846442,0.000681519,-5.07631e-07,-2.81572e-08,0.847123,0.000680419,-5.92103e-07,2.35606e-08,0.847803,0.000679306,-5.21421e-07,-6.48045e-09,0.848482,0.000678243,-5.40863e-07,2.36124e-09,0.849159,0.000677169,-5.33779e-07,-2.96461e-09,0.849836,0.000676092,-5.42673e-07,9.49728e-09,0.850512,0.000675035,-5.14181e-07,-3.50245e-08,0.851186,0.000673902,-6.19254e-07,7.09959e-08,0.851859,0.000672876,-4.06267e-07,-7.01453e-08,0.852532,0.000671853,-6.16703e-07,3.07714e-08,0.853203,0.000670712,-5.24388e-07,6.66423e-09,0.853873,0.000669684,-5.04396e-07,2.17629e-09,0.854542,0.000668681,-4.97867e-07,-1.53693e-08,0.855211,0.000667639,-5.43975e-07,-3.03752e-10,0.855878,0.000666551,-5.44886e-07,1.65844e-08,0.856544,0.000665511,-4.95133e-07,-6.42907e-09,0.857209,0.000664501,-5.1442e-07,9.13195e-09,0.857873,0.0006635,-4.87024e-07,-3.00987e-08,0.858536,0.000662435,-5.7732e-07,5.16584e-08,0.859198,0.000661436,-4.22345e-07,-5.73255e-08,0.859859,0.000660419,-5.94322e-07,5.84343e-08,0.860518,0.000659406,-4.19019e-07,-5.72022e-08,0.861177,0.000658396,-5.90626e-07,5.11653e-08,0.861835,0.000657368,-4.3713e-07,-2.82495e-08,0.862492,0.000656409,-5.21878e-07,2.22788e-09,0.863148,0.000655372,-5.15195e-07,1.9338e-08,0.863803,0.0006544,-4.5718e-07,-1.99754e-08,0.864457,0.000653425,-5.17107e-07,9.59024e-10,0.86511,0.000652394,-5.1423e-07,1.61393e-08,0.865762,0.000651414,-4.65812e-07,-5.91149e-09,0.866413,0.000650465,-4.83546e-07,7.50665e-09,0.867063,0.00064952,-4.61026e-07,-2.4115e-08,0.867712,0.000648526,-5.33371e-07,2.93486e-08,0.86836,0.000647547,-4.45325e-07,-3.36748e-08,0.869007,0.000646555,-5.4635e-07,4.57461e-08,0.869653,0.0006456,-4.09112e-07,-3.01002e-08,0.870298,0.000644691,-4.99412e-07,1.50501e-08,0.870942,0.000643738,-4.54262e-07,-3.01002e-08,0.871585,0.000642739,-5.44563e-07,4.57461e-08,0.872228,0.000641787,-4.07324e-07,-3.36748e-08,0.872869,0.000640871,-5.08349e-07,2.93486e-08,0.873509,0.000639943,-4.20303e-07,-2.4115e-08,0.874149,0.00063903,-4.92648e-07,7.50655e-09,0.874787,0.000638067,-4.70128e-07,-5.91126e-09,0.875425,0.000637109,-4.87862e-07,1.61385e-08,0.876062,0.000636182,-4.39447e-07,9.61961e-10,0.876697,0.000635306,-4.36561e-07,-1.99863e-08,0.877332,0.000634373,-4.9652e-07,1.93785e-08,0.877966,0.000633438,-4.38384e-07,2.07697e-09,0.878599,0.000632567,-4.32153e-07,-2.76864e-08,0.879231,0.00063162,-5.15212e-07,4.90641e-08,0.879862,0.000630737,-3.6802e-07,-4.93606e-08,0.880493,0.000629852,-5.16102e-07,2.9169e-08,0.881122,0.000628908,-4.28595e-07,-7.71083e-09,0.881751,0.000628027,-4.51727e-07,1.6744e-09,0.882378,0.000627129,-4.46704e-07,1.01317e-09,0.88300
5,0.000626239,-4.43665e-07,-5.72703e-09,0.883631,0.000625334,-4.60846e-07,2.1895e-08,0.884255,0.000624478,-3.95161e-07,-2.22481e-08,0.88488,0.000623621,-4.61905e-07,7.4928e-09,0.885503,0.00062272,-4.39427e-07,-7.72306e-09,0.886125,0.000621818,-4.62596e-07,2.33995e-08,0.886746,0.000620963,-3.92398e-07,-2.62704e-08,0.887367,0.000620099,-4.71209e-07,2.20775e-08,0.887987,0.000619223,-4.04976e-07,-2.43496e-09,0.888605,0.000618406,-4.12281e-07,-1.23377e-08,0.889223,0.000617544,-4.49294e-07,-7.81876e-09,0.88984,0.000616622,-4.72751e-07,4.36128e-08,0.890457,0.000615807,-3.41912e-07,-4.7423e-08,0.891072,0.000614981,-4.84181e-07,2.68698e-08,0.891687,0.000614093,-4.03572e-07,-4.51384e-10,0.8923,0.000613285,-4.04926e-07,-2.50643e-08,0.892913,0.0006124,-4.80119e-07,4.11038e-08,0.893525,0.000611563,-3.56808e-07,-2.01414e-08,0.894136,0.000610789,-4.17232e-07,-2.01426e-08,0.894747,0.000609894,-4.7766e-07,4.11073e-08,0.895356,0.000609062,-3.54338e-07,-2.50773e-08,0.895965,0.000608278,-4.2957e-07,-4.02954e-10,0.896573,0.000607418,-4.30779e-07,2.66891e-08,0.89718,0.000606636,-3.50711e-07,-4.67489e-08,0.897786,0.000605795,-4.90958e-07,4.10972e-08,0.898391,0.000604936,-3.67666e-07,1.56948e-09,0.898996,0.000604205,-3.62958e-07,-4.73751e-08,0.8996,0.000603337,-5.05083e-07,6.87214e-08,0.900202,0.000602533,-2.98919e-07,-4.86966e-08,0.900805,0.000601789,-4.45009e-07,6.85589e-09,0.901406,0.00060092,-4.24441e-07,2.1273e-08,0.902007,0.000600135,-3.60622e-07,-3.23434e-08,0.902606,0.000599317,-4.57652e-07,4.84959e-08,0.903205,0.000598547,-3.12164e-07,-4.24309e-08,0.903803,0.000597795,-4.39457e-07,2.01844e-09,0.904401,0.000596922,-4.33402e-07,3.43571e-08,0.904997,0.000596159,-3.30331e-07,-2.02374e-08,0.905593,0.000595437,-3.91043e-07,-1.30123e-08,0.906188,0.000594616,-4.3008e-07,1.26819e-08,0.906782,0.000593794,-3.92034e-07,2.18894e-08,0.907376,0.000593076,-3.26366e-07,-4.06349e-08,0.907968,0.000592301,-4.4827e-07,2.1441e-08,0.90856,0.000591469,-3.83947e-07,1.44754e-08,0.909151,0.000590744,-3.40521e-07,-1.97379e-08,0.909742,0.000590004,-3.99735e-07,4.87161e-09,0.910331,0.000589219,-3.8512e-07,2.51532e-10,0.91092,0.00058845,-3.84366e-07,-5.87776e-09,0.911508,0.000587663,-4.01999e-07,2.32595e-08,0.912096,0.000586929,-3.3222e-07,-2.75554e-08,0.912682,0.000586182,-4.14887e-07,2.73573e-08,0.913268,0.000585434,-3.32815e-07,-2.22692e-08,0.913853,0.000584702,-3.99622e-07,2.11486e-09,0.914437,0.000583909,-3.93278e-07,1.38098e-08,0.915021,0.000583164,-3.51848e-07,2.25042e-09,0.915604,0.000582467,-3.45097e-07,-2.28115e-08,0.916186,0.000581708,-4.13531e-07,2.93911e-08,0.916767,0.000580969,-3.25358e-07,-3.51481e-08,0.917348,0.000580213,-4.30803e-07,5.15967e-08,0.917928,0.000579506,-2.76012e-07,-5.20296e-08,0.918507,0.000578798,-4.32101e-07,3.73124e-08,0.919085,0.000578046,-3.20164e-07,-3.76154e-08,0.919663,0.000577293,-4.3301e-07,5.35447e-08,0.92024,0.000576587,-2.72376e-07,-5.7354e-08,0.920816,0.000575871,-4.44438e-07,5.66621e-08,0.921391,0.000575152,-2.74452e-07,-5.00851e-08,0.921966,0.000574453,-4.24707e-07,2.4469e-08,0.92254,0.000573677,-3.513e-07,1.18138e-08,0.923114,0.000573009,-3.15859e-07,-1.21195e-08,0.923686,0.000572341,-3.52217e-07,-2.29403e-08,0.924258,0.000571568,-4.21038e-07,4.4276e-08,0.924829,0.000570859,-2.8821e-07,-3.49546e-08,0.9254,0.000570178,-3.93074e-07,3.59377e-08,0.92597,0.000569499,-2.85261e-07,-4.91915e-08,0.926539,0.000568781,-4.32835e-07,4.16189e-08,0.927107,0.00056804,-3.07979e-07,1.92523e-09,0.927675,0.00056743,-3.02203e-07,-4.93198e-08,0.928242,0.000566678,-4.50162e-07,7.61447e-08,0.928809,0.000566006,-
2.21728e-07,-7.6445e-08,0.929374,0.000565333,-4.51063e-07,5.08216e-08,0.929939,0.000564583,-2.98599e-07,-7.63212e-09,0.930503,0.000563963,-3.21495e-07,-2.02931e-08,0.931067,0.000563259,-3.82374e-07,2.92001e-08,0.93163,0.000562582,-2.94774e-07,-3.69025e-08,0.932192,0.000561882,-4.05482e-07,5.88053e-08,0.932754,0.000561247,-2.29066e-07,-7.91094e-08,0.933315,0.000560552,-4.66394e-07,7.88184e-08,0.933875,0.000559856,-2.29939e-07,-5.73501e-08,0.934434,0.000559224,-4.01989e-07,3.13727e-08,0.934993,0.000558514,-3.07871e-07,-8.53611e-09,0.935551,0.000557873,-3.33479e-07,2.77175e-09,0.936109,0.000557214,-3.25164e-07,-2.55091e-09,0.936666,0.000556556,-3.32817e-07,7.43188e-09,0.937222,0.000555913,-3.10521e-07,-2.71766e-08,0.937778,0.00055521,-3.92051e-07,4.167e-08,0.938333,0.000554551,-2.67041e-07,-2.02941e-08,0.938887,0.000553956,-3.27923e-07,-2.00984e-08,0.93944,0.00055324,-3.88218e-07,4.10828e-08,0.939993,0.000552587,-2.6497e-07,-2.50237e-08,0.940546,0.000551982,-3.40041e-07,-5.92583e-10,0.941097,0.0005513,-3.41819e-07,2.7394e-08,0.941648,0.000550698,-2.59637e-07,-4.93788e-08,0.942199,0.000550031,-4.07773e-07,5.09119e-08,0.942748,0.000549368,-2.55038e-07,-3.50595e-08,0.943297,0.000548753,-3.60216e-07,2.97214e-08,0.943846,0.000548122,-2.71052e-07,-2.42215e-08,0.944394,0.000547507,-3.43716e-07,7.55985e-09,0.944941,0.000546842,-3.21037e-07,-6.01796e-09,0.945487,0.000546182,-3.3909e-07,1.65119e-08,0.946033,0.000545553,-2.89555e-07,-4.2498e-10,0.946578,0.000544973,-2.9083e-07,-1.4812e-08,0.947123,0.000544347,-3.35266e-07,6.83068e-11,0.947667,0.000543676,-3.35061e-07,1.45388e-08,0.94821,0.00054305,-2.91444e-07,1.38123e-09,0.948753,0.000542471,-2.87301e-07,-2.00637e-08,0.949295,0.000541836,-3.47492e-07,1.92688e-08,0.949837,0.000541199,-2.89685e-07,2.59298e-09,0.950378,0.000540628,-2.81906e-07,-2.96407e-08,0.950918,0.000539975,-3.70829e-07,5.63652e-08,0.951458,0.000539402,-2.01733e-07,-7.66107e-08,0.951997,0.000538769,-4.31565e-07,7.12638e-08,0.952535,0.00053812,-2.17774e-07,-2.96305e-08,0.953073,0.000537595,-3.06665e-07,-1.23464e-08,0.95361,0.000536945,-3.43704e-07,1.94114e-08,0.954147,0.000536316,-2.8547e-07,-5.69451e-09,0.954683,0.000535728,-3.02554e-07,3.36666e-09,0.955219,0.000535133,-2.92454e-07,-7.77208e-09,0.955753,0.000534525,-3.1577e-07,2.77216e-08,0.956288,0.000533976,-2.32605e-07,-4.35097e-08,0.956821,0.00053338,-3.63134e-07,2.7108e-08,0.957354,0.000532735,-2.8181e-07,-5.31772e-09,0.957887,0.000532156,-2.97764e-07,-5.83718e-09,0.958419,0.000531543,-3.15275e-07,2.86664e-08,0.95895,0.000530998,-2.29276e-07,-4.9224e-08,0.959481,0.000530392,-3.76948e-07,4.90201e-08,0.960011,0.000529785,-2.29887e-07,-2.76471e-08,0.96054,0.000529243,-3.12829e-07,1.96385e-09,0.961069,0.000528623,-3.06937e-07,1.97917e-08,0.961598,0.000528068,-2.47562e-07,-2.15261e-08,0.962125,0.000527508,-3.1214e-07,6.70795e-09,0.962653,0.000526904,-2.92016e-07,-5.30573e-09,0.963179,0.000526304,-3.07934e-07,1.4515e-08,0.963705,0.000525732,-2.64389e-07,6.85048e-09,0.964231,0.000525224,-2.43837e-07,-4.19169e-08,0.964756,0.00052461,-3.69588e-07,4.1608e-08,0.96528,0.000523996,-2.44764e-07,-5.30598e-09,0.965804,0.000523491,-2.60682e-07,-2.03841e-08,0.966327,0.000522908,-3.21834e-07,2.72378e-08,0.966849,0.000522346,-2.40121e-07,-2.89625e-08,0.967371,0.000521779,-3.27008e-07,2.90075e-08,0.967893,0.000521212,-2.39986e-07,-2.74629e-08,0.968414,0.00052065,-3.22374e-07,2.12396e-08,0.968934,0.000520069,-2.58656e-07,2.10922e-09,0.969454,0.000519558,-2.52328e-07,-2.96765e-08,0.969973,0.000518964,-3.41357e-07,5.6992e-08,0.970492,0.000518452,-1.70382e
-07,-7.90821e-08,0.97101,0.000517874,-4.07628e-07,8.05224e-08,0.971528,0.000517301,-1.66061e-07,-6.41937e-08,0.972045,0.000516776,-3.58642e-07,5.70429e-08,0.972561,0.00051623,-1.87513e-07,-4.47686e-08,0.973077,0.00051572,-3.21819e-07,2.82237e-09,0.973593,0.000515085,-3.13352e-07,3.34792e-08,0.974108,0.000514559,-2.12914e-07,-1.75298e-08,0.974622,0.000514081,-2.65503e-07,-2.29648e-08,0.975136,0.000513481,-3.34398e-07,4.97843e-08,0.975649,0.000512961,-1.85045e-07,-5.6963e-08,0.976162,0.00051242,-3.55934e-07,5.88585e-08,0.976674,0.000511885,-1.79359e-07,-5.92616e-08,0.977185,0.000511348,-3.57143e-07,5.89785e-08,0.977696,0.000510811,-1.80208e-07,-5.74433e-08,0.978207,0.000510278,-3.52538e-07,5.15854e-08,0.978717,0.000509728,-1.97781e-07,-2.9689e-08,0.979226,0.000509243,-2.86848e-07,7.56591e-09,0.979735,0.000508692,-2.64151e-07,-5.74649e-10,0.980244,0.000508162,-2.65875e-07,-5.26732e-09,0.980752,0.000507615,-2.81677e-07,2.16439e-08,0.981259,0.000507116,-2.16745e-07,-2.17037e-08,0.981766,0.000506618,-2.81856e-07,5.56636e-09,0.982272,0.000506071,-2.65157e-07,-5.61689e-10,0.982778,0.000505539,-2.66842e-07,-3.31963e-09,0.983283,0.000504995,-2.76801e-07,1.38402e-08,0.983788,0.000504483,-2.3528e-07,7.56339e-09,0.984292,0.000504035,-2.1259e-07,-4.40938e-08,0.984796,0.000503478,-3.44871e-07,4.96026e-08,0.985299,0.000502937,-1.96064e-07,-3.51071e-08,0.985802,0.000502439,-3.01385e-07,3.12212e-08,0.986304,0.00050193,-2.07721e-07,-3.0173e-08,0.986806,0.000501424,-2.9824e-07,2.9866e-08,0.987307,0.000500917,-2.08642e-07,-2.96865e-08,0.987808,0.000500411,-2.97702e-07,2.92753e-08,0.988308,0.000499903,-2.09876e-07,-2.78101e-08,0.988807,0.0004994,-2.93306e-07,2.23604e-08,0.989307,0.000498881,-2.26225e-07,-2.02681e-09,0.989805,0.000498422,-2.32305e-07,-1.42531e-08,0.990303,0.000497915,-2.75065e-07,-5.65232e-10,0.990801,0.000497363,-2.76761e-07,1.65141e-08,0.991298,0.000496859,-2.27218e-07,-5.88639e-09,0.991795,0.000496387,-2.44878e-07,7.0315e-09,0.992291,0.000495918,-2.23783e-07,-2.22396e-08,0.992787,0.000495404,-2.90502e-07,2.23224e-08,0.993282,0.00049489,-2.23535e-07,-7.44543e-09,0.993776,0.000494421,-2.45871e-07,7.45924e-09,0.994271,0.000493951,-2.23493e-07,-2.23915e-08,0.994764,0.000493437,-2.90668e-07,2.25021e-08,0.995257,0.000492923,-2.23161e-07,-8.01218e-09,0.99575,0.000492453,-2.47198e-07,9.54669e-09,0.996242,0.000491987,-2.18558e-07,-3.01746e-08,0.996734,0.000491459,-3.09082e-07,5.1547e-08,0.997225,0.000490996,-1.54441e-07,-5.68039e-08,0.997716,0.000490517,-3.24853e-07,5.64594e-08,0.998206,0.000490036,-1.55474e-07,-4.98245e-08,0.998696,0.000489576,-3.04948e-07,2.36292e-08,0.999186,0.000489037,-2.3406e-07,1.49121e-08,0.999674,0.000488613,-1.89324e-07,-2.3673e-08,1.00016,0.000488164,-2.60343e-07,2.01754e-08,1.00065,0.000487704,-1.99816e-07,-5.70288e-08,1.00114,0.000487133,-3.70903e-07,8.87303e-08,1.00162,0.000486657,-1.04712e-07,-5.94737e-08,1.00211,0.000486269,-2.83133e-07,2.99553e-08,1.0026,0.000485793,-1.93267e-07,-6.03474e-08,1.00308,0.000485225,-3.74309e-07,9.2225e-08,1.00357,0.000484754,-9.76345e-08,-7.0134e-08,1.00405,0.000484348,-3.08036e-07,6.91016e-08,1.00454,0.000483939,-1.00731e-07,-8.70633e-08,1.00502,0.000483476,-3.61921e-07,4.07328e-08,1.0055,0.000482875,-2.39723e-07,4.33413e-08,1.00599,0.000482525,-1.09699e-07,-9.48886e-08,1.00647,0.000482021,-3.94365e-07,9.77947e-08,1.00695,0.000481526,-1.00981e-07,-5.78713e-08,1.00743,0.00048115,-2.74595e-07,1.44814e-08,1.00791,0.000480645,-2.31151e-07,-5.42665e-11,1.00839,0.000480182,-2.31314e-07,-1.42643e-08,1.00887,0.000479677,-2.74106e-07,5.71115e-08,1
.00935,0.0004793,-1.02772e-07,-9.49724e-08,1.00983,0.000478809,-3.87689e-07,8.43596e-08,1.01031,0.000478287,-1.3461e-07,-4.04755e-09,1.01079,0.000478006,-1.46753e-07,-6.81694e-08,1.01127,0.000477508,-3.51261e-07,3.83067e-08,1.01174,0.00047692,-2.36341e-07,3.41521e-08,1.01222,0.00047655,-1.33885e-07,-5.57058e-08,1.0127,0.000476115,-3.01002e-07,6.94616e-08,1.01317,0.000475721,-9.26174e-08,-1.02931e-07,1.01365,0.000475227,-4.01412e-07,1.03846e-07,1.01412,0.000474736,-8.98751e-08,-7.40321e-08,1.0146,0.000474334,-3.11971e-07,7.30735e-08,1.01507,0.00047393,-9.27508e-08,-9.90527e-08,1.01554,0.000473447,-3.89909e-07,8.47188e-08,1.01602,0.000472921,-1.35753e-07,-1.40381e-09,1.01649,0.000472645,-1.39964e-07,-7.91035e-08,1.01696,0.000472128,-3.77275e-07,7.93993e-08,1.01744,0.000471612,-1.39077e-07,-7.52607e-11,1.01791,0.000471334,-1.39302e-07,-7.90983e-08,1.01838,0.000470818,-3.76597e-07,7.80499e-08,1.01885,0.000470299,-1.42448e-07,5.31733e-09,1.01932,0.00047003,-1.26496e-07,-9.93193e-08,1.01979,0.000469479,-4.24453e-07,1.53541e-07,1.02026,0.00046909,3.617e-08,-1.57217e-07,1.02073,0.000468691,-4.35482e-07,1.177e-07,1.02119,0.000468173,-8.23808e-08,-7.51659e-08,1.02166,0.000467783,-3.07878e-07,6.37538e-08,1.02213,0.000467358,-1.16617e-07,-6.064e-08,1.0226,0.000466943,-2.98537e-07,5.9597e-08,1.02306,0.000466525,-1.19746e-07,-5.85386e-08,1.02353,0.00046611,-2.95362e-07,5.53482e-08,1.024,0.000465685,-1.29317e-07,-4.36449e-08,1.02446,0.000465296,-2.60252e-07,2.20268e-11,1.02493,0.000464775,-2.60186e-07,4.35568e-08,1.02539,0.000464386,-1.29516e-07,-5.50398e-08,1.02586,0.000463961,-2.94635e-07,5.73932e-08,1.02632,0.000463544,-1.22456e-07,-5.53236e-08,1.02678,0.000463133,-2.88426e-07,4.46921e-08,1.02725,0.000462691,-1.5435e-07,-4.23534e-09,1.02771,0.000462369,-1.67056e-07,-2.77507e-08,1.02817,0.000461952,-2.50308e-07,-3.97101e-09,1.02863,0.000461439,-2.62221e-07,4.36348e-08,1.02909,0.000461046,-1.31317e-07,-5.13589e-08,1.02955,0.000460629,-2.85394e-07,4.25913e-08,1.03001,0.000460186,-1.5762e-07,2.0285e-10,1.03047,0.000459871,-1.57011e-07,-4.34027e-08,1.03093,0.000459427,-2.87219e-07,5.41987e-08,1.03139,0.000459015,-1.24623e-07,-5.4183e-08,1.03185,0.000458604,-2.87172e-07,4.33239e-08,1.03231,0.000458159,-1.572e-07,9.65817e-11,1.03277,0.000457845,-1.56911e-07,-4.37103e-08,1.03323,0.0004574,-2.88041e-07,5.55351e-08,1.03368,0.000456991,-1.21436e-07,-5.9221e-08,1.03414,0.00045657,-2.99099e-07,6.21394e-08,1.0346,0.000456158,-1.1268e-07,-7.01275e-08,1.03505,0.000455723,-3.23063e-07,9.91614e-08,1.03551,0.000455374,-2.55788e-08,-8.80996e-08,1.03596,0.000455058,-2.89878e-07,1.48184e-08,1.03642,0.000454523,-2.45422e-07,2.88258e-08,1.03687,0.000454119,-1.58945e-07,-1.09125e-08,1.03733,0.000453768,-1.91682e-07,1.48241e-08,1.03778,0.000453429,-1.4721e-07,-4.83838e-08,1.03823,0.00045299,-2.92361e-07,5.95019e-08,1.03869,0.000452584,-1.13856e-07,-7.04146e-08,1.03914,0.000452145,-3.25099e-07,1.02947e-07,1.03959,0.000451803,-1.62583e-08,-1.02955e-07,1.04004,0.000451462,-3.25123e-07,7.04544e-08,1.04049,0.000451023,-1.1376e-07,-5.96534e-08,1.04094,0.000450616,-2.9272e-07,4.89499e-08,1.04139,0.000450178,-1.45871e-07,-1.69369e-08,1.04184,0.000449835,-1.96681e-07,1.87977e-08,1.04229,0.000449498,-1.40288e-07,-5.82539e-08,1.04274,0.000449043,-3.1505e-07,9.50087e-08,1.04319,0.000448698,-3.00238e-08,-8.33623e-08,1.04364,0.000448388,-2.80111e-07,2.20363e-11,1.04409,0.000447828,-2.80045e-07,8.32742e-08,1.04454,0.000447517,-3.02221e-08,-9.47002e-08,1.04498,0.000447173,-3.14323e-07,5.7108e-08,1.04543,0.000446716,-1.42999e-07,-1.45225e-08,1.
04588,0.000446386,-1.86566e-07,9.82022e-10,1.04632,0.000446016,-1.8362e-07,1.05944e-08,1.04677,0.00044568,-1.51837e-07,-4.33597e-08,1.04721,0.000445247,-2.81916e-07,4.36352e-08,1.04766,0.000444814,-1.51011e-07,-1.19717e-08,1.0481,0.000444476,-1.86926e-07,4.25158e-09,1.04855,0.000444115,-1.74171e-07,-5.03461e-09,1.04899,0.000443751,-1.89275e-07,1.58868e-08,1.04944,0.00044342,-1.41614e-07,-5.85127e-08,1.04988,0.000442961,-3.17152e-07,9.89548e-08,1.05032,0.000442624,-2.0288e-08,-9.88878e-08,1.05076,0.000442287,-3.16951e-07,5.81779e-08,1.05121,0.000441827,-1.42418e-07,-1.46144e-08,1.05165,0.000441499,-1.86261e-07,2.79892e-10,1.05209,0.000441127,-1.85421e-07,1.34949e-08,1.05253,0.000440797,-1.44937e-07,-5.42594e-08,1.05297,0.000440344,-3.07715e-07,8.43335e-08,1.05341,0.000439982,-5.47146e-08,-4.46558e-08,1.05385,0.000439738,-1.88682e-07,-2.49193e-08,1.05429,0.000439286,-2.6344e-07,2.5124e-08,1.05473,0.000438835,-1.88068e-07,4.36328e-08,1.05517,0.000438589,-5.71699e-08,-8.04459e-08,1.05561,0.000438234,-2.98508e-07,3.97324e-08,1.05605,0.000437756,-1.79311e-07,4.07258e-08,1.05648,0.000437519,-5.71332e-08,-8.34263e-08,1.05692,0.000437155,-3.07412e-07,5.45608e-08,1.05736,0.000436704,-1.4373e-07,-1.56078e-08,1.05779,0.000436369,-1.90553e-07,7.87043e-09,1.05823,0.000436012,-1.66942e-07,-1.58739e-08,1.05867,0.00043563,-2.14563e-07,5.56251e-08,1.0591,0.000435368,-4.76881e-08,-8.74172e-08,1.05954,0.000435011,-3.0994e-07,5.56251e-08,1.05997,0.000434558,-1.43064e-07,-1.58739e-08,1.06041,0.000434224,-1.90686e-07,7.87042e-09,1.06084,0.000433866,-1.67075e-07,-1.56078e-08,1.06127,0.000433485,-2.13898e-07,5.45609e-08,1.06171,0.000433221,-5.02157e-08,-8.34263e-08,1.06214,0.00043287,-3.00495e-07,4.07258e-08,1.06257,0.000432391,-1.78317e-07,3.97325e-08,1.063,0.000432154,-5.91198e-08,-8.04464e-08,1.06344,0.000431794,-3.00459e-07,4.36347e-08,1.06387,0.000431324,-1.69555e-07,2.5117e-08,1.0643,0.000431061,-9.42041e-08,-2.48934e-08,1.06473,0.000430798,-1.68884e-07,-4.47527e-08,1.06516,0.000430326,-3.03142e-07,8.46951e-08,1.06559,0.000429973,-4.90573e-08,-5.56089e-08,1.06602,0.000429708,-2.15884e-07,1.85314e-08,1.06645,0.000429332,-1.6029e-07,-1.85166e-08,1.06688,0.000428956,-2.1584e-07,5.5535e-08,1.06731,0.000428691,-4.92347e-08,-8.44142e-08,1.06774,0.000428339,-3.02477e-07,4.37032e-08,1.06816,0.000427865,-1.71368e-07,2.88107e-08,1.06859,0.000427609,-8.49356e-08,-3.97367e-08,1.06902,0.00042732,-2.04146e-07,1.09267e-08,1.06945,0.000426945,-1.71365e-07,-3.97023e-09,1.06987,0.00042659,-1.83276e-07,4.9542e-09,1.0703,0.000426238,-1.68414e-07,-1.58466e-08,1.07073,0.000425854,-2.15953e-07,5.84321e-08,1.07115,0.000425597,-4.0657e-08,-9.86725e-08,1.07158,0.00042522,-3.36674e-07,9.78392e-08,1.072,0.00042484,-4.31568e-08,-5.42658e-08,1.07243,0.000424591,-2.05954e-07,1.45377e-11,1.07285,0.000424179,-2.0591e-07,5.42076e-08,1.07328,0.00042393,-4.32877e-08,-9.76357e-08,1.0737,0.00042355,-3.36195e-07,9.79165e-08,1.07412,0.000423172,-4.24451e-08,-5.56118e-08,1.07455,0.00042292,-2.09281e-07,5.32143e-09,1.07497,0.000422518,-1.93316e-07,3.43261e-08,1.07539,0.000422234,-9.0338e-08,-2.34165e-08,1.07581,0.000421983,-1.60588e-07,-5.98692e-08,1.07623,0.000421482,-3.40195e-07,1.43684e-07,1.07666,0.000421233,9.08574e-08,-1.5724e-07,1.07708,0.000420943,-3.80862e-07,1.27647e-07,1.0775,0.000420564,2.0791e-09,-1.1493e-07,1.07792,0.000420223,-3.4271e-07,9.36534e-08,1.07834,0.000419819,-6.17499e-08,-2.12653e-08,1.07876,0.000419632,-1.25546e-07,-8.59219e-09,1.07918,0.000419355,-1.51322e-07,-6.35752e-08,1.0796,0.000418861,-3.42048e-07,1.43684e-07,1.08002
,0.000418608,8.90034e-08,-1.53532e-07,1.08043,0.000418326,-3.71593e-07,1.12817e-07,1.08085,0.000417921,-3.31414e-08,-5.93184e-08,1.08127,0.000417677,-2.11097e-07,5.24697e-09,1.08169,0.00041727,-1.95356e-07,3.83305e-08,1.0821,0.000416995,-8.03642e-08,-3.93597e-08,1.08252,0.000416716,-1.98443e-07,-1.0094e-10,1.08294,0.000416319,-1.98746e-07,3.97635e-08,1.08335,0.00041604,-7.94557e-08,-3.97437e-08,1.08377,0.000415762,-1.98687e-07,1.94215e-12,1.08419,0.000415365,-1.98681e-07,3.97359e-08,1.0846,0.000415087,-7.94732e-08,-3.97362e-08,1.08502,0.000414809,-1.98682e-07,-4.31063e-13,1.08543,0.000414411,-1.98683e-07,3.97379e-08,1.08584,0.000414133,-7.94694e-08,-3.97418e-08,1.08626,0.000413855,-1.98695e-07,2.00563e-11,1.08667,0.000413458,-1.98635e-07,3.96616e-08,1.08709,0.000413179,-7.965e-08,-3.9457e-08,1.0875,0.000412902,-1.98021e-07,-1.04281e-09,1.08791,0.000412502,-2.01149e-07,4.36282e-08,1.08832,0.000412231,-7.02648e-08,-5.42608e-08,1.08874,0.000411928,-2.33047e-07,5.42057e-08,1.08915,0.000411624,-7.04301e-08,-4.33527e-08,1.08956,0.000411353,-2.00488e-07,-4.07378e-12,1.08997,0.000410952,-2.005e-07,4.3369e-08,1.09038,0.000410681,-7.03934e-08,-5.42627e-08,1.09079,0.000410378,-2.33182e-07,5.44726e-08,1.0912,0.000410075,-6.97637e-08,-4.44186e-08,1.09161,0.000409802,-2.03019e-07,3.99235e-09,1.09202,0.000409408,-1.91042e-07,2.84491e-08,1.09243,0.000409111,-1.05695e-07,1.42043e-09,1.09284,0.000408904,-1.01434e-07,-3.41308e-08,1.09325,0.000408599,-2.03826e-07,1.58937e-08,1.09366,0.000408239,-1.56145e-07,-2.94438e-08,1.09406,0.000407838,-2.44476e-07,1.01881e-07,1.09447,0.000407655,6.11676e-08,-1.39663e-07,1.09488,0.000407358,-3.57822e-07,9.91432e-08,1.09529,0.00040694,-6.03921e-08,-1.84912e-08,1.09569,0.000406764,-1.15866e-07,-2.51785e-08,1.0961,0.000406457,-1.91401e-07,-4.03115e-12,1.09651,0.000406074,-1.91413e-07,2.51947e-08,1.09691,0.000405767,-1.15829e-07,1.84346e-08,1.09732,0.00040559,-6.05254e-08,-9.89332e-08,1.09772,0.000405172,-3.57325e-07,1.3888e-07,1.09813,0.000404874,5.93136e-08,-9.8957e-08,1.09853,0.000404696,-2.37557e-07,1.853e-08,1.09894,0.000404277,-1.81968e-07,2.48372e-08,1.09934,0.000403987,-1.07456e-07,1.33047e-09,1.09975,0.000403776,-1.03465e-07,-3.01591e-08,1.10015,0.000403479,-1.93942e-07,9.66054e-11,1.10055,0.000403091,-1.93652e-07,2.97727e-08,1.10096,0.000402793,-1.04334e-07,2.19273e-11,1.10136,0.000402585,-1.04268e-07,-2.98604e-08,1.10176,0.000402287,-1.93849e-07,2.10325e-10,1.10216,0.0004019,-1.93218e-07,2.90191e-08,1.10256,0.0004016,-1.06161e-07,2.92264e-09,1.10297,0.000401397,-9.73931e-08,-4.07096e-08,1.10337,0.00040108,-2.19522e-07,4.07067e-08,1.10377,0.000400763,-9.7402e-08,-2.90783e-09,1.10417,0.000400559,-1.06126e-07,-2.90754e-08,1.10457,0.00040026,-1.93352e-07,9.00021e-14,1.10497,0.000399873,-1.93351e-07,2.9075e-08,1.10537,0.000399574,-1.06126e-07,2.90902e-09,1.10577,0.00039937,-9.73992e-08,-4.07111e-08,1.10617,0.000399053,-2.19533e-07,4.07262e-08,1.10657,0.000398736,-9.73541e-08,-2.98424e-09,1.10697,0.000398533,-1.06307e-07,-2.87892e-08,1.10736,0.000398234,-1.92674e-07,-1.06824e-09,1.10776,0.000397845,-1.95879e-07,3.30622e-08,1.10816,0.000397552,-9.66926e-08,-1.19712e-08,1.10856,0.000397323,-1.32606e-07,1.48225e-08,1.10895,0.000397102,-8.81387e-08,-4.73187e-08,1.10935,0.000396784,-2.30095e-07,5.52429e-08,1.10975,0.00039649,-6.4366e-08,-5.44437e-08,1.11014,0.000396198,-2.27697e-07,4.33226e-08,1.11054,0.000395872,-9.77293e-08,3.62656e-10,1.11094,0.000395678,-9.66414e-08,-4.47732e-08,1.11133,0.00039535,-2.30961e-07,5.95208e-08,1.11173,0.000395067,-5.23985e-08,-7.41008e-08,1.112
12,0.00039474,-2.74701e-07,1.17673e-07,1.11252,0.000394543,7.83181e-08,-1.58172e-07,1.11291,0.000394225,-3.96199e-07,1.57389e-07,1.1133,0.000393905,7.59679e-08,-1.13756e-07,1.1137,0.000393716,-2.653e-07,5.92165e-08,1.11409,0.000393363,-8.76507e-08,-3.90074e-09,1.11449,0.000393176,-9.93529e-08,-4.36136e-08,1.11488,0.000392846,-2.30194e-07,5.91457e-08,1.11527,0.000392563,-5.27564e-08,-7.376e-08,1.11566,0.000392237,-2.74037e-07,1.16685e-07,1.11606,0.000392039,7.60189e-08,-1.54562e-07,1.11645,0.000391727,-3.87667e-07,1.43935e-07,1.11684,0.000391384,4.4137e-08,-6.35487e-08,1.11723,0.000391281,-1.46509e-07,-8.94896e-09,1.11762,0.000390961,-1.73356e-07,-1.98647e-08,1.11801,0.000390555,-2.3295e-07,8.8408e-08,1.1184,0.000390354,3.22736e-08,-9.53486e-08,1.11879,0.000390133,-2.53772e-07,5.45677e-08,1.11918,0.000389789,-9.0069e-08,-3.71296e-09,1.11957,0.000389598,-1.01208e-07,-3.97159e-08,1.11996,0.000389276,-2.20355e-07,4.33671e-08,1.12035,0.000388966,-9.02542e-08,-1.45431e-08,1.12074,0.000388741,-1.33883e-07,1.48052e-08,1.12113,0.000388518,-8.94678e-08,-4.46778e-08,1.12152,0.000388205,-2.23501e-07,4.46966e-08,1.12191,0.000387892,-8.94114e-08,-1.48992e-08,1.12229,0.000387669,-1.34109e-07,1.49003e-08,1.12268,0.000387445,-8.94082e-08,-4.47019e-08,1.12307,0.000387132,-2.23514e-07,4.4698e-08,1.12345,0.000386819,-8.942e-08,-1.48806e-08,1.12384,0.000386596,-1.34062e-07,1.48245e-08,1.12423,0.000386372,-8.95885e-08,-4.44172e-08,1.12461,0.00038606,-2.2284e-07,4.36351e-08,1.125,0.000385745,-9.19348e-08,-1.09139e-08,1.12539,0.000385528,-1.24677e-07,2.05584e-11,1.12577,0.000385279,-1.24615e-07,1.08317e-08,1.12616,0.000385062,-9.21198e-08,-4.33473e-08,1.12654,0.000384748,-2.22162e-07,4.33481e-08,1.12693,0.000384434,-9.21174e-08,-1.08356e-08,1.12731,0.000384217,-1.24624e-07,-5.50907e-12,1.12769,0.000383968,-1.24641e-07,1.08577e-08,1.12808,0.000383751,-9.20679e-08,-4.34252e-08,1.12846,0.000383437,-2.22343e-07,4.36337e-08,1.12884,0.000383123,-9.14422e-08,-1.19005e-08,1.12923,0.000382904,-1.27144e-07,3.96813e-09,1.12961,0.000382662,-1.15239e-07,-3.97207e-09,1.12999,0.000382419,-1.27155e-07,1.19201e-08,1.13038,0.000382201,-9.1395e-08,-4.37085e-08,1.13076,0.000381887,-2.2252e-07,4.37046e-08,1.13114,0.000381573,-9.14068e-08,-1.19005e-08,1.13152,0.000381355,-1.27108e-07,3.89734e-09,1.1319,0.000381112,-1.15416e-07,-3.68887e-09,1.13228,0.00038087,-1.26483e-07,1.08582e-08,1.13266,0.00038065,-9.39083e-08,-3.97438e-08,1.13304,0.000380343,-2.1314e-07,2.89076e-08,1.13342,0.000380003,-1.26417e-07,4.33225e-08,1.1338,0.00037988,3.55072e-09,-8.29883e-08,1.13418,0.000379638,-2.45414e-07,5.0212e-08,1.13456,0.000379298,-9.47781e-08,1.34964e-09,1.13494,0.000379113,-9.07292e-08,-5.56105e-08,1.13532,0.000378764,-2.57561e-07,1.01883e-07,1.1357,0.000378555,4.80889e-08,-1.13504e-07,1.13608,0.000378311,-2.92423e-07,1.13713e-07,1.13646,0.000378067,4.87176e-08,-1.02931e-07,1.13683,0.000377856,-2.60076e-07,5.95923e-08,1.13721,0.000377514,-8.12988e-08,-1.62288e-08,1.13759,0.000377303,-1.29985e-07,5.32278e-09,1.13797,0.000377059,-1.14017e-07,-5.06237e-09,1.13834,0.000376816,-1.29204e-07,1.49267e-08,1.13872,0.000376602,-8.44237e-08,-5.46444e-08,1.1391,0.000376269,-2.48357e-07,8.44417e-08,1.13947,0.000376026,4.96815e-09,-4.47039e-08,1.13985,0.000375902,-1.29143e-07,-2.48355e-08,1.14023,0.000375569,-2.0365e-07,2.48368e-08,1.1406,0.000375236,-1.2914e-07,4.46977e-08,1.14098,0.000375112,4.95341e-09,-8.44184e-08,1.14135,0.000374869,-2.48302e-07,5.45572e-08,1.14173,0.000374536,-8.463e-08,-1.46013e-08,1.1421,0.000374323,-1.28434e-07,3.8478e-09,1.14247,0.00
0374077,-1.1689e-07,-7.89941e-10,1.14285,0.000373841,-1.1926e-07,-6.88042e-10,1.14322,0.0003736,-1.21324e-07,3.54213e-09,1.1436,0.000373368,-1.10698e-07,-1.34805e-08,1.14397,0.000373107,-1.51139e-07,5.03798e-08,1.14434,0.000372767,0.,0.};
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void RGB2LuvConvert_f(const T& src, D& dst)
+        {
+            const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3);
+            const float _un = 13 * (4 * 0.950456f * _d);
+            const float _vn = 13 * (9 * _d);
+
+            float B = blueIdx == 0 ? src.x : src.z;
+            float G = src.y;
+            float R = blueIdx == 0 ? src.z : src.x;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            float X = R * 0.412453f + G * 0.357580f + B * 0.180423f;
+            float Y = R * 0.212671f + G * 0.715160f + B * 0.072169f;
+            float Z = R * 0.019334f + G * 0.119193f + B * 0.950227f;
+
+            float L = splineInterpolate(Y * (LAB_CBRT_TAB_SIZE / 1.5f), c_LabCbrtTab, LAB_CBRT_TAB_SIZE);
+            L = 116.f * L - 16.f;
+
+            const float d = (4 * 13) / ::fmaxf(X + 15 * Y + 3 * Z, numeric_limits<float>::epsilon());
+            float u = L * (X * d - _un);
+            float v = L * ((9 * 0.25f) * Y * d - _vn);
+
+            dst.x = L;
+            dst.y = u;
+            dst.z = v;
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void RGB2LuvConvert_b(const T& src, D& dst)
+        {
+            float3 srcf, dstf;
+
+            srcf.x = src.x * (1.f / 255.f);
+            srcf.y = src.y * (1.f / 255.f);
+            srcf.z = src.z * (1.f / 255.f);
+
+            RGB2LuvConvert_f<srgb, blueIdx>(srcf, dstf);
+
+            dst.x = saturate_cast<uchar>(dstf.x * 2.55f);
+            dst.y = saturate_cast<uchar>(dstf.y * 0.72033898305084743f + 96.525423728813564f);
+            dst.z = saturate_cast<uchar>(dstf.z * 0.9732824427480916f + 136.259541984732824f);
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct RGB2Luv;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct RGB2Luv<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                RGB2LuvConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2Luv() {}
+            __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                RGB2LuvConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ RGB2Luv() {}
+            __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::RGB2Luv<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    namespace color_detail
+    {
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Luv2RGBConvert_f(const T& src, D& dst)
+        {
+            const float _d = 1.f / (0.950456f + 15 + 1.088754f * 3);
+            const float _un = 4 * 0.950456f * _d;
+            const float _vn = 9 * _d;
+
+            float L = src.x;
+            float u = src.y;
+            float v = src.z;
+
+            float Y = (L + 16.f) * (1.f / 116.f);
+            Y = Y * Y * Y;
+
+            float d = (1.f / 13.f) / L;
+            u = u * d + _un;
+            v = v * d + _vn;
+
+            float iv = 1.f / v;
+            float X = 2.25f * u * Y * iv;
+            float Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
+
+            float B = 0.055648f * X - 0.204043f * Y + 1.057311f * Z;
+            float G = -0.969256f * X + 1.875991f * Y + 0.041556f * Z;
+            float R = 3.240479f * X - 1.537150f * Y - 0.498535f * Z;
+
+            if (srgb)
+            {
+                B = splineInterpolate(B * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                G = splineInterpolate(G * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+                R = splineInterpolate(R * GAMMA_TAB_SIZE, c_sRGBInvGammaTab, GAMMA_TAB_SIZE);
+            }
+
+            dst.x = blueIdx == 0 ? B : R;
+            dst.y = G;
+            dst.z = blueIdx == 0 ? R : B;
+            setAlpha(dst, ColorChannel<float>::max());
+        }
+
+        template <bool srgb, int blueIdx, typename T, typename D>
+        __device__ __forceinline__ void Luv2RGBConvert_b(const T& src, D& dst)
+        {
+            float3 srcf, dstf;
+
+            srcf.x = src.x * (100.f / 255.f);
+            srcf.y = src.y * 1.388235294117647f - 134.f;
+            srcf.z = src.z * 1.027450980392157f - 140.f;
+
+            Luv2RGBConvert_f<srgb, blueIdx>(srcf, dstf);
+
+            dst.x = saturate_cast<uchar>(dstf.x * 255.f);
+            dst.y = saturate_cast<uchar>(dstf.y * 255.f);
+            dst.z = saturate_cast<uchar>(dstf.z * 255.f);
+            setAlpha(dst, ColorChannel<uchar>::max());
+        }
+
+        template <typename T, int scn, int dcn, bool srgb, int blueIdx> struct Luv2RGB;
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Luv2RGB<uchar, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<uchar, scn>::vec_type, typename TypeVec<uchar, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<uchar, dcn>::vec_type operator ()(const typename TypeVec<uchar, scn>::vec_type& src) const
+            {
+                typename TypeVec<uchar, dcn>::vec_type dst;
+
+                Luv2RGBConvert_b<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Luv2RGB() {}
+            __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
+        };
+        template <int scn, int dcn, bool srgb, int blueIdx>
+        struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
+            : unary_function<typename TypeVec<float, scn>::vec_type, typename TypeVec<float, dcn>::vec_type>
+        {
+            __device__ __forceinline__ typename TypeVec<float, dcn>::vec_type operator ()(const typename TypeVec<float, scn>::vec_type& src) const
+            {
+                typename TypeVec<float, dcn>::vec_type dst;
+
+                Luv2RGBConvert_f<srgb, blueIdx>(src, dst);
+
+                return dst;
+            }
+            __host__ __device__ __forceinline__ Luv2RGB() {}
+            __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
+        };
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
+    template <typename T> struct name ## _traits \
+    { \
+        typedef ::cv::cuda::device::color_detail::Luv2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        static __host__ __device__ __forceinline__ functor_type create_functor() \
+        { \
+            return functor_type(); \
+        } \
+    };
+
+    #undef CV_DESCALE
+
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COLOR_DETAIL_HPP
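The Luv conversion above is the CIE 1976 L*u*v* math with spline tables standing in for cbrt() and the sRGB gamma curve. It can be sanity-checked on the host with a minimal sketch (not part of this diff): it assumes linear RGB input and uses std::cbrt plus the standard L* cutoff instead of the c_LabCbrtTab lookup, so it only approximates the device path.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main()
    {
        float R = 0.25f, G = 0.50f, B = 0.75f;            // linear RGB in [0, 1]

        // Same RGB -> XYZ matrix as RGB2LuvConvert_f.
        float X = R * 0.412453f + G * 0.357580f + B * 0.180423f;
        float Y = R * 0.212671f + G * 0.715160f + B * 0.072169f;
        float Z = R * 0.019334f + G * 0.119193f + B * 0.950227f;

        // D65 white-point terms, matching _d / _un / _vn in the kernel.
        const float _d  = 1.f / (0.950456f + 15 + 1.088754f * 3);
        const float _un = 13 * (4 * 0.950456f * _d);
        const float _vn = 13 * (9 * _d);

        // The kernel reads 116 * cbrt(Y) - 16 from a spline table; compute it directly.
        float L = Y > 0.008856f ? 116.f * std::cbrt(Y) - 16.f : 903.3f * Y;

        float d = (4 * 13) / std::max(X + 15 * Y + 3 * Z, 1e-12f);
        float u = L * (X * d - _un);                      // = 13 * L * (u' - un')
        float v = L * ((9 * 0.25f) * Y * d - _vn);        // = 13 * L * (v' - vn')

        std::printf("L=%.3f u=%.3f v=%.3f\n", L, u, v);
        return 0;
    }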
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/reduce.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/reduce.hpp
new file mode 100644
index 0000000..8af20b0
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/reduce.hpp
@@ -0,0 +1,365 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_REDUCE_DETAIL_HPP
+
+#include <thrust/tuple.h>
+#include "../warp.hpp"
+#include "../warp_shuffle.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace reduce_detail
+    {
+        template <typename T> struct GetType;
+        template <typename T> struct GetType<T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<volatile T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<T&>
+        {
+            typedef T type;
+        };
+
+        template <unsigned int I, unsigned int N>
+        struct For
+        {
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
+            {
+                thrust::get<I>(smem)[tid] = thrust::get<I>(val);
+
+                For<I + 1, N>::loadToSmem(smem, val, tid);
+            }
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
+            {
+                thrust::get<I>(val) = thrust::get<I>(smem)[tid];
+
+                For<I + 1, N>::loadFromSmem(smem, val, tid);
+            }
+
+            template <class PointerTuple, class ValTuple, class OpTuple>
+            static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
+            {
+                typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
+                thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
+
+                For<I + 1, N>::merge(smem, val, tid, delta, op);
+            }
+            template <class ValTuple, class OpTuple>
+            static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
+            {
+                typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
+                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
+
+                For<I + 1, N>::mergeShfl(val, delta, width, op);
+            }
+        };
+        template <unsigned int N>
+        struct For<N, N>
+        {
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
+            {
+            }
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
+            {
+            }
+
+            template <class PointerTuple, class ValTuple, class OpTuple>
+            static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
+            {
+            }
+            template <class ValTuple, class OpTuple>
+            static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
+            {
+            }
+        };
+
+        template <typename T>
+        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
+        {
+            smem[tid] = val;
+        }
+        template <typename T>
+        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
+        {
+            val = smem[tid];
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                   const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                   unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                     const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                     unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
+        }
+
+        template <typename T, class Op>
+        __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
+        {
+            T reg = smem[tid + delta];
+            smem[tid] = val = op(val, reg);
+        }
+        template <typename T, class Op>
+        __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
+        {
+            T reg = shfl_down(val, delta, width);
+            val = op(val, reg);
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+        __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                              const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                              unsigned int tid,
+                                              unsigned int delta,
+                                              const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
+        }
+        template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+        __device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                  unsigned int delta,
+                                                  unsigned int width,
+                                                  const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
+        }
+
+        template <unsigned int N> struct Generic
+        {
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                loadToSmem(smem, val, tid);
+                if (N >= 32)
+                    __syncthreads();
+
+                if (N >= 2048)
+                {
+                    if (tid < 1024)
+                        merge(smem, val, tid, 1024, op);
+
+                    __syncthreads();
+                }
+                if (N >= 1024)
+                {
+                    if (tid < 512)
+                        merge(smem, val, tid, 512, op);
+
+                    __syncthreads();
+                }
+                if (N >= 512)
+                {
+                    if (tid < 256)
+                        merge(smem, val, tid, 256, op);
+
+                    __syncthreads();
+                }
+                if (N >= 256)
+                {
+                    if (tid < 128)
+                        merge(smem, val, tid, 128, op);
+
+                    __syncthreads();
+                }
+                if (N >= 128)
+                {
+                    if (tid < 64)
+                        merge(smem, val, tid, 64, op);
+
+                    __syncthreads();
+                }
+                if (N >= 64)
+                {
+                    if (tid < 32)
+                        merge(smem, val, tid, 32, op);
+                }
+
+                if (tid < 16)
+                {
+                    merge(smem, val, tid, 16, op);
+                    merge(smem, val, tid, 8, op);
+                    merge(smem, val, tid, 4, op);
+                    merge(smem, val, tid, 2, op);
+                    merge(smem, val, tid, 1, op);
+                }
+            }
+        };
+
+        template <unsigned int I, typename Pointer, typename Reference, class Op>
+        struct Unroll
+        {
+            static __device__ void loopShfl(Reference val, Op op, unsigned int N)
+            {
+                mergeShfl(val, I, N, op);
+                Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
+            }
+            static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                merge(smem, val, tid, I, op);
+                Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+            }
+        };
+        template <typename Pointer, typename Reference, class Op>
+        struct Unroll<0, Pointer, Reference, Op>
+        {
+            static __device__ void loopShfl(Reference, Op, unsigned int)
+            {
+            }
+            static __device__ void loop(Pointer, Reference, unsigned int, Op)
+            {
+            }
+        };
+
+        template <unsigned int N> struct WarpOptimized
+        {
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                CV_UNUSED(smem);
+                CV_UNUSED(tid);
+
+                Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
+            #else
+                loadToSmem(smem, val, tid);
+
+                if (tid < N / 2)
+                    Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+            #endif
+            }
+        };
+
+        template <unsigned int N> struct GenericOptimized32
+        {
+            enum { M = N / 32 };
+
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                const unsigned int laneId = Warp::laneId();
+
+            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
+
+                if (laneId == 0)
+                    loadToSmem(smem, val, tid / 32);
+            #else
+                loadToSmem(smem, val, tid);
+
+                if (laneId < 16)
+                    Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
+
+                __syncthreads();
+
+                if (laneId == 0)
+                    loadToSmem(smem, val, tid / 32);
+            #endif
+
+                __syncthreads();
+
+                loadFromSmem(smem, val, tid);
+
+                if (tid < 32)
+                {
+                #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                    Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
+                #else
+                    Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+                #endif
+                }
+            }
+        };
+
+        template <bool val, class T1, class T2> struct StaticIf;
+        template <class T1, class T2> struct StaticIf<true, T1, T2>
+        {
+            typedef T1 type;
+        };
+        template <class T1, class T2> struct StaticIf<false, T1, T2>
+        {
+            typedef T2 type;
+        };
+
+        template <unsigned int N> struct IsPowerOf2
+        {
+            enum { value = ((N != 0) && !(N & (N - 1))) };
+        };
+
+        template <unsigned int N> struct Dispatcher
+        {
+            typedef typename StaticIf<
+                (N <= 32) && IsPowerOf2<N>::value,
+                WarpOptimized<N>,
+                typename StaticIf<
+                    (N <= 1024) && IsPowerOf2<N>::value,
+                    GenericOptimized32<N>,
+                    Generic<N>
+                >::type
+            >::type reductor;
+        };
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
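GenericOptimized32 above is the interesting case: each warp reduces its 32 values with shuffles, lane 0 parks one partial per warp in shared memory, and the first warp then folds the M = N / 32 partials. The same shape is easy to see in a standalone kernel. A minimal sketch in plain CUDA, not OpenCV's API: blockSum/warpSum are illustrative names, the block size is assumed to be a multiple of 32 (at most 1024), and the modern __shfl_down_sync is used where this header's shfl_down wrapper appears.

    __device__ float warpSum(float v)
    {
        // Same idea as Unroll<I>::loopShfl: halve the shuffle stride each step.
        for (int delta = 16; delta > 0; delta /= 2)
            v += __shfl_down_sync(0xffffffffu, v, delta);
        return v;                                  // lane 0 ends up with the warp total
    }

    __global__ void blockSum(const float* in, float* out, int n)
    {
        __shared__ float smem[32];                 // one partial per warp (M = N / 32)

        int tid = blockIdx.x * blockDim.x + threadIdx.x;
        float v = tid < n ? in[tid] : 0.f;

        v = warpSum(v);                            // intra-warp tree reduction
        if (threadIdx.x % 32 == 0)
            smem[threadIdx.x / 32] = v;            // lane 0 stages its warp's partial
        __syncthreads();

        if (threadIdx.x < 32)                      // first warp folds the partials
        {
            v = threadIdx.x < blockDim.x / 32 ? smem[threadIdx.x] : 0.f;
            v = warpSum(v);
            if (threadIdx.x == 0)
                out[blockIdx.x] = v;
        }
    }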
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/reduce_key_val.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/reduce_key_val.hpp
new file mode 100644
index 0000000..df37c17
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/reduce_key_val.hpp
@@ -0,0 +1,502 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+
+#include <thrust/tuple.h>
+#include "../warp.hpp"
+#include "../warp_shuffle.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace reduce_key_val_detail
+    {
+        template <typename T> struct GetType;
+        template <typename T> struct GetType<T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<volatile T*>
+        {
+            typedef T type;
+        };
+        template <typename T> struct GetType<T&>
+        {
+            typedef T type;
+        };
+
+        template <unsigned int I, unsigned int N>
+        struct For
+        {
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
+            {
+                thrust::get<I>(smem)[tid] = thrust::get<I>(data);
+
+                For<I + 1, N>::loadToSmem(smem, data, tid);
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
+            {
+                thrust::get<I>(data) = thrust::get<I>(smem)[tid];
+
+                For<I + 1, N>::loadFromSmem(smem, data, tid);
+            }
+
+            template <class ReferenceTuple>
+            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
+            {
+                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
+
+                For<I + 1, N>::copyShfl(val, delta, width);
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
+            {
+                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
+
+                For<I + 1, N>::copy(svals, val, tid, delta);
+            }
+
+            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
+            {
+                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
+
+                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
+                {
+                    thrust::get<I>(key) = reg;
+                    thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
+                }
+
+                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
+            }
+            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
+                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
+                                         const CmpTuple& cmp,
+                                         unsigned int tid, unsigned int delta)
+            {
+                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
+
+                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
+                {
+                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
+                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
+                }
+
+                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
+            }
+        };
+        template <unsigned int N>
+        struct For<N, N>
+        {
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
+            {
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
+            {
+            }
+
+            template <class ReferenceTuple>
+            static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
+            {
+            }
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
+            {
+            }
+
+            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
+            {
+            }
+            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
+                                         const ValPointerTuple&, const ValReferenceTuple&,
+                                         const CmpTuple&,
+                                         unsigned int, unsigned int)
+            {
+            }
+        };
+
+        //////////////////////////////////////////////////////
+        // loadToSmem
+
+        template <typename T>
+        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
+        {
+            smem[tid] = data;
+        }
+        template <typename T>
+        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
+        {
+            data = smem[tid];
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                   const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& data,
+                                                   unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, data, tid);
+        }
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                     const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& data,
+                                                     unsigned int tid)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, data, tid);
+        }
+
+        //////////////////////////////////////////////////////
+        // copyVals
+
+        template <typename V>
+        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
+        {
+            val = shfl_down(val, delta, width);
+        }
+        template <typename V>
+        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
+        {
+            svals[tid] = val = svals[tid + delta];
+        }
+        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                     unsigned int delta,
+                                                     int width)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
+        }
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid, unsigned int delta)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
+        }
+
+        //////////////////////////////////////////////////////
+        // merge
+
+        template <typename K, typename V, class Cmp>
+        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
+        {
+            K reg = shfl_down(key, delta, width);
+
+            if (cmp(reg, key))
+            {
+                key = reg;
+                copyValsShfl(val, delta, width);
+            }
+        }
+        template <typename K, typename V, class Cmp>
+        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
+        {
+            K reg = skeys[tid + delta];
+
+            if (cmp(reg, key))
+            {
+                skeys[tid] = key = reg;
+                copyVals(svals, val, tid, delta);
+            }
+        }
+        template <typename K,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp>
+        __device__ __forceinline__ void mergeShfl(K& key,
+                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  const Cmp& cmp,
+                                                  unsigned int delta, int width)
+        {
+            K reg = shfl_down(key, delta, width);
+
+            if (cmp(reg, key))
+            {
+                key = reg;
+                copyValsShfl(val, delta, width);
+            }
+        }
+        template <typename K,
+                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp>
+        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
+                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
+        {
+            K reg = skeys[tid + delta];
+
+            if (cmp(reg, key))
+            {
+                skeys[tid] = key = reg;
+                copyVals(svals, val, tid, delta);
+            }
+        }
+        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                                  unsigned int delta, int width)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
+        }
+        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                              unsigned int tid, unsigned int delta)
+        {
+            For<0, thrust::tuple_size<thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
+        }
+
+        //////////////////////////////////////////////////////
+        // Generic
+
+        template <unsigned int N> struct Generic
+        {
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+                loadToSmem(skeys, key, tid);
+                loadValsToSmem(svals, val, tid);
+                if (N >= 32)
+                    __syncthreads();
+
+                if (N >= 2048)
+                {
+                    if (tid < 1024)
+                        merge(skeys, key, svals, val, cmp, tid, 1024);
+
+                    __syncthreads();
+                }
+                if (N >= 1024)
+                {
+                    if (tid < 512)
+                        merge(skeys, key, svals, val, cmp, tid, 512);
+
+                    __syncthreads();
+                }
+                if (N >= 512)
+                {
+                    if (tid < 256)
+                        merge(skeys, key, svals, val, cmp, tid, 256);
+
+                    __syncthreads();
+                }
+                if (N >= 256)
+                {
+                    if (tid < 128)
+                        merge(skeys, key, svals, val, cmp, tid, 128);
+
+                    __syncthreads();
+                }
+                if (N >= 128)
+                {
+                    if (tid < 64)
+                        merge(skeys, key, svals, val, cmp, tid, 64);
+
+                    __syncthreads();
+                }
+                if (N >= 64)
+                {
+                    if (tid < 32)
+                        merge(skeys, key, svals, val, cmp, tid, 32);
+                }
+
+                if (tid < 16)
+                {
+                    merge(skeys, key, svals, val, cmp, tid, 16);
+                    merge(skeys, key, svals, val, cmp, tid, 8);
+                    merge(skeys, key, svals, val, cmp, tid, 4);
+                    merge(skeys, key, svals, val, cmp, tid, 2);
+                    merge(skeys, key, svals, val, cmp, tid, 1);
+                }
+            }
+        };
+
+        template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
+        struct Unroll
+        {
+            static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
+            {
+                mergeShfl(key, val, cmp, I, N);
+                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
+            }
+            static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+                merge(skeys, key, svals, val, cmp, tid, I);
+                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+            }
+        };
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        struct Unroll<0, KP, KR, VP, VR, Cmp>
+        {
+            static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
+            {
+            }
+            static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
+            {
+            }
+        };
+
+        template <unsigned int N> struct WarpOptimized
+        {
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+            #if 0 // __CUDA_ARCH__ >= 300
+                CV_UNUSED(skeys);
+                CV_UNUSED(svals);
+                CV_UNUSED(tid);
+
+                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
+            #else
+                loadToSmem(skeys, key, tid);
+                loadToSmem(svals, val, tid);
+
+                if (tid < N / 2)
+                    Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+            #endif
+            }
+        };
+
+        template <unsigned int N> struct GenericOptimized32
+        {
+            enum { M = N / 32 };
+
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
+            {
+                const unsigned int laneId = Warp::laneId();
+
+            #if 0 // __CUDA_ARCH__ >= 300
+                Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
+
+                if (laneId == 0)
+                {
+                    loadToSmem(skeys, key, tid / 32);
+                    loadToSmem(svals, val, tid / 32);
+                }
+            #else
+                loadToSmem(skeys, key, tid);
+                loadToSmem(svals, val, tid);
+
+                if (laneId < 16)
+                    Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+
+                __syncthreads();
+
+                if (laneId == 0)
+                {
+                    loadToSmem(skeys, key, tid / 32);
+                    loadToSmem(svals, val, tid / 32);
+                }
+            #endif
+
+                __syncthreads();
+
+                loadFromSmem(skeys, key, tid);
+
+                if (tid < 32)
+                {
+                #if 0 // __CUDA_ARCH__ >= 300
+                    loadFromSmem(svals, val, tid);
+
+                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
+                #else
+                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
+                #endif
+                }
+            }
+        };
+
+        template <bool val, class T1, class T2> struct StaticIf;
+        template <class T1, class T2> struct StaticIf<true, T1, T2>
+        {
+            typedef T1 type;
+        };
+        template <class T1, class T2> struct StaticIf<false, T1, T2>
+        {
+            typedef T2 type;
+        };
+
+        template <unsigned int N> struct IsPowerOf2
+        {
+            enum { value = ((N != 0) && !(N & (N - 1))) };
+        };
+
+        template <unsigned int N> struct Dispatcher
+        {
+            typedef typename StaticIf<
+                (N <= 32) && IsPowerOf2<N>::value,
+                WarpOptimized<N>,
+                typename StaticIf<
+                    (N <= 1024) && IsPowerOf2<N>::value,
+                    GenericOptimized32<N>,
+                    Generic<N>
+                >::type
+            >::type reductor;
+        };
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
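reduce_key_val generalizes the same tree reduction to (key, value) pairs: the comparator decides which key survives each merge, and the value is carried along with it, which is the sort of machinery argmin/argmax-style kernels (e.g. min/max location) build on. A standalone warp-level sketch of that merge step in plain CUDA — warpArgMin is an illustrative name, not OpenCV's API, and __shfl_down_sync replaces the header's shfl_down wrapper:

    __device__ void warpArgMin(float& key, int& val)
    {
        // Mirror of mergeShfl: pull the neighbour's pair down and keep it
        // only when the comparator (here: less-than) prefers its key.
        for (int delta = 16; delta > 0; delta /= 2)
        {
            float otherKey = __shfl_down_sync(0xffffffffu, key, delta);
            int   otherVal = __shfl_down_sync(0xffffffffu, val, delta);
            if (otherKey < key)
            {
                key = otherKey;   // smaller key wins...
                val = otherVal;   // ...and its payload travels with it
            }
        }
        // After the loop, lane 0 holds the minimum key and its value.
    }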
(N <= 32) && IsPowerOf2::value, + WarpOptimized, + typename StaticIf< + (N <= 1024) && IsPowerOf2::value, + GenericOptimized32, + Generic + >::type + >::type reductor; + }; + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/transform_detail.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/transform_detail.hpp new file mode 100644 index 0000000..1919848 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/transform_detail.hpp @@ -0,0 +1,392 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP +#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP + +#include "../common.hpp" +#include "../vec_traits.hpp" +#include "../functional.hpp" + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace transform_detail + { + //! Read Write Traits + + template struct UnaryReadWriteTraits + { + typedef typename TypeVec::vec_type read_type; + typedef typename TypeVec::vec_type write_type; + }; + + template struct BinaryReadWriteTraits + { + typedef typename TypeVec::vec_type read_type1; + typedef typename TypeVec::vec_type read_type2; + typedef typename TypeVec::vec_type write_type; + }; + + //! 
Transform kernels + + template struct OpUnroller; + template <> struct OpUnroller<1> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + } + }; + template <> struct OpUnroller<2> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + } + }; + template <> struct OpUnroller<3> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src.z); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src1.z, src2.z); + } + }; + template <> struct OpUnroller<4> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src.z); + if (mask(y, x_shifted + 3)) + dst.w = op(src.w); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.x = op(src1.x, src2.x); + if (mask(y, x_shifted + 1)) + dst.y = op(src1.y, src2.y); + if (mask(y, x_shifted + 2)) + dst.z = op(src1.z, src2.z); + if (mask(y, x_shifted + 3)) + dst.w = op(src1.w, src2.w); + } + }; + template <> struct OpUnroller<8> + { + template + static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.a0 = op(src.a0); + if (mask(y, x_shifted + 1)) + dst.a1 = op(src.a1); + if (mask(y, x_shifted + 2)) + dst.a2 = op(src.a2); + if (mask(y, x_shifted + 3)) + dst.a3 = op(src.a3); + if (mask(y, x_shifted + 4)) + dst.a4 = op(src.a4); + if (mask(y, x_shifted + 5)) + dst.a5 = op(src.a5); + if (mask(y, x_shifted + 6)) + dst.a6 = op(src.a6); + if (mask(y, x_shifted + 7)) + dst.a7 = op(src.a7); + } + + template + static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y) + { + if (mask(y, x_shifted)) + dst.a0 = op(src1.a0, src2.a0); + if (mask(y, x_shifted + 1)) + dst.a1 = op(src1.a1, src2.a1); + if (mask(y, x_shifted + 2)) + dst.a2 = op(src1.a2, src2.a2); + if (mask(y, x_shifted + 3)) + dst.a3 = op(src1.a3, src2.a3); + if (mask(y, 
x_shifted + 4)) + dst.a4 = op(src1.a4, src2.a4); + if (mask(y, x_shifted + 5)) + dst.a5 = op(src1.a5, src2.a5); + if (mask(y, x_shifted + 6)) + dst.a6 = op(src1.a6, src2.a6); + if (mask(y, x_shifted + 7)) + dst.a7 = op(src1.a7, src2.a7); + } + }; + + template + static __global__ void transformSmart(const PtrStepSz src_, PtrStep dst_, const Mask mask, const UnOp op) + { + typedef TransformFunctorTraits ft; + typedef typename UnaryReadWriteTraits::read_type read_type; + typedef typename UnaryReadWriteTraits::write_type write_type; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int x_shifted = x * ft::smart_shift; + + if (y < src_.rows) + { + const T* src = src_.ptr(y); + D* dst = dst_.ptr(y); + + if (x_shifted + ft::smart_shift - 1 < src_.cols) + { + const read_type src_n_el = ((const read_type*)src)[x]; + OpUnroller::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y); + } + else + { + for (int real_x = x_shifted; real_x < src_.cols; ++real_x) + { + if (mask(y, real_x)) + dst[real_x] = op(src[real_x]); + } + } + } + } + + template + __global__ static void transformSimple(const PtrStepSz src, PtrStep dst, const Mask mask, const UnOp op) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < src.cols && y < src.rows && mask(y, x)) + { + dst.ptr(y)[x] = op(src.ptr(y)[x]); + } + } + + template + static __global__ void transformSmart(const PtrStepSz src1_, const PtrStep src2_, PtrStep dst_, + const Mask mask, const BinOp op) + { + typedef TransformFunctorTraits ft; + typedef typename BinaryReadWriteTraits::read_type1 read_type1; + typedef typename BinaryReadWriteTraits::read_type2 read_type2; + typedef typename BinaryReadWriteTraits::write_type write_type; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int x_shifted = x * ft::smart_shift; + + if (y < src1_.rows) + { + const T1* src1 = src1_.ptr(y); + const T2* src2 = src2_.ptr(y); + D* dst = dst_.ptr(y); + + if (x_shifted + ft::smart_shift - 1 < src1_.cols) + { + const read_type1 src1_n_el = ((const read_type1*)src1)[x]; + const read_type2 src2_n_el = ((const read_type2*)src2)[x]; + + OpUnroller::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y); + } + else + { + for (int real_x = x_shifted; real_x < src1_.cols; ++real_x) + { + if (mask(y, real_x)) + dst[real_x] = op(src1[real_x], src2[real_x]); + } + } + } + } + + template + static __global__ void transformSimple(const PtrStepSz src1, const PtrStep src2, PtrStep dst, + const Mask mask, const BinOp op) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < src1.cols && y < src1.rows && mask(y, x)) + { + const T1 src1_data = src1.ptr(y)[x]; + const T2 src2_data = src2.ptr(y)[x]; + dst.ptr(y)[x] = op(src1_data, src2_data); + } + } + + template struct TransformDispatcher; + template<> struct TransformDispatcher + { + template + static void call(PtrStepSz src, PtrStepSz dst, UnOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1); + const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1); + + transformSimple<<>>(src, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template + static void 
call(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, BinOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1); + const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1); + + transformSimple<<>>(src1, src2, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template<> struct TransformDispatcher + { + template + static void call(PtrStepSz src, PtrStepSz dst, UnOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + CV_StaticAssert(ft::smart_shift != 1, ""); + + if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) || + !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D))) + { + TransformDispatcher::call(src, dst, op, mask, stream); + return; + } + + const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1); + const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1); + + transformSmart<<>>(src, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template + static void call(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, BinOp op, Mask mask, cudaStream_t stream) + { + typedef TransformFunctorTraits ft; + + CV_StaticAssert(ft::smart_shift != 1, ""); + + if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) || + !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) || + !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D))) + { + TransformDispatcher::call(src1, src2, dst, op, mask, stream); + return; + } + + const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1); + const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1); + + transformSmart<<>>(src1, src2, dst, mask, op); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + } // namespace transform_detail +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/type_traits_detail.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/type_traits_detail.hpp new file mode 100644 index 0000000..a78bd2c --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/type_traits_detail.hpp @@ -0,0 +1,191 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+
+#include "../common.hpp"
+#include "../vec_traits.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace type_traits_detail
+    {
+        template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
+        template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
+
+        template <typename T> struct IsSignedIntergral { enum {value = 0}; };
+        template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<short> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<int> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
+
+        template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
+        template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
+
+        template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
+        template <> struct IsIntegral<char> { enum {value = 1}; };
+        template <> struct IsIntegral<bool> { enum {value = 1}; };
+
+        template <typename T> struct IsFloat { enum {value = 0}; };
+        template <> struct IsFloat<float> { enum {value = 1}; };
+        template <> struct IsFloat<double> { enum {value = 1}; };
+
+        template <typename T> struct IsVec { enum {value = 0}; };
+        template <> struct IsVec<uchar1> { enum {value = 1}; };
+        template <> struct IsVec<uchar2> { enum {value = 1}; };
+        template <> struct IsVec<uchar3> { enum {value = 1}; };
+        template <> struct IsVec<uchar4> { enum {value = 1}; };
+        template <> struct IsVec<uchar8> { enum {value = 1}; };
+        template <> struct IsVec<char1> { enum {value = 1}; };
+        template <> struct IsVec<char2> { enum {value = 1}; };
+        template <> struct IsVec<char3> { enum {value = 1}; };
+        template <> struct IsVec<char4> { enum {value = 1}; };
+        template <> struct IsVec<char8> { enum {value = 1}; };
+        template <> struct IsVec<ushort1> { enum {value = 1}; };
+        template <> struct IsVec<ushort2> { enum {value = 1}; };
+        template <> struct IsVec<ushort3> { enum {value = 1}; };
+        template <> struct IsVec<ushort4> { enum {value = 1}; };
+        template <> struct IsVec<ushort8> { enum {value = 1}; };
+        template <> struct IsVec<short1> { enum {value = 1}; };
+        template <> struct IsVec<short2> { enum {value = 1}; };
+        template <> struct IsVec<short3> { enum {value = 1}; };
+        template <> struct IsVec<short4> { enum {value = 1}; };
+        template <> struct IsVec<short8> { enum {value = 1}; };
+        template <> struct IsVec<int1> { enum {value = 1}; };
+        template <> struct IsVec<int2> { enum {value = 1}; };
+        template <> struct IsVec<int3> { enum {value = 1}; };
+        template <> struct IsVec<int4> { enum {value = 1}; };
+        template <> struct IsVec<int8> { enum {value = 1}; };
+        template <> struct IsVec<uint1> { enum {value = 1}; };
+        template <> struct IsVec<uint2> { enum {value = 1}; };
+        template <> struct IsVec<uint3> { enum {value = 1}; };
+        template <> struct IsVec<uint4> { enum {value = 1}; };
+        template <> struct IsVec<uint8> { enum {value = 1}; };
+        template <> struct IsVec<float1> { enum {value = 1}; };
+        template <> struct IsVec<float2> { enum {value = 1}; };
+        template <> struct IsVec<float3> { enum {value = 1}; };
+        template <> struct IsVec<float4> { enum {value = 1}; };
+        template <> struct IsVec<float8> { enum {value = 1}; };
+        template <> struct IsVec<double1> { enum {value = 1}; };
+        template <> struct IsVec<double2> { enum {value = 1}; };
+        template <> struct IsVec<double3> { enum {value = 1}; };
+        template <> struct IsVec<double4> { enum {value = 1}; };
+        template <> struct IsVec<double8> { enum {value = 1}; };
+
+        template <typename U> struct AddParameterType { typedef const U& type; };
+        template <typename U> struct AddParameterType<U&> { typedef U& type; };
+        template <> struct AddParameterType<void> { typedef void type; };
+
+        template <typename U> struct ReferenceTraits
+        {
+            enum { value = false };
+            typedef U type;
+        };
+        template <typename U> struct ReferenceTraits<U&>
+        {
+            enum { value = true };
+            typedef U type;
+        };
+
+        template <typename U> struct PointerTraits
+        {
+            enum { value = false };
+            typedef void type;
+        };
+        template <typename U> struct PointerTraits<U*>
+        {
+            enum { value = true };
+            typedef U type;
+        };
+        template <typename U> struct PointerTraits<U*&>
+        {
+            enum { value = true };
+            typedef U type;
+        };
+
+        template <typename U> struct UnConst
+        {
+            typedef U type;
+            enum { value = 0 };
+        };
+        template <typename U> struct UnConst<const U>
+        {
+            typedef U type;
+            enum { value = 1 };
+        };
+        template <typename U> struct UnConst<const U&>
+        {
+            typedef U& type;
+            enum { value = 1 };
+        };
+
+        template <typename U> struct UnVolatile
+        {
+            typedef U type;
+            enum { value = 0 };
+        };
+        template <typename U> struct UnVolatile<volatile U>
+        {
+            typedef U type;
+            enum { value = 1 };
+        };
+        template <typename U> struct UnVolatile<volatile U&>
+        {
+            typedef U& type;
+            enum { value = 1 };
+        };
+    } // namespace type_traits_detail
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/vec_distance_detail.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
new file mode 100644
index 0000000..8283a99
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
@@ -0,0 +1,121 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP +#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP + +#include "../datamov_utils.hpp" + +//! 
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + namespace vec_distance_detail + { + template struct UnrollVecDiffCached + { + template + static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind) + { + if (ind < len) + { + T1 val1 = *vecCached++; + + T2 val2; + ForceGlob::Load(vecGlob, ind, val2); + + dist.reduceIter(val1, val2); + + UnrollVecDiffCached::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM); + } + } + + template + static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist) + { + T1 val1 = *vecCached++; + + T2 val2; + ForceGlob::Load(vecGlob, 0, val2); + vecGlob += THREAD_DIM; + + dist.reduceIter(val1, val2); + + UnrollVecDiffCached::calcWithoutCheck(vecCached, vecGlob, dist); + } + }; + template struct UnrollVecDiffCached + { + template + static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int) + { + } + + template + static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&) + { + } + }; + + template struct VecDiffCachedCalculator; + template struct VecDiffCachedCalculator + { + template + static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid) + { + UnrollVecDiffCached::calcCheck(vecCached, vecGlob, len, dist, tid); + } + }; + template struct VecDiffCachedCalculator + { + template + static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid) + { + UnrollVecDiffCached::calcWithoutCheck(vecCached, vecGlob + tid, dist); + } + }; + } // namespace vec_distance_detail +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/dynamic_smem.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/dynamic_smem.hpp new file mode 100644 index 0000000..42570c6 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/dynamic_smem.hpp @@ -0,0 +1,88 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP +#define OPENCV_CUDA_DYNAMIC_SMEM_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template struct DynamicSharedMem + { + __device__ __forceinline__ operator T*() + { + extern __shared__ int __smem[]; + return (T*)__smem; + } + + __device__ __forceinline__ operator const T*() const + { + extern __shared__ int __smem[]; + return (T*)__smem; + } + }; + + // specialize for double to avoid unaligned memory access compile errors + template<> struct DynamicSharedMem + { + __device__ __forceinline__ operator double*() + { + extern __shared__ double __smem_d[]; + return (double*)__smem_d; + } + + __device__ __forceinline__ operator const double*() const + { + extern __shared__ double __smem_d[]; + return (double*)__smem_d; + } + }; +}}} + +//! @endcond + +#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/emulation.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/emulation.hpp new file mode 100644 index 0000000..17dc117 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/emulation.hpp @@ -0,0 +1,269 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_EMULATION_HPP_
+#define OPENCV_CUDA_EMULATION_HPP_
+
+#include "common.hpp"
+#include "warp_reduce.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Emulation
+    {
+
+        static __device__ __forceinline__ int syncthreadsOr(int pred)
+        {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
+            // just a compilation stub
+            return 0;
+#else
+            return __syncthreads_or(pred);
+#endif
+        }
+
+        template <int CTA_SIZE>
+        static __forceinline__ __device__ int Ballot(int predicate)
+        {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
+            return __ballot(predicate);
+#else
+            __shared__ volatile int cta_buffer[CTA_SIZE];
+
+            int tid = threadIdx.x;
+            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
+            return warp_reduce(cta_buffer);
+#endif
+        }
+
+        // Shared-memory atomics emulated with a tag-and-retry loop on devices
+        // older than SM 1.2: the top 5 bits of the word carry the writer's tag,
+        // the remaining bits (TAG_MASK) carry the value.
+        struct smem
+        {
+            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
+
+            template <typename T>
+            static __device__ __forceinline__ T atomicInc(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
+                do
+                {
+                    count = *address & TAG_MASK;
+                    count = tag | (count + 1);
+                    *address = count;
+                } while (*address != count);
+
+                return (count & TAG_MASK) - 1;
+#else
+                return ::atomicInc(address, val);
+#endif
+            }
+
+            template <typename T>
+            static __device__ __forceinline__ T atomicAdd(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
+                do
+                {
+                    count = *address & TAG_MASK;
+                    count = tag | (count + val);
+                    *address = count;
+                } while (*address != count);
+
+                return (count & TAG_MASK) - val;
+#else
+                return ::atomicAdd(address, val);
+#endif
+            }
+
+            template <typename T>
+            static __device__ __forceinline__ T atomicMin(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count = ::min(*address, val);
+                do
+                {
+                    *address = count;
+                } while (*address > count);
+
+                return count;
+#else
+                return ::atomicMin(address, val);
+#endif
+            }
+        }; // struct smem
+
+        // Global-memory atomics: fall back to compare-and-swap loops where the
+        // native instruction is missing on the target architecture.
+        struct glob
+        {
+            static __device__ __forceinline__ int atomicAdd(int* address, int val)
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ float atomicAdd(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 200
+                return ::atomicAdd(address, val);
+            #else
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(val + __int_as_float(assumed)));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #endif
+            }
+            static __device__ __forceinline__ double atomicAdd(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val + __longlong_as_double(assumed)));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0;
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMin(int* address, int val)
+            {
+                return ::atomicMin(address, val);
+            }
+            static __device__ __forceinline__ float atomicMin(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fminf(val, __int_as_float(assumed))));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0f;
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMin(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0;
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMax(int* address, int val)
+            {
+                return ::atomicMax(address, val);
+            }
+            static __device__ __forceinline__ float atomicMax(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address;
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+                } while (assumed != old);
+                return __int_as_float(old);
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0f;
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMax(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address;
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+                } while (assumed != old);
+                return __longlong_as_double(old);
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0;
+            #endif
+            }
+        }; // struct glob
+    }; // struct Emulation
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//!
@endcond + +#endif /* OPENCV_CUDA_EMULATION_HPP_ */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/filters.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/filters.hpp new file mode 100644 index 0000000..bf3147e --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/filters.hpp @@ -0,0 +1,293 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_FILTERS_HPP +#define OPENCV_CUDA_FILTERS_HPP + +#include "saturate_cast.hpp" +#include "vec_traits.hpp" +#include "vec_math.hpp" +#include "type_traits.hpp" +#include "nppdefs.h" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename Ptr2D> struct PointFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            CV_UNUSED(fx);
+            CV_UNUSED(fy);
+        }
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            return src(__float2int_rz(y), __float2int_rz(x));
+        }
+
+        Ptr2D src;
+    };
+
+    template <typename Ptr2D> struct LinearFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            CV_UNUSED(fx);
+            CV_UNUSED(fy);
+        }
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+
+            work_type out = VecTraits<work_type>::all(0);
+
+            const int x1 = __float2int_rd(x);
+            const int y1 = __float2int_rd(y);
+            if (x1 <= NPP_MIN_32S || x1 >= NPP_MAX_32S || y1 <= NPP_MIN_32S || y1 >= NPP_MAX_32S)
+            {
+                elem_type src_reg = src(y1, x1);
+                out = out + src_reg * 1.0f;
+                return saturate_cast<elem_type>(out);
+            }
+            const int x2 = x1 + 1;
+            const int y2 = y1 + 1;
+
+            // bilinear blend of the four neighbours, weighted by area overlap
+            elem_type src_reg = src(y1, x1);
+            out = out + src_reg * ((x2 - x) * (y2 - y));
+
+            src_reg = src(y1, x2);
+            out = out + src_reg * ((x - x1) * (y2 - y));
+
+            src_reg = src(y2, x1);
+            out = out + src_reg * ((x2 - x) * (y - y1));
+
+            src_reg = src(y2, x2);
+            out = out + src_reg * ((x - x1) * (y - y1));
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src;
+    };
+
+    template <typename Ptr2D> struct CubicFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+
+        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            CV_UNUSED(fx);
+            CV_UNUSED(fy);
+        }
+
+        static __device__ __forceinline__ float bicubicCoeff(float x_)
+        {
+            float x = fabsf(x_);
+            if (x <= 1.0f)
+            {
+                return x * x * (1.5f * x - 2.5f) + 1.0f;
+            }
+            else if (x < 2.0f)
+            {
+                return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
+            }
+            else
+            {
+                return 0.0f;
+            }
+        }
+
+        __device__ elem_type operator ()(float y, float x) const
+        {
+            const float xmin = ::ceilf(x - 2.0f);
+            const float xmax = ::floorf(x + 2.0f);
+
+            const float ymin = ::ceilf(y - 2.0f);
+            const float ymax = ::floorf(y + 2.0f);
+
+            work_type sum = VecTraits<work_type>::all(0);
+            float wsum = 0.0f;
+
+            for (float cy = ymin; cy <= ymax; cy += 1.0f)
+            {
+                for (float cx = xmin; cx <= xmax; cx += 1.0f)
+                {
+                    const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
+                    sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
+                    wsum += w;
+                }
+            }
+
+            work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
+
+            return saturate_cast<elem_type>(res);
+        }
+
+        Ptr2D src;
+    };
+    // for integer scaling
+    template <typename Ptr2D> struct IntegerAreaFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+        : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            float fsx1 = x * scale_x;
+            float fsx2 = fsx1 + scale_x;
+
+            int sx1 = __float2int_ru(fsx1);
+            int sx2 = __float2int_rd(fsx2);
+
+            float fsy1 = y * scale_y;
+            float fsy2 = fsy1 + scale_y;
+
+            int sy1 = __float2int_ru(fsy1);
+            int sy2 = __float2int_rd(fsy2);
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+            work_type out = VecTraits<work_type>::all(0.f);
+
+            for(int dy = sy1; dy < sy2; ++dy)
+                for(int dx = sx1; dx < sx2; ++dx)
+                {
+                    out = out + src(dy, dx) * scale;
+                }
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src;
+        float scale_x, scale_y, scale;
+    };
+
+    template <typename Ptr2D> struct AreaFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+        : src(src_), scale_x(scale_x_), scale_y(scale_y_){}
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            float fsx1 = x * scale_x;
+            float fsx2 = fsx1 + scale_x;
+
+            int sx1 = __float2int_ru(fsx1);
+            int sx2 = __float2int_rd(fsx2);
+
+            float fsy1 = y * scale_y;
+            float fsy2 = fsy1 + scale_y;
+
+            int sy1 = __float2int_ru(fsy1);
+            int sy2 = __float2int_rd(fsy2);
+
+            float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
+            work_type out = VecTraits<work_type>::all(0.f);
+
+            // interior pixels first, then the fractional strips along each edge
+            for (int dy = sy1; dy < sy2; ++dy)
+            {
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(dy, dx) * scale;
+
+                if (sx1 > fsx1)
+                    out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
+
+                if (sx2 < fsx2)
+                    out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
+            }
+
+            if (sy1 > fsy1)
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
+
+            if (sy2 < fsy2)
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
+
+            if ((sy1 > fsy1) && (sx1 > fsx1))
+                out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
+
+            if ((sy1 > fsy1) && (sx2 < fsx2))
+                out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
+
+            if ((sy2 < fsy2) && (sx2 < fsx2))
+                out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
+
+            if ((sy2 < fsy2) && (sx1 > fsx1))
+                out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src;
+        float scale_x, scale_y;
+        int width, height;
+    };
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//!
@endcond
+
+#endif // OPENCV_CUDA_FILTERS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/funcattrib.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/funcattrib.hpp
new file mode 100644
index 0000000..f582080
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/funcattrib.hpp
@@ -0,0 +1,79 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+
+#include <cstdio>
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<class Func>
+    void printFuncAttrib(Func& func)
+    {
+
+        cudaFuncAttributes attrs;
+        cudaFuncGetAttributes(&attrs, func);
+
+        printf("=== Function stats ===\n");
+        printf("Name: \n");
+        printf("sharedSizeBytes    = %d\n", attrs.sharedSizeBytes);
+        printf("constSizeBytes     = %d\n", attrs.constSizeBytes);
+        printf("localSizeBytes     = %d\n", attrs.localSizeBytes);
+        printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
+        printf("numRegs            = %d\n", attrs.numRegs);
+        printf("ptxVersion         = %d\n", attrs.ptxVersion);
+        printf("binaryVersion      = %d\n", attrs.binaryVersion);
+        printf("\n");
+        fflush(stdout);
+    }
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//!
@endcond + +#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/functional.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/functional.hpp new file mode 100644 index 0000000..9e5ec76 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/functional.hpp @@ -0,0 +1,805 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_FUNCTIONAL_HPP +#define OPENCV_CUDA_FUNCTIONAL_HPP + +#include +#include "saturate_cast.hpp" +#include "vec_traits.hpp" +#include "type_traits.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + // Function Objects + template struct unary_function + { + typedef Argument argument_type; + typedef Result result_type; + }; + template struct binary_function + { + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; + typedef Result result_type; + }; + + // Arithmetic Operations + template struct plus : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a + b; + } + __host__ __device__ __forceinline__ plus() {} + __host__ __device__ __forceinline__ plus(const plus&) {} + }; + + template struct minus : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a - b; + } + __host__ __device__ __forceinline__ minus() {} + __host__ __device__ __forceinline__ minus(const minus&) {} + }; + + template struct multiplies : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a * b; + } + __host__ __device__ __forceinline__ multiplies() {} + __host__ __device__ __forceinline__ multiplies(const multiplies&) {} + }; + + template struct divides : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a / b; + } + __host__ __device__ __forceinline__ divides() {} + __host__ __device__ __forceinline__ divides(const divides&) {} + }; + + template struct modulus : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a % b; + } + __host__ __device__ __forceinline__ modulus() {} + __host__ __device__ __forceinline__ modulus(const modulus&) {} + }; + + template struct negate : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a) const + { + return -a; + } + __host__ __device__ __forceinline__ negate() {} + __host__ __device__ __forceinline__ negate(const negate&) {} + }; + + // Comparison Operations + template struct equal_to : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a == b; + } + __host__ __device__ __forceinline__ equal_to() {} + __host__ __device__ __forceinline__ equal_to(const equal_to&) {} + }; + + template struct not_equal_to : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a != b; + } + __host__ __device__ __forceinline__ not_equal_to() {} + __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {} + }; + + template struct greater : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a > b; + } + __host__ __device__ __forceinline__ greater() {} + __host__ __device__ __forceinline__ greater(const greater&) {} + }; + + template struct less : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a < b; + } + __host__ __device__ __forceinline__ less() {} + __host__ __device__ 
__forceinline__ less(const less&) {} + }; + + template struct greater_equal : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a >= b; + } + __host__ __device__ __forceinline__ greater_equal() {} + __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {} + }; + + template struct less_equal : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a <= b; + } + __host__ __device__ __forceinline__ less_equal() {} + __host__ __device__ __forceinline__ less_equal(const less_equal&) {} + }; + + // Logical Operations + template struct logical_and : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a && b; + } + __host__ __device__ __forceinline__ logical_and() {} + __host__ __device__ __forceinline__ logical_and(const logical_and&) {} + }; + + template struct logical_or : binary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a || b; + } + __host__ __device__ __forceinline__ logical_or() {} + __host__ __device__ __forceinline__ logical_or(const logical_or&) {} + }; + + template struct logical_not : unary_function + { + __device__ __forceinline__ bool operator ()(typename TypeTraits::ParameterType a) const + { + return !a; + } + __host__ __device__ __forceinline__ logical_not() {} + __host__ __device__ __forceinline__ logical_not(const logical_not&) {} + }; + + // Bitwise Operations + template struct bit_and : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a & b; + } + __host__ __device__ __forceinline__ bit_and() {} + __host__ __device__ __forceinline__ bit_and(const bit_and&) {} + }; + + template struct bit_or : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a | b; + } + __host__ __device__ __forceinline__ bit_or() {} + __host__ __device__ __forceinline__ bit_or(const bit_or&) {} + }; + + template struct bit_xor : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType a, + typename TypeTraits::ParameterType b) const + { + return a ^ b; + } + __host__ __device__ __forceinline__ bit_xor() {} + __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {} + }; + + template struct bit_not : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType v) const + { + return ~v; + } + __host__ __device__ __forceinline__ bit_not() {} + __host__ __device__ __forceinline__ bit_not(const bit_not&) {} + }; + + // Generalized Identity Operations + template struct identity : unary_function + { + __device__ __forceinline__ typename TypeTraits::ParameterType operator()(typename TypeTraits::ParameterType x) const + { + return x; + } + __host__ __device__ __forceinline__ identity() {} + __host__ __device__ __forceinline__ identity(const identity&) {} + }; + + template struct project1st : binary_function + { + __device__ __forceinline__ typename TypeTraits::ParameterType operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) 
const + { + return lhs; + } + __host__ __device__ __forceinline__ project1st() {} + __host__ __device__ __forceinline__ project1st(const project1st&) {} + }; + + template struct project2nd : binary_function + { + __device__ __forceinline__ typename TypeTraits::ParameterType operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return rhs; + } + __host__ __device__ __forceinline__ project2nd() {} + __host__ __device__ __forceinline__ project2nd(const project2nd&) {} + }; + + // Min/Max Operations + +#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \ + template <> struct name : binary_function \ + { \ + __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ + __host__ __device__ __forceinline__ name() {}\ + __host__ __device__ __forceinline__ name(const name&) {}\ + }; + + template struct maximum : binary_function + { + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return max(lhs, rhs); + } + __host__ __device__ __forceinline__ maximum() {} + __host__ __device__ __forceinline__ maximum(const maximum&) {} + }; + + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax) + OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax) + + template struct minimum : binary_function + { + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType lhs, typename TypeTraits::ParameterType rhs) const + { + return min(lhs, rhs); + } + __host__ __device__ __forceinline__ minimum() {} + __host__ __device__ __forceinline__ minimum(const minimum&) {} + }; + + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin) + OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin) + +#undef OPENCV_CUDA_IMPLEMENT_MINMAX + + // Math functions + + template struct abs_func : unary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType x) const + { + return abs(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned char operator ()(unsigned char x) const + { + return x; + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ signed char operator ()(signed char x) const + { + return ::abs((int)x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ char operator ()(char x) const + { + return ::abs((int)x); 
+ } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned short operator ()(unsigned short x) const + { + return x; + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ short operator ()(short x) const + { + return ::abs((int)x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ unsigned int operator ()(unsigned int x) const + { + return x; + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ int operator ()(int x) const + { + return ::abs(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ float operator ()(float x) const + { + return ::fabsf(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + template <> struct abs_func : unary_function + { + __device__ __forceinline__ double operator ()(double x) const + { + return ::fabs(x); + } + + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} + }; + +#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \ + template struct name ## _func : unary_function \ + { \ + __device__ __forceinline__ float operator ()(typename TypeTraits::ParameterType v) const \ + { \ + return func ## f(v); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; \ + template <> struct name ## _func : unary_function \ + { \ + __device__ __forceinline__ double operator ()(double v) const \ + { \ + return func(v); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; + +#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \ + template struct name ## _func : binary_function \ + { \ + __device__ __forceinline__ float operator ()(typename TypeTraits::ParameterType v1, typename TypeTraits::ParameterType v2) const \ + { \ + return func ## f(v1, v2); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; \ + template <> struct name ## _func : binary_function \ + { \ + __device__ __forceinline__ double operator ()(double v1, double v2) const \ + { \ + return func(v1, v2); \ + } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + }; + + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10) + 
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh) + OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh) + + OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot) + OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2) + OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow) + + #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR + #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE + #undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR + + template struct hypot_sqr_func : binary_function + { + __device__ __forceinline__ T operator ()(typename TypeTraits::ParameterType src1, typename TypeTraits::ParameterType src2) const + { + return src1 * src1 + src2 * src2; + } + __host__ __device__ __forceinline__ hypot_sqr_func() {} + __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {} + }; + + // Saturate Cast Functor + template struct saturate_cast_func : unary_function + { + __device__ __forceinline__ D operator ()(typename TypeTraits::ParameterType v) const + { + return saturate_cast(v); + } + __host__ __device__ __forceinline__ saturate_cast_func() {} + __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {} + }; + + // Threshold Functors + template struct thresh_binary_func : unary_function + { + __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src > thresh) * maxVal; + } + + __host__ __device__ __forceinline__ thresh_binary_func() {} + __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} + + T thresh; + T maxVal; + }; + + template struct thresh_binary_inv_func : unary_function + { + __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src <= thresh) * maxVal; + } + + __host__ __device__ __forceinline__ thresh_binary_inv_func() {} + __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} + + T thresh; + T maxVal; + }; + + template struct thresh_trunc_func : unary_function + { + explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return minimum()(src, thresh); + } + + __host__ __device__ __forceinline__ thresh_trunc_func() {} + __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) + : thresh(other.thresh) {} + + T thresh; + }; + + template struct thresh_to_zero_func : unary_function + { + explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + 
{ + return (src > thresh) * src; + } + + __host__ __device__ __forceinline__ thresh_to_zero_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) + : thresh(other.thresh) {} + + T thresh; + }; + + template struct thresh_to_zero_inv_func : unary_function + { + explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);} + + __device__ __forceinline__ T operator()(typename TypeTraits::ParameterType src) const + { + return (src <= thresh) * src; + } + + __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) + : thresh(other.thresh) {} + + T thresh; + }; + + // Function Result_Object Adaptors + template struct unary_negate : unary_function + { + explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {} + + __device__ __forceinline__ bool operator()(typename TypeTraits::ParameterType x) const + { + return !pred(x); + } + + __host__ __device__ __forceinline__ unary_negate() {} + __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {} + + Predicate pred; + }; + + template __host__ __device__ __forceinline__ unary_negate not1(const Predicate& pred) + { + return unary_negate(pred); + } + + template struct binary_negate : binary_function + { + explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {} + + __device__ __forceinline__ bool operator()(typename TypeTraits::ParameterType x, + typename TypeTraits::ParameterType y) const + { + return !pred(x,y); + } + + __host__ __device__ __forceinline__ binary_negate() {} + __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {} + + Predicate pred; + }; + + template __host__ __device__ __forceinline__ binary_negate not2(const BinaryPredicate& pred) + { + return binary_negate(pred); + } + + template struct binder1st : unary_function + { + __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {} + + __device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits::ParameterType a) const + { + return op(arg1, a); + } + + __host__ __device__ __forceinline__ binder1st() {} + __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {} + + Op op; + typename Op::first_argument_type arg1; + }; + + template __host__ __device__ __forceinline__ binder1st bind1st(const Op& op, const T& x) + { + return binder1st(op, typename Op::first_argument_type(x)); + } + + template struct binder2nd : unary_function + { + __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {} + + __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits::ParameterType a) const + { + return op(a, arg2); + } + + __host__ __device__ __forceinline__ binder2nd() {} + __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {} + + Op op; + typename Op::second_argument_type arg2; + }; + + template __host__ __device__ __forceinline__ binder2nd bind2nd(const Op& op, const T& x) + { + return binder2nd(op, typename Op::second_argument_type(x)); + } + + // Functor Traits + template struct IsUnaryFunction + { + typedef char Yes; + struct No {Yes 
a[2];}; + + template static Yes check(unary_function); + static No check(...); + + static F makeF(); + + enum { value = (sizeof(check(makeF())) == sizeof(Yes)) }; + }; + + template struct IsBinaryFunction + { + typedef char Yes; + struct No {Yes a[2];}; + + template static Yes check(binary_function); + static No check(...); + + static F makeF(); + + enum { value = (sizeof(check(makeF())) == sizeof(Yes)) }; + }; + + namespace functional_detail + { + template struct UnOpShift { enum { shift = 1 }; }; + template struct UnOpShift { enum { shift = 4 }; }; + template struct UnOpShift { enum { shift = 2 }; }; + + template struct DefaultUnaryShift + { + enum { shift = UnOpShift::shift }; + }; + + template struct BinOpShift { enum { shift = 1 }; }; + template struct BinOpShift { enum { shift = 4 }; }; + template struct BinOpShift { enum { shift = 2 }; }; + + template struct DefaultBinaryShift + { + enum { shift = BinOpShift::shift }; + }; + + template ::value> struct ShiftDispatcher; + template struct ShiftDispatcher + { + enum { shift = DefaultUnaryShift::shift }; + }; + template struct ShiftDispatcher + { + enum { shift = DefaultBinaryShift::shift }; + }; + } + + template struct DefaultTransformShift + { + enum { shift = functional_detail::ShiftDispatcher::shift }; + }; + + template struct DefaultTransformFunctorTraits + { + enum { simple_block_dim_x = 16 }; + enum { simple_block_dim_y = 16 }; + + enum { smart_block_dim_x = 16 }; + enum { smart_block_dim_y = 16 }; + enum { smart_shift = DefaultTransformShift::shift }; + }; + + template struct TransformFunctorTraits : DefaultTransformFunctorTraits {}; + +#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \ + template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type > +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_FUNCTIONAL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/limits.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/limits.hpp new file mode 100644 index 0000000..7e15ed6 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/limits.hpp @@ -0,0 +1,128 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_LIMITS_HPP
+#define OPENCV_CUDA_LIMITS_HPP
+
+#include <limits.h>
+#include <float.h>
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+template <class T> struct numeric_limits;
+
+template <> struct numeric_limits<bool>
+{
+    __device__ __forceinline__ static bool min() { return false; }
+    __device__ __forceinline__ static bool max() { return true;  }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<signed char>
+{
+    __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
+    __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned char>
+{
+    __device__ __forceinline__ static unsigned char min() { return 0; }
+    __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<short>
+{
+    __device__ __forceinline__ static short min() { return SHRT_MIN; }
+    __device__ __forceinline__ static short max() { return SHRT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned short>
+{
+    __device__ __forceinline__ static unsigned short min() { return 0; }
+    __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<int>
+{
+    __device__ __forceinline__ static int min() { return INT_MIN; }
+    __device__ __forceinline__ static int max() { return INT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned int>
+{
+    __device__ __forceinline__ static unsigned int min() { return 0; }
+    __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<float>
+{
+    __device__ __forceinline__ static float min() { return FLT_MIN; }
+    __device__ __forceinline__ static float max() { return FLT_MAX; }
+    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<double>
+{
+    __device__ __forceinline__ static double min() { return DBL_MIN; }
+    __device__ __forceinline__ static double max() { return DBL_MAX; }
+    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
+    static const bool is_signed = true;
+};
+}}} // namespace cv { namespace cuda { namespace cudev {
+
+//! @endcond
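
Editor's note, not part of the diff: a minimal device-side sketch of how these traits are typically consumed. The helper name nearlyEqual and the relative-tolerance formula are illustrative assumptions, not something this header prescribes.

    // Illustrative only: epsilon() from the float specialization above used
    // as a relative comparison tolerance inside device code.
    __device__ bool nearlyEqual(float a, float b)
    {
        const float eps = cv::cuda::device::numeric_limits<float>::epsilon();
        return ::fabsf(a - b) <= eps * ::fmaxf(::fabsf(a), ::fabsf(b));
    }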
+
+#endif // OPENCV_CUDA_LIMITS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/reduce.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/reduce.hpp
new file mode 100644
index 0000000..5de3650
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/reduce.hpp
@@ -0,0 +1,209 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_REDUCE_HPP
+#define OPENCV_CUDA_REDUCE_HPP
+
+#ifndef THRUST_DEBUG // eliminate -Wundef warning
+#define THRUST_DEBUG 0
+#endif
+
+#include <thrust/tuple.h>
+#include "detail/reduce.hpp"
+#include "detail/reduce_key_val.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
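
Editor's note, not part of the diff: a usage sketch for the block-wide reduce defined below, assuming functional.hpp from this same SDK is also included. The kernel name, the 256-thread block size, and the launch layout are illustrative assumptions.

    // Illustrative only: each 256-thread block sums its partial values.
    __global__ void blockSumKernel(const float* in, float* blockSums, int n)
    {
        __shared__ float smem[256];              // scratch for the reduction

        const unsigned int tid = threadIdx.x;
        const int i = blockIdx.x * blockDim.x + tid;
        float val = (i < n) ? in[i] : 0.0f;      // identity element of plus

        // plus<float> comes from functional.hpp; N (256) must match blockDim.x
        cv::cuda::device::reduce<256>(smem, val, tid, cv::cuda::device::plus<float>());

        if (tid == 0)
            blockSums[blockIdx.x] = val;         // thread 0 holds the block total
    }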
+
+namespace cv { namespace cuda { namespace device
+{
+    template <int N, typename T, class Op>
+    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
+    {
+        reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
+    }
+    template <int N,
+              typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                           const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                           unsigned int tid,
+                                           const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {
+        reduce_detail::Dispatcher<N>::reductor::template reduce<
+                const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
+                const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
+                const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
+    }
+
+    template <unsigned int N, typename K, typename V, class Cmp>
+    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
+    {
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
+    }
+    template <unsigned int N,
+              typename K,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp>
+    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
+                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid, const Cmp& cmp)
+    {
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
+                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+                const Cmp&>(skeys, key, svals, val, tid, cmp);
+    }
+    template <unsigned int N,
+              typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+    __device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                                 const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid,
+                                                 const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
+    {
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
+                const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
+                const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
+                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+                const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
+                >(skeys, key, svals, val, tid, cmp);
+    }
+
+    // smem_tuple
+
+    template <typename T0>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*>
+    smem_tuple(T0* t0)
+    {
+        return thrust::make_tuple((volatile T0*) t0);
+    }
+
+    template <typename T0, typename T1>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*>
+    smem_tuple(T0* t0, T1* t1)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
+    }
+
+    template <typename T0, typename T1, typename T2>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
+    smem_tuple(T0* t0, T1* t1, T2* t2)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
+    }
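
Editor's note, not part of the diff: a companion sketch for the key/value overloads above, computing an argmin where the keys are the values and the values are their indices. The kernel name, the 256-thread block, and the single-block launch are illustrative assumptions; functional.hpp and limits.hpp are assumed included.

    // Illustrative only: thread 0 ends up with the minimum and its index.
    __global__ void argMinKernel(const float* in, int n, float* minVal, int* minIdx)
    {
        __shared__ float skeys[256];
        __shared__ int   svals[256];

        const unsigned int tid = threadIdx.x;

        float key = cv::cuda::device::numeric_limits<float>::max();
        int   val = -1;
        for (int i = tid; i < n; i += blockDim.x)   // assumes a single block
            if (in[i] < key) { key = in[i]; val = i; }

        // less<float> from functional.hpp keeps the smaller key (and its value)
        cv::cuda::device::reduceKeyVal<256>(skeys, key, svals, val, tid,
                                            cv::cuda::device::less<float>());

        if (tid == 0) { *minVal = key; *minIdx = val; }
    }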
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
+    {
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_REDUCE_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/saturate_cast.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/saturate_cast.hpp
new file mode 100644
index 0000000..c3a3d1c
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/saturate_cast.hpp
@@ -0,0 +1,292 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
+#define OPENCV_CUDA_SATURATE_CAST_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
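Editor's note, not part of the diff: a small sketch of the saturating casts this header defines below; the kernel name and launch layout are illustrative assumptions.

    // Illustrative only: float pixels clamped into [0, 255] with
    // round-to-nearest-even (the cvt.rni.sat.u8.f32 path below).
    __global__ void packToU8Kernel(const float* src, uchar* dst, int n)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = cv::cuda::device::saturate_cast<uchar>(src[i]);
            // e.g. 300.2f -> 255, -7.9f -> 0, 41.5f -> 42
    }

+//!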
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); } + template __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); } + + template<> __device__ __forceinline__ uchar saturate_cast(schar v) + { + uint res = 0; + int vi = v; + asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(short v) + { + uint res = 0; + asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(ushort v) + { + uint res = 0; + asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(int v) + { + uint res = 0; + asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(uint v) + { + uint res = 0; + asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(float v) + { + uint res = 0; + asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ uchar saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + uint res = 0; + asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ schar saturate_cast(uchar v) + { + uint res = 0; + uint vi = v; + asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(short v) + { + uint res = 0; + asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(ushort v) + { + uint res = 0; + asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(int v) + { + uint res = 0; + asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(uint v) + { + uint res = 0; + asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(float v) + { + uint res = 0; + asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ schar saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + uint res = 0; + asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ ushort saturate_cast(schar v) + { + ushort res = 0; + int vi = v; + asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(short v) + { + ushort res = 0; + asm("cvt.sat.u16.s16 %0, %1;" : 
"=h"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(int v) + { + ushort res = 0; + asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(uint v) + { + ushort res = 0; + asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(float v) + { + ushort res = 0; + asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ ushort saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + ushort res = 0; + asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ short saturate_cast(ushort v) + { + short res = 0; + asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(int v) + { + short res = 0; + asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(uint v) + { + short res = 0; + asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(float v) + { + short res = 0; + asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v)); + return res; + } + template<> __device__ __forceinline__ short saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + short res = 0; + asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v)); + return res; + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ int saturate_cast(uint v) + { + int res = 0; + asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ int saturate_cast(float v) + { + return __float2int_rn(v); + } + template<> __device__ __forceinline__ int saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + return __double2int_rn(v); + #else + return saturate_cast((float)v); + #endif + } + + template<> __device__ __forceinline__ uint saturate_cast(schar v) + { + uint res = 0; + int vi = v; + asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi)); + return res; + } + template<> __device__ __forceinline__ uint saturate_cast(short v) + { + uint res = 0; + asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v)); + return res; + } + template<> __device__ __forceinline__ uint saturate_cast(int v) + { + uint res = 0; + asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v)); + return res; + } + template<> __device__ __forceinline__ uint saturate_cast(float v) + { + return __float2uint_rn(v); + } + template<> __device__ __forceinline__ uint saturate_cast(double v) + { + #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130 + return __double2uint_rn(v); + #else + return saturate_cast((float)v); + #endif + } +}}} + +//! 
@endcond + +#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/scan.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/scan.hpp new file mode 100644 index 0000000..e128fb0 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/scan.hpp @@ -0,0 +1,258 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_SCAN_HPP +#define OPENCV_CUDA_SCAN_HPP + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/utility.hpp" +#include "opencv2/core/cuda/warp.hpp" +#include "opencv2/core/cuda/warp_shuffle.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    enum ScanKind { EXCLUSIVE = 0,  INCLUSIVE = 1 };
+
+    template <ScanKind Kind, typename T, typename F> struct WarpScan
+    {
+        __device__ __forceinline__ WarpScan() {}
+        __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
+
+        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
+        {
+            const unsigned int lane = idx & 31;
+            F op;
+
+            if ( lane >=  1) ptr [idx ] = op(ptr [idx -  1], ptr [idx]);
+            if ( lane >=  2) ptr [idx ] = op(ptr [idx -  2], ptr [idx]);
+            if ( lane >=  4) ptr [idx ] = op(ptr [idx -  4], ptr [idx]);
+            if ( lane >=  8) ptr [idx ] = op(ptr [idx -  8], ptr [idx]);
+            if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
+
+            if( Kind == INCLUSIVE )
+                return ptr [idx];
+            else
+                return (lane > 0) ? ptr [idx - 1] : 0;
+        }
+
+        __device__ __forceinline__ unsigned int index(const unsigned int tid)
+        {
+            return tid;
+        }
+
+        __device__ __forceinline__ void init(volatile T *ptr){}
+
+        static const int warp_offset = 0;
+
+        typedef WarpScan<Kind, T, F> merge;
+    };
+
+    template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
+    {
+        __device__ __forceinline__ WarpScanNoComp() {}
+        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
+
+        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
+        {
+            const unsigned int lane = threadIdx.x & 31;
+            F op;
+
+            ptr [idx ] = op(ptr [idx -  1], ptr [idx]);
+            ptr [idx ] = op(ptr [idx -  2], ptr [idx]);
+            ptr [idx ] = op(ptr [idx -  4], ptr [idx]);
+            ptr [idx ] = op(ptr [idx -  8], ptr [idx]);
+            ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
+
+            if( Kind == INCLUSIVE )
+                return ptr [idx];
+            else
+                return (lane > 0) ? ptr [idx - 1] : 0;
+        }
+
+        __device__ __forceinline__ unsigned int index(const unsigned int tid)
+        {
+            return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
+        }
+
+        __device__ __forceinline__ void init(volatile T *ptr)
+        {
+            ptr[threadIdx.x] = 0;
+        }
+
+        static const int warp_smem_stride = 32 + 16 + 1;
+        static const int warp_offset      = 16;
+        static const int warp_log         = 5;
+        static const int warp_mask        = 31;
+
+        typedef WarpScanNoComp<Kind, T, F> merge;
+    };
+
+    template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
+    {
+        __device__ __forceinline__ BlockScan() {}
+        __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
+
+        __device__ __forceinline__ T operator()(volatile T *ptr)
+        {
+            const unsigned int tid  = threadIdx.x;
+            const unsigned int lane = tid & warp_mask;
+            const unsigned int warp = tid >> warp_log;
+
+            Sc scan;
+            typename Sc::merge merge_scan;
+            const unsigned int idx = scan.index(tid);
+
+            T val = scan(ptr, idx);
+            __syncthreads ();
+
+            if( warp == 0)
+                scan.init(ptr);
+            __syncthreads ();
+
+            if( lane == 31 )
+                ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
+            __syncthreads ();
+
+            if( warp == 0 )
+                merge_scan(ptr, idx);
+            __syncthreads();
+
+            if ( warp > 0)
+                val = ptr [scan.warp_offset + warp - 1] + val;
+            __syncthreads ();
+
+            ptr[idx] = val;
+            __syncthreads ();
+
+            return val ;
+        }
+
+        static const int warp_log  = 5;
+        static const int warp_mask = 31;
+    };
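
Editor's note, not part of the diff: a usage sketch for the warpScanInclusive/blockScanInclusive helpers defined just below. The kernel name and the 256-thread block are illustrative assumptions; the shared buffer is sized at twice the block so the pre-Kepler (__CUDA_ARCH__ < 300) shared-memory fallback has room.

    // Illustrative only: inclusive prefix sum of one int per thread per block.
    __global__ void blockPrefixSumKernel(const int* in, int* out, int n)
    {
        __shared__ int s_data[512];              // 2 * blockDim.x of scratch

        const unsigned int tid = threadIdx.x;
        const int i = blockIdx.x * blockDim.x + tid;
        int val = (i < n) ? in[i] : 0;

        const int scanned = cv::cuda::device::blockScanInclusive<256>(val, s_data, tid);

        if (i < n)
            out[i] = scanned;
    }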
+
+    template <typename T>
+    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int laneId = cv::cuda::device::Warp::laneId();
+
+        // scan on shuffle functions
+        #pragma unroll
+        for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
+        {
+            const T n = cv::cuda::device::shfl_up(idata, i);
+            if (laneId >= i)
+                  idata += n;
+        }
+
+        return idata;
+    #else
+        unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
+        s_Data[pos] = 0;
+        pos += OPENCV_CUDA_WARP_SIZE;
+        s_Data[pos] = idata;
+
+        s_Data[pos] += s_Data[pos - 1];
+        s_Data[pos] += s_Data[pos - 2];
+        s_Data[pos] += s_Data[pos - 4];
+        s_Data[pos] += s_Data[pos - 8];
+        s_Data[pos] += s_Data[pos - 16];
+
+        return s_Data[pos];
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+        return warpScanInclusive(idata, s_Data, tid) - idata;
+    }
+
+    template <int tiNumScanThreads, typename T>
+    __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+        if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
+        {
+            //Bottom-level inclusive warp scan
+            T warpResult = warpScanInclusive(idata, s_Data, tid);
+
+            //Save top elements of each warp for exclusive warp scan
+            //sync to wait for warp scans to complete (because s_Data is being overwritten)
+            __syncthreads();
+            if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
+            {
+                s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
+            }
+
+            //wait for warp scans to complete
+            __syncthreads();
+
+            if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
+            {
+                //grab top warp elements
+                T val = s_Data[tid];
+                //calculate exclusive scan and write back to shared memory
+                s_Data[tid] = warpScanExclusive(val, s_Data, tid);
+            }
+
+            //return updated warp scans with exclusive scan results
+            __syncthreads();
+
+            return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
+        }
+        else
+        {
+            return warpScanInclusive(idata, s_Data, tid);
+        }
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_SCAN_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/simd_functions.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/simd_functions.hpp
new file mode 100644
index 0000000..3d8c2e0
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/simd_functions.hpp
@@ -0,0 +1,869 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* + * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of NVIDIA Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP +#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP + +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + // 2 + + static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = a ^ b; // sum bits + r = a + b; // actual sum + s = s ^ r; // determine carry-ins for each bit position + s = s & 0x00010000; // carry-in to high word (= carry-out from low word) + r = r - s; // subtract out carry-out from low word + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = a ^ b; // sum bits + r = a - b; // actual sum + s = s ^ r; // determine carry-ins for each bit position + s = s & 0x00010000; // borrow to high word + r = r + s; // compensate for borrow from low word + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t, u, v; + s = a & 0x0000ffff; // extract low halfword + r = b & 0x0000ffff; // extract low halfword + u = ::max(r, s); // maximum of low halfwords + v = ::min(r, s); // minimum of low halfwords + s = a & 0xffff0000; // extract high halfword + r = b & 0xffff0000; // extract high halfword + t = ::max(r, s); // maximum of high halfwords + s = ::min(r, s); // minimum of high halfwords + r = u | t; // maximum of both halfwords + s = v | s; // minimum of both halfwords + r = r - s; // |a - b| = max(a,b) - min(a,b); + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b) + { + unsigned int r, s; + + // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> + // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) + s = a ^ b; + r = a & b; + s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries + s = s >> 1; + s = r + s; + + return s; + } + + static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==> + // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1) + unsigned int s; + s = a ^ b; + r = a | b; + s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries + s = s >> 1; + r = r - s; + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.eq 
%0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + r = r ^ c; // extract msbs, msb = 1 if r < 0x8000 + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r & ~c; // msb = 1, if r was 0x0000 + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vseteq2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + r = r ^ c; // extract msbs, msb = 1 if r < 0x8000 + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r & ~c; // msb = 1, if r was 0x0000 + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetge2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80008000; // msbs = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetgt2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80008000; // msbs = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned 
int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetle2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetlt2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80008000; // msb = carry-outs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r | c; // msb = 1, if r was not 0x0000 + c = c & 0x80008000; // extract msbs + r = c >> 15; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetne2(a, b); + c = r << 16; // convert bool + r = c - r; // into mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + r = a ^ b; // 0x0000 if a == b + c = r | 0x80008000; // set msbs, to catch carry out + c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000 + c = r | c; // msb = 1, if r was not 0x0000 + c = c & 0x80008000; // extract msbs + r = c >> 15; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t, u; + r = a & 0x0000ffff; // extract low halfword + s = b & 0x0000ffff; // extract low halfword + t = ::max(r, s); // maximum of low halfwords + r = a & 0xffff0000; // extract high halfword + s = b & 0xffff0000; // extract high halfword + u = ::max(r, s); // maximum of high halfwords + r = t | u; // combine halfword maximums + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b) + { + 
unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t, u; + r = a & 0x0000ffff; // extract low halfword + s = b & 0x0000ffff; // extract low halfword + t = ::min(r, s); // minimum of low halfwords + r = a & 0xffff0000; // extract high halfword + s = b & 0xffff0000; // extract high halfword + u = ::min(r, s); // minimum of high halfwords + r = t | u; // combine halfword minimums + #endif + + return r; + } + + // 4 + + static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t; + s = a ^ b; // sum bits + r = a & 0x7f7f7f7f; // clear msbs + t = b & 0x7f7f7f7f; // clear msbs + s = s & 0x80808080; // msb sum bits + r = r + t; // add without msbs, record carry-out in msbs + r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out + #endif /* __CUDA_ARCH__ >= 300 */ + + return r; + } + + static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s, t; + s = a ^ ~b; // inverted sum bits + r = a | 0x80808080; // set msbs + t = b & 0x7f7f7f7f; // clear msbs + s = s & 0x80808080; // inverted msb sum bits + r = r - t; // subtract w/o msbs, record inverted borrows in msb + r = r ^ s; // combine inverted msb sum bits and borrows + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b) + { + unsigned int r, s; + + // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==> + // (a + b) / 2 = (a & b) + ((a ^ b) >> 1) + s = a ^ b; + r = a & b; + s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries + s = s >> 1; + s = r + s; + + return s; + } + + static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==> + // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1) + unsigned int c; + c = a ^ b; + r = a | b; + c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries + c = c >> 1; + r = r - c; + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int 
vseteq4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x00 if a == b + c = r | 0x80808080; // set msbs, to catch carry out + r = r ^ c; // extract msbs, msb = 1 if r < 0x80 + c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80 + c = r & ~c; // msb = 1, if r was 0x00 + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b) + { + unsigned int r, t; + + #if __CUDA_ARCH__ >= 300 + r = vseteq4(a, b); + t = r << 8; // convert bool + r = t - r; // to mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + t = a ^ b; // 0x00 if a == b + r = t | 0x80808080; // set msbs, to catch carry out + t = t ^ r; // extract msbs, msb = 1 if t < 0x80 + r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80 + r = t & ~r; // msb = 1, if t was 0x00 + t = r >> 7; // build mask + t = r - t; // from + r = t | r; // msbs + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetle4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2 + c = c & 0x80808080; // msbs = carry-outs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetlt4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + asm("not.b32 %0, %0;" : "+r"(a)); + c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down] + c = c & 0x80808080; // msbs = carry-outs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + 
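Editor's note (illustrative, not part of the vendored OpenCV header): the vset* helpers above return a per-lane boolean, 0 or 1 in each byte, while the matching vcmp* wrappers widen that boolean into a full 0x00/0xff lane mask. A minimal worked example, assuming the fallback semantics shown above:
// vseteq4(0x01020304, 0x01FF0304) == 0x01000101   // 1 in every byte where the operands match
// vcmpeq4(0x01020304, 0x01FF0304) == 0xFF00FFFF   // 0xff in every matching byte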
+ static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b) + { + unsigned int r, s; + + #if __CUDA_ARCH__ >= 300 + r = vsetge4(a, b); + s = r << 8; // convert bool + r = s - r; // to mask + #else + asm ("not.b32 %0,%0;" : "+r"(b)); + r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2 + r = r & 0x80808080; // msb = carry-outs + s = r >> 7; // build mask + s = r - s; // from + r = s | r; // msbs + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int c; + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetgt4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + asm("not.b32 %0, %0;" : "+r"(b)); + c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down] + c = c & 0x80808080; // msb = carry-outs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + unsigned int c; + r = a ^ b; // 0x00 if a == b + c = r | 0x80808080; // set msbs, to catch carry out + c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80 + c = r | c; // msb = 1, if r was not 0x00 + c = c & 0x80808080; // extract msbs + r = c >> 7; // convert to bool + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b) + { + unsigned int r, c; + + #if __CUDA_ARCH__ >= 300 + r = vsetne4(a, b); + c = r << 8; // convert bool + r = c - r; // to mask + #else + // inspired by Alan Mycroft's null-byte detection algorithm: + // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080)) + r = a ^ b; // 0x00 if a == b + c = r | 0x80808080; // set msbs, to catch carry out + c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80 + c = r | c; // msb = 1, if r was not 0x00 + c = c & 0x80808080; // extract msbs + r = c >> 7; // convert + r = c - r; // msbs to + r = c | r; // mask + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = vcmpge4(a, b); // mask = 0xff if a >= b + r = a ^ b; // + s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b) + r = s ^ r; // select a when b >= a, else select b => 
min(a,b) + r = s - r; // |a - b| = max(a,b) - min(a,b); + #endif + + return r; + } + + static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = vcmpge4(a, b); // mask = 0xff if a >= b + r = a & s; // select a when a >= b + s = b & ~s; // select b when a < b + r = r | s; // combine byte selections + #endif + + return r; // byte-wise unsigned maximum + } + + static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b) + { + unsigned int r = 0; + + #if __CUDA_ARCH__ >= 300 + asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #elif __CUDA_ARCH__ >= 200 + asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r)); + #else + unsigned int s; + s = vcmpge4(b, a); // mask = 0xff if b >= a + r = a & s; // select a when b >= a + s = b & ~s; // select b when b < a + r = r | s; // combine byte selections + #endif + + return r; + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
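Editor's note (illustrative, not part of the vendored OpenCV header): every helper in this file treats one 32-bit register as packed lanes, so a single sequence processes two halfwords or four bytes at once, e.g.
// four independent byte-wise additions in one 32-bit register:
// vadd4(0x01020304, 0x01010101) == 0x02030405
// per-byte rounded-down average via the HAKMEM #23 identity used above:
// vavg4(0x10203040, 0x00000000) == 0x08101820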
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/transform.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/transform.hpp new file mode 100644 index 0000000..42aa6ea --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/transform.hpp @@ -0,0 +1,75 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TRANSFORM_HPP +#define OPENCV_CUDA_TRANSFORM_HPP + +#include "common.hpp" +#include "utility.hpp" +#include "detail/transform_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template <typename T, typename D, typename UnOp, typename Mask> + static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream) + { + typedef TransformFunctorTraits<UnOp> ft; + transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream); + } + + template <typename T1, typename T2, typename D, typename BinOp, typename Mask> + static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream) + { + typedef TransformFunctorTraits<BinOp> ft; + transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream); + } +}}} + +//! @endcond + +#endif // OPENCV_CUDA_TRANSFORM_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/type_traits.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/type_traits.hpp new file mode 100644 index 0000000..8b7a3fd --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/type_traits.hpp @@ -0,0 +1,90 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP +#define OPENCV_CUDA_TYPE_TRAITS_HPP + +#include "detail/type_traits_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template <typename T> struct IsSimpleParameter + { + enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value || + type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value}; + }; + + template <typename T> struct TypeTraits + { + typedef typename type_traits_detail::UnConst<T>::type NonConstType; + typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType; + typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType; + typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType; + typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType; + + enum { isConst = type_traits_detail::UnConst<T>::value }; + enum { isVolatile = type_traits_detail::UnVolatile<T>::value }; + + enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value }; + enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value }; + + enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value }; + enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value }; + enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value }; + enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value }; + enum { isArith = isIntegral || isFloat }; + enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value }; + + typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value, + T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType; + }; +}}} + +//! @endcond + +#endif // OPENCV_CUDA_TYPE_TRAITS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/utility.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/utility.hpp new file mode 100644 index 0000000..7f5db48 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/utility.hpp @@ -0,0 +1,230 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_UTILITY_HPP +#define OPENCV_CUDA_UTILITY_HPP + +#include "saturate_cast.hpp" +#include "datamov_utils.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct CV_EXPORTS ThrustAllocator + { + typedef uchar value_type; + virtual ~ThrustAllocator(); + virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0; + virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0; + static ThrustAllocator& getAllocator(); + static void setAllocator(ThrustAllocator* allocator); + }; + #define OPENCV_CUDA_LOG_WARP_SIZE (5) + #define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE) + #define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 
5 : 4) // 32 banks on fermi, 16 on tesla + #define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS) + + /////////////////////////////////////////////////////////////////////////////// + // swap + + template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b) + { + const T temp = a; + a = b; + b = temp; + } + + /////////////////////////////////////////////////////////////////////////////// + // Mask Reader + + struct SingleMask + { + explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {} + __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){} + + __device__ __forceinline__ bool operator()(int y, int x) const + { + return mask.ptr(y)[x] != 0; + } + + PtrStepb mask; + }; + + struct SingleMaskChannels + { + __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_) + : mask(mask_), channels(channels_) {} + __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_) + :mask(mask_.mask), channels(mask_.channels){} + + __device__ __forceinline__ bool operator()(int y, int x) const + { + return mask.ptr(y)[x / channels] != 0; + } + + PtrStepb mask; + int channels; + }; + + struct MaskCollection + { + explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_) + : maskCollection(maskCollection_) {} + + __device__ __forceinline__ MaskCollection(const MaskCollection& masks_) + : maskCollection(masks_.maskCollection), curMask(masks_.curMask){} + + __device__ __forceinline__ void next() + { + curMask = *maskCollection++; + } + __device__ __forceinline__ void setMask(int z) + { + curMask = maskCollection[z]; + } + + __device__ __forceinline__ bool operator()(int y, int x) const + { + uchar val; + return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0)); + } + + const PtrStepb* maskCollection; + PtrStepb curMask; + }; + + struct WithOutMask + { + __host__ __device__ __forceinline__ WithOutMask(){} + __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){} + + __device__ __forceinline__ void next() const + { + } + __device__ __forceinline__ void setMask(int) const + { + } + + __device__ __forceinline__ bool operator()(int, int) const + { + return true; + } + + __device__ __forceinline__ bool operator()(int, int, int) const + { + return true; + } + + static __device__ __forceinline__ bool check(int, int) + { + return true; + } + + static __device__ __forceinline__ bool check(int, int, int) + { + return true; + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // Solve linear system + + // solve 2x2 linear system Ax=b + template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2]) + { + T det = A[0][0] * A[1][1] - A[1][0] * A[0][1]; + + if (det != 0) + { + double invdet = 1.0 / det; + + x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1])); + + x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0])); + + return true; + } + + return false; + } + + // solve 3x3 linear system Ax=b + template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3]) + { + T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) + - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]); + + if (det != 0) + { + double invdet = 1.0 / det; + + x[0] = saturate_cast<T>(invdet * + (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) - + A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) + 
+ A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] ))); + + x[1] = saturate_cast<T>(invdet * + (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) - + b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0]))); + + x[2] = saturate_cast<T>(invdet * + (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) - + A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) + + b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]))); + + return true; + } + + return false; + } +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_UTILITY_HPP
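Editor's note (illustrative, not part of the vendored OpenCV header): solve2x2 and solve3x3 apply Cramer's rule and report a singular system by returning false. A minimal sketch:
// float A[2][2] = {{2, 1}, {1, 3}};  float b[2] = {3, 5};  float x[2];
// solve2x2(A, b, x);   // det == 5, so x == {0.8f, 1.4f}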
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_distance.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_distance.hpp new file mode 100644 index 0000000..ef6e510 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_distance.hpp @@ -0,0 +1,232 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP +#define OPENCV_CUDA_VEC_DISTANCE_HPP + +#include "reduce.hpp" +#include "functional.hpp" +#include "detail/vec_distance_detail.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + template <typename T> struct L1Dist + { + typedef int value_type; + typedef int result_type; + + __device__ __forceinline__ L1Dist() : mySum(0) {} + + __device__ __forceinline__ void reduceIter(int val1, int val2) + { + mySum = __sad(val1, val2, mySum); + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); + } + + __device__ __forceinline__ operator int() const + { + return mySum; + } + + int mySum; + }; + template <> struct L1Dist<float> + { + typedef float value_type; + typedef float result_type; + + __device__ __forceinline__ L1Dist() : mySum(0.0f) {} + + __device__ __forceinline__ void reduceIter(float val1, float val2) + { + mySum += ::fabs(val1 - val2); + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); + } + + __device__ __forceinline__ operator float() const + { + return mySum; + } + + float mySum; + }; + + struct L2Dist + { + typedef float value_type; + typedef float result_type; + + __device__ __forceinline__ L2Dist() : mySum(0.0f) {} + + __device__ __forceinline__ void reduceIter(float val1, float val2) + { + float reg = val1 - val2; + mySum += reg * reg; + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); + } + + __device__ __forceinline__ operator float() const + { + return sqrtf(mySum); + } + + float mySum; + }; + + struct HammingDist + { + typedef int value_type; + typedef int result_type; + + __device__ __forceinline__ HammingDist() : mySum(0) {} + + __device__ __forceinline__ void reduceIter(int val1, int val2) + { + mySum += __popc(val1 ^ val2); + } + + template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) + { + reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); + } + + __device__ __forceinline__ operator int() const + { + return mySum; + } + + int mySum; + }; + + // calc distance between two vectors in global memory + template <int THREAD_DIM, typename Dist, typename T1, typename T2> + __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) + { + for (int i = tid; i < len; i += THREAD_DIM) + { + T1 val1; + ForceGlob<T1>::Load(vec1, i, val1); + + T2 val2; + ForceGlob<T2>::Load(vec2, i, val2); + + dist.reduceIter(val1, val2); + } + + dist.reduceAll<THREAD_DIM>(smem, tid); + } + + // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory + template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2> + __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid) + { + vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid); + + dist.reduceAll<THREAD_DIM>(smem, tid); + } + + // calc distance between two vectors in global memory + template <int THREAD_DIM, typename T1> struct VecDiffGlobal + { + explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0) + { + vec1 = vec1_; + } + + template <typename T2, typename Dist> + __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const + { + calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid); + } + + const T1* vec1; + }; + + // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory + template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister + { + template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid) + { + if 
(glob_tid < len) + smem[glob_tid] = vec1[glob_tid]; + __syncthreads(); + + U* vec1ValsPtr = vec1Vals; + + #pragma unroll + for (int i = tid; i < MAX_LEN; i += THREAD_DIM) + *vec1ValsPtr++ = smem[i]; + + __syncthreads(); + } + + template <typename T2, typename Dist> + __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const + { + calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid); + } + + U vec1Vals[MAX_LEN / THREAD_DIM]; + }; +}}} // namespace cv { namespace cuda { namespace cudev + +//! @endcond + +#endif // OPENCV_CUDA_VEC_DISTANCE_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_math.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_math.hpp new file mode 100644 index 0000000..80b1303 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_math.hpp @@ -0,0 +1,923 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VECMATH_HPP +#define OPENCV_CUDA_VECMATH_HPP + +#include "vec_traits.hpp" +#include "saturate_cast.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + +// saturate_cast + +namespace vec_math_detail +{ + template <int cn, typename VecD> struct SatCastHelper; + template <typename VecD> struct SatCastHelper<1, VecD> + { + template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits<VecD>::elem_type D; + return VecTraits<VecD>::make(saturate_cast<D>(v.x)); + } + }; + template <typename VecD> struct SatCastHelper<2, VecD> + { + template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits<VecD>::elem_type D; + return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y)); + } + }; + template <typename VecD> struct SatCastHelper<3, VecD> + { + template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits<VecD>::elem_type D; + return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z)); + } + }; + template <typename VecD> struct SatCastHelper<4, VecD> + { + template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v) + { + typedef typename VecTraits<VecD>::elem_type D; + return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w)); + } + }; + + template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v) + { + return SatCastHelper<VecTraits<VecS>::cn, VecD>::cast(v); + } +} + +template <typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);} + +template <typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);} + +template <typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return 
vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);} + +template <typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} +template <typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);} + +// unary operators + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \ + { \ + return VecTraits<output_type ## 1>::make(op (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \ + { \ + return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \ + { \ + return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \ + { \ + return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
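Editor's note (illustrative, not part of the vendored OpenCV header): the macro above stamps out component-wise unary operators with the listed result types, e.g.
// char4 a = VecTraits<char4>::make(1, -2, 3, -4);
// char4 n = -a;    // (-1, 2, -3, 4)
// uchar4 z = !a;   // (0, 0, 0, 0): every lane is non-zero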
 + +// unary functions + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \ + { \ + return VecTraits<output_type ## 1>::make(func (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \ + { \ + return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \ + { \ + return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \ + { \ + return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, 
short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, 
short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC + +// binary operators (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \ + __device__ 
__forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
+
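The CV_CUDEV_IMPLEMENT_VEC_BINARY_OP instantiations above stamp out componentwise operators for CUDA's built-in vector types (1 to 4 lanes): arithmetic on narrow integer vectors widens each lane to int so sums cannot wrap at 255, while the comparison operators return a uchar vector holding 0 or 1 per lane (unlike cv::compare on the host, which yields 255 for true); matching vec-and-scalar overloads follow below. A minimal usage sketch, assuming an nvcc translation unit that includes this vendored header (the include path, kernel and buffer names are illustrative):

// Hypothetical kernel exercising the generated per-lane operators.
#include "opencv2/core/cuda/vec_math.hpp"   // assumed path of this header

using namespace cv::cuda::device;

__global__ void brightMask(const uchar3* rgb, uchar3* mask, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) return;

    // Per-lane '>' generated by CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar):
    // each channel is compared independently and yields uchar 0 or 1.
    mask[i] = rgb[i] > VecTraits<uchar3>::all(128);

    // Arithmetic widens per lane: uchar3 + uchar3 produces an int3.
    int3 sum = rgb[i] + rgb[i];
    (void)sum; // illustrative only
}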
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
+
+// binary operators (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op s); \
+    } \
+    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(s op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
+
+// binary function (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
+
+// binary function (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
+    }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC + +}}} // namespace cv { namespace cuda { namespace device + +//! @endcond + +#endif // OPENCV_CUDA_VECMATH_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_traits.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_traits.hpp new file mode 100644 index 0000000..b5ff281 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/vec_traits.hpp @@ -0,0 +1,288 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_VEC_TRAITS_HPP +#define OPENCV_CUDA_VEC_TRAITS_HPP + +#include "common.hpp" + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! 
@cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<typename T, int N> struct TypeVec;
+
+    struct __align__(8) uchar8
+    {
+        uchar a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
+    {
+        uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(8) char8
+    {
+        schar a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
+    {
+        char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(16) ushort8
+    {
+        ushort a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
+    {
+        ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(16) short8
+    {
+        short a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
+    {
+        short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(32) uint8
+    {
+        uint a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
+    {
+        uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(32) int8
+    {
+        int a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
+    {
+        int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct __align__(32) float8
+    {
+        float a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
+    {
+        float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+    struct double8
+    {
+        double a0, a1, a2, a3, a4, a5, a6, a7;
+    };
+    static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
+    {
+        double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
+        return val;
+    }
+
+#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
+    template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
+    template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
+    template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
+    template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
+    template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
+    template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
+    template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
+    template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
+    template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
+    template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
+
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
+
+    #undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
+
+    template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
+    template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
+    template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
+    template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
+    template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
+
+    template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
+    template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
+    template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
+    template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
+    template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
+
+    template<typename T> struct VecTraits;
+
+#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
+    template<> struct VecTraits<type> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        static __device__ __host__ __forceinline__ type all(type v) {return v;} \
+        static __device__ __host__ __forceinline__ type make(type x) {return x;} \
+        static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
+    }; \
+    template<> struct VecTraits<type ## 1> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
+        static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
+        static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
+    }; \
+    template<> struct VecTraits<type ## 2> \
+    { \
+        typedef type elem_type; \
+        enum {cn=2}; \
+        static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
+        static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
+        static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
+    }; \
+    template<> struct VecTraits<type ## 3> \
+    { \
+        typedef type elem_type; \
+        enum {cn=3}; \
+        static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
+        static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
+    }; \
+    template<> struct VecTraits<type ## 4> \
+    { \
+        typedef type elem_type; \
+        enum {cn=4}; \
+        static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
+        static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
+    }; \
+    template<> struct VecTraits<type ## 8> \
+    { \
+        typedef type elem_type; \
+        enum {cn=8}; \
+        static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
+        static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
+    };
+
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
+
+    #undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
+
+    template<> struct VecTraits<char>
+    {
+        typedef char elem_type;
+        enum {cn=1};
+        static __device__ __host__ __forceinline__ char all(char v) {return v;}
+        static __device__ __host__ __forceinline__ char make(char x) {return x;}
+        static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
+    };
+    template<> struct VecTraits<schar>
+    {
+        typedef schar elem_type;
+        enum {cn=1};
+        static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
+        static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
+        static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
+    };
+    template<> struct VecTraits<char1>
+    {
+        typedef schar elem_type;
+        enum {cn=1};
+        static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
+        static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
+        static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
+    };
+    template<> struct VecTraits<char2>
+    {
+        typedef schar elem_type;
+        enum {cn=2};
+        static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
+        static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
+        static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
+    };
+    template<> struct VecTraits<char3>
+    {
+        typedef schar elem_type;
+        enum {cn=3};
+        static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
+        static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
+        static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
+    };
+    template<> struct VecTraits<char4>
+    {
+        typedef schar elem_type;
+        enum {cn=4};
+        static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
+        static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
+        static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
+    };
+    template<> struct VecTraits<char8>
+    {
+        typedef schar elem_type;
+        enum {cn=8};
+        static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
+        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
+        static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_TRAITS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp.hpp
new file mode 100644
index 0000000..8af7e6a
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp.hpp
@@ -0,0 +1,139 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDA_DEVICE_WARP_HPP +#define OPENCV_CUDA_DEVICE_WARP_HPP + +/** @file + * @deprecated Use @ref cudev instead. + */ + +//! @cond IGNORED + +namespace cv { namespace cuda { namespace device +{ + struct Warp + { + enum + { + LOG_WARP_SIZE = 5, + WARP_SIZE = 1 << LOG_WARP_SIZE, + STRIDE = WARP_SIZE + }; + + /** \brief Returns the warp lane ID of the calling thread. 
*/
+        static __device__ __forceinline__ unsigned int laneId()
+        {
+            unsigned int ret;
+            asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
+            return ret;
+        }
+
+        template<typename It, typename T>
+        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
+        {
+            for(It t = beg + laneId(); t < end; t += STRIDE)
+                *t = value;
+        }
+
+        template<typename InIt, typename OutIt>
+        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
+        {
+            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
+                *out = *t;
+            return out;
+        }
+
+        template<typename InIt, typename OutIt, class UnOp>
+        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
+        {
+            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
+                *out = op(*t);
+            return out;
+        }
+
+        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
+        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
+        {
+            unsigned int lane = laneId();
+
+            InIt1 t1 = beg1 + lane;
+            InIt2 t2 = beg2 + lane;
+            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
+                *out = op(*t1, *t2);
+            return out;
+        }
+
+        template<typename T, class BinOp>
+        static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
+        {
+            const unsigned int lane = laneId();
+
+            if (lane < 16)
+            {
+                T partial = ptr[lane];
+
+                ptr[lane] = partial = op(partial, ptr[lane + 16]);
+                ptr[lane] = partial = op(partial, ptr[lane + 8]);
+                ptr[lane] = partial = op(partial, ptr[lane + 4]);
+                ptr[lane] = partial = op(partial, ptr[lane + 2]);
+                ptr[lane] = partial = op(partial, ptr[lane + 1]);
+            }
+
+            return *ptr;
+        }
+
+        template<typename OutIt, typename T>
+        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
+        {
+            unsigned int lane = laneId();
+            value += lane;
+
+            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
+                *t = value;
+        }
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp_reduce.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp_reduce.hpp
new file mode 100644
index 0000000..530303d
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp_reduce.hpp
@@ -0,0 +1,76 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
+#define OPENCV_CUDA_WARP_REDUCE_HPP__
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <class T>
+    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
+    {
+        const unsigned int lane = tid & 31; // index of thread in warp (0..31)
+
+        if (lane < 16)
+        {
+            T partial = ptr[tid];
+
+            ptr[tid] = partial = partial + ptr[tid + 16];
+            ptr[tid] = partial = partial + ptr[tid + 8];
+            ptr[tid] = partial = partial + ptr[tid + 4];
+            ptr[tid] = partial = partial + ptr[tid + 2];
+            ptr[tid] = partial = partial + ptr[tid + 1];
+        }
+
+        return ptr[tid - lane];
+    }
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp_shuffle.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp_shuffle.hpp
new file mode 100644
index 0000000..0da54ae
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda/warp_shuffle.hpp
@@ -0,0 +1,162 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
+#define OPENCV_CUDA_WARP_SHUFFLE_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+#if __CUDACC_VER_MAJOR__ >= 9
+# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
+# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
+# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
+#endif
+    template <typename T>
+    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return __shfl(val, srcLane, width);
+    #else
+        return T();
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl((int) val, srcLane, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val);
+        int hi = __double2hiint(val);
+
+        lo = __shfl(lo, srcLane, width);
+        hi = __shfl(hi, srcLane, width);
+
+        return __hiloint2double(hi, lo);
+    #else
+        return 0.0;
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return __shfl_down(val, delta, width);
+    #else
+        return T();
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl_down((int) val, delta, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val);
+        int hi = __double2hiint(val);
+
+        lo = __shfl_down(lo, delta, width);
+        hi = __shfl_down(hi, delta, width);
+
+        return __hiloint2double(hi, lo);
+    #else
+        return 0.0;
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return __shfl_up(val, delta, width);
+    #else
+        return T();
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl_up((int) val, delta, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val);
+        int hi = __double2hiint(val);
+
+        lo = __shfl_up(lo, delta, width);
+        hi = __shfl_up(hi, delta, width);
+
+        return __hiloint2double(hi, lo);
+    #else
+        return 0.0;
+    #endif
+    }
+}}}
+
+# undef __shfl
+# undef __shfl_up
+# undef __shfl_down
+
+//! @endcond
+
+#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda_stream_accessor.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda_stream_accessor.hpp
new file mode 100644
index 0000000..deaf356
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda_stream_accessor.hpp
@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+
+#ifndef __cplusplus
+# error cuda_stream_accessor.hpp header must be compiled as C++
+#endif
+
+/** @file cuda_stream_accessor.hpp
+ * This is only header file that depends on CUDA Runtime API. All other headers are independent.
+ */
+
+#include <cuda_runtime.h>
+#include "opencv2/core/cuda.hpp"
+
+namespace cv
+{
+    namespace cuda
+    {
+
+//!
+//! @addtogroup cudacore_struct
+//! @{
+
+    /** @brief Class that enables getting cudaStream_t from cuda::Stream
+     */
+    struct StreamAccessor
+    {
+        CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
+        CV_EXPORTS static Stream wrapStream(cudaStream_t stream);
+    };
+
+    /** @brief Class that enables getting cudaEvent_t from cuda::Event
+     */
+    struct EventAccessor
+    {
+        CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
+        CV_EXPORTS static Event wrapEvent(cudaEvent_t event);
+    };
+
+//! @}
+
+    }
+}
+
+#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */
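A minimal interop sketch (editorial note, not part of the patch): StreamAccessor lets user code hand a cv::cuda::Stream to raw CUDA Runtime calls, and wrapStream covers the reverse direction. enqueueCopy is a hypothetical helper and assumes a CUDA-enabled OpenCV build.

    #include <opencv2/core/cuda_stream_accessor.hpp>

    void enqueueCopy(const cv::cuda::Stream& s, void* dst, const void* src, size_t n)
    {
        cudaStream_t raw = cv::cuda::StreamAccessor::getStream(s); // borrow OpenCV's stream
        cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, raw);
    }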
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda_types.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda_types.hpp
new file mode 100644
index 0000000..b33f061
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cuda_types.hpp
@@ -0,0 +1,144 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_TYPES_HPP
+#define OPENCV_CORE_CUDA_TYPES_HPP
+
+#ifndef __cplusplus
+# error cuda_types.hpp header must be compiled as C++
+#endif
+
+#if defined(__OPENCV_BUILD) && defined(__clang__)
+#pragma clang diagnostic ignored "-Winconsistent-missing-override"
+#endif
+#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+#ifdef __CUDACC__
+    #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_CUDA_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace cuda
+    {
+
+        // Simple lightweight structures that encapsulates information about an image on device.
+        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
+
+        template <typename T> struct DevPtr
+        {
+            typedef T elem_type;
+            typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) };
+
+            T* data;
+
+            __CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
+            __CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
+
+            __CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
+            __CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template <typename T> struct PtrSz : public DevPtr<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size;
+        };
+
+        template <typename T> struct PtrStep : public DevPtr<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+
+            size_t step;
+
+            __CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)(((DevPtr<T>*)this)->data) + y * step); }
+            __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)(((DevPtr<T>*)this)->data) + y * step); }
+
+            __CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
+            __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            template <typename U>
+            explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
+
+            int cols;
+            int rows;
+        };
+
+        typedef PtrStepSz<unsigned char> PtrStepSzb;
+        typedef PtrStepSz<unsigned short> PtrStepSzus;
+        typedef PtrStepSz<float> PtrStepSzf;
+        typedef PtrStepSz<int> PtrStepSzi;
+
+        typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<unsigned short> PtrStepus;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+    }
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_CUDA_TYPES_HPP */
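A minimal kernel sketch (editorial note, not part of the patch): PtrStep::step is a stride in bytes, which is why ptr(y) casts through char*; operator()(y, x) hides that arithmetic. binarize is hypothetical; a CV_8UC1 cv::cuda::GpuMat is normally passed where a PtrStepSzb parameter is expected.

    __global__ void binarize(cv::cuda::PtrStepSzb img, unsigned char thresh)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < img.cols && y < img.rows)
            img(y, x) = img(y, x) > thresh ? 255 : 0; // row stride handled by operator()
    }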
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cv_cpu_dispatch.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cv_cpu_dispatch.h
new file mode 100644
index 0000000..ab5a67d
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cv_cpu_dispatch.h
@@ -0,0 +1,368 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#if defined __OPENCV_BUILD \
+
+#include "cv_cpu_config.h"
+#include "cv_cpu_helper.h"
+
+#ifdef CV_CPU_DISPATCH_MODE
+#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#else
+#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#define CV_CPU_BASELINE_MODE 1
+#endif
+
+
+#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */
+#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
+#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
+
+
+#if defined CV_ENABLE_INTRINSICS \
+    && !defined CV_DISABLE_OPTIMIZATION \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
+
+#ifdef CV_CPU_COMPILE_SSE2
+# include <emmintrin.h>
+# define CV_MMX 1
+# define CV_SSE 1
+# define CV_SSE2 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE3
+# include <pmmintrin.h>
+# define CV_SSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSSE3
+# include <tmmintrin.h>
+# define CV_SSSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_1
+# include <smmintrin.h>
+# define CV_SSE4_1 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_2
+# include <nmmintrin.h>
+# define CV_SSE4_2 1
+#endif
+#ifdef CV_CPU_COMPILE_POPCNT
+# ifdef _MSC_VER
+# include <nmmintrin.h>
+# if defined(_M_X64)
+# define CV_POPCNT_U64 (int)_mm_popcnt_u64
+# endif
+# define CV_POPCNT_U32 _mm_popcnt_u32
+# else
+# include <popcntintrin.h>
+# if defined(__x86_64__)
+# define CV_POPCNT_U64 __builtin_popcountll
+# endif
+# define CV_POPCNT_U32 __builtin_popcount
+# endif
+# define CV_POPCNT 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX
+# include <immintrin.h>
+# define CV_AVX 1
+#endif
+#ifdef CV_CPU_COMPILE_FP16
+# if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
+# include <arm_neon.h>
+# else
+# include <immintrin.h>
+# endif
+# define CV_FP16 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX2
+# include <immintrin.h>
+# define CV_AVX2 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX_512F
+# include <immintrin.h>
+# define CV_AVX_512F 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_COMMON
+# define CV_AVX512_COMMON 1
+# define CV_AVX_512CD 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_KNL
+# define CV_AVX512_KNL 1
+# define CV_AVX_512ER 1
+# define CV_AVX_512PF 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_KNM
+# define CV_AVX512_KNM 1
+# define CV_AVX_5124FMAPS 1
+# define CV_AVX_5124VNNIW 1
+# define CV_AVX_512VPOPCNTDQ 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_SKX
+# define CV_AVX512_SKX 1
+# define CV_AVX_512VL 1
+# define CV_AVX_512BW 1
+# define CV_AVX_512DQ 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_CNL
+# define CV_AVX512_CNL 1
+# define CV_AVX_512IFMA 1
+# define CV_AVX_512VBMI 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_CLX
+# define CV_AVX512_CLX 1
+# define CV_AVX_512VNNI 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_ICL
+# define CV_AVX512_ICL 1
+# undef CV_AVX_512IFMA
+# define CV_AVX_512IFMA 1
+# undef CV_AVX_512VBMI
+# define CV_AVX_512VBMI 1
+# undef CV_AVX_512VNNI
+# define CV_AVX_512VNNI 1
+# define CV_AVX_512VBMI2 1
+# define CV_AVX_512BITALG 1
+# define CV_AVX_512VPOPCNTDQ 1
+#endif
+#ifdef CV_CPU_COMPILE_FMA3
+# define CV_FMA3 1
+#endif
+
+#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+# include <arm_neon.h>
+# define CV_NEON 1
+#endif
+
+#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
+# include <riscv-vector.h>
+# define CV_RVV071 1
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+# include <arm_neon.h>
+#endif
+
+#ifdef CV_CPU_COMPILE_VSX
+# include <altivec.h>
+# undef vector
+# undef pixel
+# undef bool
+# define CV_VSX 1
+#endif
+
+#ifdef CV_CPU_COMPILE_VSX3
+# define CV_VSX3 1
+#endif
+
+#ifdef CV_CPU_COMPILE_MSA
+# include "hal/msa_macros.h"
+# define CV_MSA 1
+#endif
+
+#ifdef __EMSCRIPTEN__
+# define CV_WASM_SIMD 1
+# include <wasm_simd128.h>
+#endif
+
+#if defined CV_CPU_COMPILE_RVV
+# define CV_RVV 1
+# include <riscv_vector.h>
+#endif
+
+#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
+
+#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
+struct VZeroUpperGuard {
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline VZeroUpperGuard() { _mm256_zeroupper(); }
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
+};
+#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
+#endif
+
+#ifdef __CV_AVX_GUARD
+#define CV_AVX_GUARD __CV_AVX_GUARD
+#else
+#define CV_AVX_GUARD
+#endif
+
+#endif // __OPENCV_BUILD
+
+
+
+#if !defined __OPENCV_BUILD /* Compatibility code */ \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+# include <emmintrin.h>
+# define CV_MMX 1
+# define CV_SSE 1
+# define CV_SSE2 1
+#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+# include <altivec.h>
+# undef vector
+# undef pixel
+# undef bool
+# define CV_VSX 1
+#endif
+
+#ifdef __F16C__
+# include <immintrin.h>
+# define CV_FP16 1
+#endif
+
+#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
+
+
+
+#ifndef CV_MMX
+# define CV_MMX 0
+#endif
+#ifndef CV_SSE
+# define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+# define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+# define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+# define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+# define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+# define CV_SSE4_2 0
+#endif
+#ifndef CV_POPCNT
+# define CV_POPCNT 0
+#endif
+#ifndef CV_AVX
+# define CV_AVX 0
+#endif
+#ifndef CV_FP16
+# define CV_FP16 0
+#endif
+#ifndef CV_AVX2
+# define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+# define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+# define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+# define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+# define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+# define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+# define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA
+# define CV_AVX_512IFMA 0
+#endif
+#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
+#ifndef CV_AVX_512PF
+# define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+# define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+# define CV_AVX_512VL 0
+#endif
+#ifndef CV_AVX_5124FMAPS
+# define CV_AVX_5124FMAPS 0
+#endif
+#ifndef CV_AVX_5124VNNIW
+# define CV_AVX_5124VNNIW 0
+#endif
+#ifndef CV_AVX_512VPOPCNTDQ
+# define CV_AVX_512VPOPCNTDQ 0
+#endif
+#ifndef CV_AVX_512VNNI
+# define CV_AVX_512VNNI 0
+#endif
+#ifndef CV_AVX_512VBMI2
+# define
CV_AVX_512VBMI2 0 +#endif +#ifndef CV_AVX_512BITALG +# define CV_AVX_512BITALG 0 +#endif +#ifndef CV_AVX512_COMMON +# define CV_AVX512_COMMON 0 +#endif +#ifndef CV_AVX512_KNL +# define CV_AVX512_KNL 0 +#endif +#ifndef CV_AVX512_KNM +# define CV_AVX512_KNM 0 +#endif +#ifndef CV_AVX512_SKX +# define CV_AVX512_SKX 0 +#endif +#ifndef CV_AVX512_CNL +# define CV_AVX512_CNL 0 +#endif +#ifndef CV_AVX512_CLX +# define CV_AVX512_CLX 0 +#endif +#ifndef CV_AVX512_ICL +# define CV_AVX512_ICL 0 +#endif + +#ifndef CV_NEON +# define CV_NEON 0 +#endif + +#ifndef CV_RVV071 +# define CV_RVV071 0 +#endif + +#ifndef CV_VSX +# define CV_VSX 0 +#endif + +#ifndef CV_VSX3 +# define CV_VSX3 0 +#endif + +#ifndef CV_MSA +# define CV_MSA 0 +#endif + +#ifndef CV_WASM_SIMD +# define CV_WASM_SIMD 0 +#endif + +#ifndef CV_RVV +# define CV_RVV 0 +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cv_cpu_helper.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cv_cpu_helper.h new file mode 100644 index 0000000..39ae0b9 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cv_cpu_helper.h @@ -0,0 +1,508 @@ +// AUTOGENERATED, DO NOT EDIT + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE +# define CV_TRY_SSE 1 +# define CV_CPU_FORCE_SSE 1 +# define CV_CPU_HAS_SUPPORT_SSE 1 +# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE +# define CV_TRY_SSE 1 +# define CV_CPU_FORCE_SSE 0 +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) +# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) +# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) +#else +# define CV_TRY_SSE 0 +# define CV_CPU_FORCE_SSE 0 +# define CV_CPU_HAS_SUPPORT_SSE 0 +# define CV_CPU_CALL_SSE(fn, args) +# define CV_CPU_CALL_SSE_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 +# define CV_TRY_SSE2 1 +# define CV_CPU_FORCE_SSE2 1 +# define CV_CPU_HAS_SUPPORT_SSE2 1 +# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 +# define CV_TRY_SSE2 1 +# define CV_CPU_FORCE_SSE2 0 +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) +# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) +# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) +#else +# define CV_TRY_SSE2 0 +# define CV_CPU_FORCE_SSE2 0 +# define CV_CPU_HAS_SUPPORT_SSE2 0 +# define CV_CPU_CALL_SSE2(fn, args) +# define CV_CPU_CALL_SSE2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) 
CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 +# define CV_TRY_SSE3 1 +# define CV_CPU_FORCE_SSE3 1 +# define CV_CPU_HAS_SUPPORT_SSE3 1 +# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 +# define CV_TRY_SSE3 1 +# define CV_CPU_FORCE_SSE3 0 +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) +# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) +# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) +#else +# define CV_TRY_SSE3 0 +# define CV_CPU_FORCE_SSE3 0 +# define CV_CPU_HAS_SUPPORT_SSE3 0 +# define CV_CPU_CALL_SSE3(fn, args) +# define CV_CPU_CALL_SSE3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 +# define CV_TRY_SSSE3 1 +# define CV_CPU_FORCE_SSSE3 1 +# define CV_CPU_HAS_SUPPORT_SSSE3 1 +# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 +# define CV_TRY_SSSE3 1 +# define CV_CPU_FORCE_SSSE3 0 +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) +# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) +# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) +#else +# define CV_TRY_SSSE3 0 +# define CV_CPU_FORCE_SSSE3 0 +# define CV_CPU_HAS_SUPPORT_SSSE3 0 +# define CV_CPU_CALL_SSSE3(fn, args) +# define CV_CPU_CALL_SSSE3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 +# define CV_TRY_SSE4_1 1 +# define CV_CPU_FORCE_SSE4_1 1 +# define CV_CPU_HAS_SUPPORT_SSE4_1 1 +# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 +# define CV_TRY_SSE4_1 1 +# define CV_CPU_FORCE_SSE4_1 0 +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) +# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) +# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) +#else +# define CV_TRY_SSE4_1 0 +# define CV_CPU_FORCE_SSE4_1 0 +# define CV_CPU_HAS_SUPPORT_SSE4_1 0 +# define CV_CPU_CALL_SSE4_1(fn, args) +# define CV_CPU_CALL_SSE4_1_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) 
CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 +# define CV_TRY_SSE4_2 1 +# define CV_CPU_FORCE_SSE4_2 1 +# define CV_CPU_HAS_SUPPORT_SSE4_2 1 +# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 +# define CV_TRY_SSE4_2 1 +# define CV_CPU_FORCE_SSE4_2 0 +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) +# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) +# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) +#else +# define CV_TRY_SSE4_2 0 +# define CV_CPU_FORCE_SSE4_2 0 +# define CV_CPU_HAS_SUPPORT_SSE4_2 0 +# define CV_CPU_CALL_SSE4_2(fn, args) +# define CV_CPU_CALL_SSE4_2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT +# define CV_TRY_POPCNT 1 +# define CV_CPU_FORCE_POPCNT 1 +# define CV_CPU_HAS_SUPPORT_POPCNT 1 +# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT +# define CV_TRY_POPCNT 1 +# define CV_CPU_FORCE_POPCNT 0 +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) +# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) +# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) +#else +# define CV_TRY_POPCNT 0 +# define CV_CPU_FORCE_POPCNT 0 +# define CV_CPU_HAS_SUPPORT_POPCNT 0 +# define CV_CPU_CALL_POPCNT(fn, args) +# define CV_CPU_CALL_POPCNT_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX +# define CV_TRY_AVX 1 +# define CV_CPU_FORCE_AVX 1 +# define CV_CPU_HAS_SUPPORT_AVX 1 +# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX +# define CV_TRY_AVX 1 +# define CV_CPU_FORCE_AVX 0 +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) +# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) +# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) +#else +# define CV_TRY_AVX 0 +# define CV_CPU_FORCE_AVX 0 +# define CV_CPU_HAS_SUPPORT_AVX 0 +# define CV_CPU_CALL_AVX(fn, args) +# define CV_CPU_CALL_AVX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) 
CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 +# define CV_TRY_FP16 1 +# define CV_CPU_FORCE_FP16 1 +# define CV_CPU_HAS_SUPPORT_FP16 1 +# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 +# define CV_TRY_FP16 1 +# define CV_CPU_FORCE_FP16 0 +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) +# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) +# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) +#else +# define CV_TRY_FP16 0 +# define CV_CPU_FORCE_FP16 0 +# define CV_CPU_HAS_SUPPORT_FP16 0 +# define CV_CPU_CALL_FP16(fn, args) +# define CV_CPU_CALL_FP16_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 +# define CV_TRY_AVX2 1 +# define CV_CPU_FORCE_AVX2 1 +# define CV_CPU_HAS_SUPPORT_AVX2 1 +# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 +# define CV_TRY_AVX2 1 +# define CV_CPU_FORCE_AVX2 0 +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) +# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) +# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) +#else +# define CV_TRY_AVX2 0 +# define CV_CPU_FORCE_AVX2 0 +# define CV_CPU_HAS_SUPPORT_AVX2 0 +# define CV_CPU_CALL_AVX2(fn, args) +# define CV_CPU_CALL_AVX2_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 +# define CV_TRY_FMA3 1 +# define CV_CPU_FORCE_FMA3 1 +# define CV_CPU_HAS_SUPPORT_FMA3 1 +# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 +# define CV_TRY_FMA3 1 +# define CV_CPU_FORCE_FMA3 0 +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) +# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) +# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) +#else +# define CV_TRY_FMA3 0 +# define CV_CPU_FORCE_FMA3 0 +# define CV_CPU_HAS_SUPPORT_FMA3 0 +# define CV_CPU_CALL_FMA3(fn, args) +# define CV_CPU_CALL_FMA3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) 
CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F +# define CV_TRY_AVX_512F 1 +# define CV_CPU_FORCE_AVX_512F 1 +# define CV_CPU_HAS_SUPPORT_AVX_512F 1 +# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F +# define CV_TRY_AVX_512F 1 +# define CV_CPU_FORCE_AVX_512F 0 +# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F)) +# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) +# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) +#else +# define CV_TRY_AVX_512F 0 +# define CV_CPU_FORCE_AVX_512F 0 +# define CV_CPU_HAS_SUPPORT_AVX_512F 0 +# define CV_CPU_CALL_AVX_512F(fn, args) +# define CV_CPU_CALL_AVX_512F_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 1 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON)) +# define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +#else +# define CV_TRY_AVX512_COMMON 0 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 1 +# define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL)) +# define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +#else +# define CV_TRY_AVX512_KNL 0 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 0 +# define CV_CPU_CALL_AVX512_KNL(fn, args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 1 +# define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM)) +# define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +#else +# define CV_TRY_AVX512_KNM 0 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 0 +# define CV_CPU_CALL_AVX512_KNM(fn, args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX +# define CV_TRY_AVX512_SKX 1 +# define CV_CPU_FORCE_AVX512_SKX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1 +# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX +# define CV_TRY_AVX512_SKX 1 +# define CV_CPU_FORCE_AVX512_SKX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX)) +# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) +#else +# define CV_TRY_AVX512_SKX 0 +# define CV_CPU_FORCE_AVX512_SKX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0 +# define CV_CPU_CALL_AVX512_SKX(fn, args) +# define CV_CPU_CALL_AVX512_SKX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 1 +# define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL)) +# define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +#else +# define CV_TRY_AVX512_CNL 0 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 0 +# define CV_CPU_CALL_AVX512_CNL(fn, args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 1 +# define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX)) +# define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +#else +# define CV_TRY_AVX512_CLX 0 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 0 +# define CV_CPU_CALL_AVX512_CLX(fn, args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 1 +# define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL)) +# define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +#else +# define CV_TRY_AVX512_ICL 0 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 0 +# define CV_CPU_CALL_AVX512_ICL(fn, args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON +# define CV_TRY_NEON 1 +# define CV_CPU_FORCE_NEON 1 +# define CV_CPU_HAS_SUPPORT_NEON 1 +# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON +# define CV_TRY_NEON 1 +# define CV_CPU_FORCE_NEON 0 +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) +# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) +# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) +#else +# define CV_TRY_NEON 0 +# define CV_CPU_FORCE_NEON 0 +# define CV_CPU_HAS_SUPPORT_NEON 0 +# define CV_CPU_CALL_NEON(fn, args) +# define CV_CPU_CALL_NEON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA +# define CV_TRY_MSA 1 +# define CV_CPU_FORCE_MSA 1 +# define CV_CPU_HAS_SUPPORT_MSA 1 +# define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA +# define CV_TRY_MSA 1 +# define CV_CPU_FORCE_MSA 0 +# define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA)) +# define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) +# define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) +#else +# define CV_TRY_MSA 0 +# define CV_CPU_FORCE_MSA 0 +# define CV_CPU_HAS_SUPPORT_MSA 0 +# define CV_CPU_CALL_MSA(fn, args) +# define CV_CPU_CALL_MSA_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX +# define CV_TRY_VSX 1 +# define CV_CPU_FORCE_VSX 1 +# define CV_CPU_HAS_SUPPORT_VSX 1 +# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX +# define CV_TRY_VSX 1 +# define CV_CPU_FORCE_VSX 0 +# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX)) +# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) +# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) +#else +# define CV_TRY_VSX 0 +# define CV_CPU_FORCE_VSX 0 +# define CV_CPU_HAS_SUPPORT_VSX 0 +# define CV_CPU_CALL_VSX(fn, args) +# define CV_CPU_CALL_VSX_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) 
CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3 +# define CV_TRY_VSX3 1 +# define CV_CPU_FORCE_VSX3 1 +# define CV_CPU_HAS_SUPPORT_VSX3 1 +# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3 +# define CV_TRY_VSX3 1 +# define CV_CPU_FORCE_VSX3 0 +# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3)) +# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) +# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) +#else +# define CV_TRY_VSX3 0 +# define CV_CPU_FORCE_VSX3 0 +# define CV_CPU_HAS_SUPPORT_VSX3 0 +# define CV_CPU_CALL_VSX3(fn, args) +# define CV_CPU_CALL_VSX3_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_RVV +# define CV_TRY_RVV 1 +# define CV_CPU_FORCE_RVV 1 +# define CV_CPU_HAS_SUPPORT_RVV 1 +# define CV_CPU_CALL_RVV(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_RVV_(fn, args) return (opt_RVV::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_RVV +# define CV_TRY_RVV 1 +# define CV_CPU_FORCE_RVV 0 +# define CV_CPU_HAS_SUPPORT_RVV (cv::checkHardwareSupport(CV_CPU_RVV)) +# define CV_CPU_CALL_RVV(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args) +# define CV_CPU_CALL_RVV_(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args) +#else +# define CV_TRY_RVV 0 +# define CV_CPU_FORCE_RVV 0 +# define CV_CPU_HAS_SUPPORT_RVV 0 +# define CV_CPU_CALL_RVV(fn, args) +# define CV_CPU_CALL_RVV_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_RVV(fn, args, mode, ...) CV_CPU_CALL_RVV(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) +#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvdef.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvdef.h new file mode 100644 index 0000000..f785f32 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvdef.h @@ -0,0 +1,967 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. 
+// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_CVDEF_H +#define OPENCV_CORE_CVDEF_H + +#include "opencv2/core/version.hpp" + +//! @addtogroup core_utils +//! @{ + +#ifdef OPENCV_INCLUDE_PORT_FILE // User-provided header file with custom platform configuration +#include OPENCV_INCLUDE_PORT_FILE +#endif + +#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD +#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \ + (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC)) +// Guard to prevent using of binary incompatible binaries / runtimes +// https://github.com/opencv/opencv/pull/9161 +#define CV__DEBUG_NS_BEGIN namespace debug_build_guard { +#define CV__DEBUG_NS_END } +namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; } +#endif +#endif + +#ifndef CV__DEBUG_NS_BEGIN +#define CV__DEBUG_NS_BEGIN +#define CV__DEBUG_NS_END +#endif + + +#ifdef __OPENCV_BUILD +#include "cvconfig.h" +#endif + +#ifndef __CV_EXPAND +#define __CV_EXPAND(x) x +#endif + +#ifndef __CV_CAT +#define __CV_CAT__(x, y) x ## y +#define __CV_CAT_(x, y) __CV_CAT__(x, y) +#define __CV_CAT(x, y) __CV_CAT_(x, y) +#endif + +#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N +#define __CV_VA_NUM_ARGS(...) 
__CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
+
+#ifdef CV_Func
+// keep current value (through OpenCV port file)
+#elif defined __GNUC__ || (defined (__cpluscplus) && (__cpluscplus >= 201103))
+#define CV_Func __func__
+#elif defined __clang__ && (__clang_minor__ * 100 + __clang_major__ >= 305)
+#define CV_Func __func__
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION >= 199901)
+#define CV_Func __func__
+#elif defined _MSC_VER
+#define CV_Func __FUNCTION__
+#elif defined(__INTEL_COMPILER) && (_INTEL_COMPILER >= 600)
+#define CV_Func __FUNCTION__
+#elif defined __IBMCPP__ && __IBMCPP__ >=500
+#define CV_Func __FUNCTION__
+#elif defined __BORLAND__ && (__BORLANDC__ >= 0x550)
+#define CV_Func __FUNC__
+#else
+#define CV_Func ""
+#endif
+
+//! @cond IGNORED
+
+//////////////// static assert /////////////////
+#define CVAUX_CONCAT_EXP(a, b) a##b
+#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
+
+#if defined(__clang__)
+# ifndef __has_extension
+# define __has_extension __has_feature /* compatibility, for older versions of clang */
+# endif
+# if __has_extension(cxx_static_assert)
+# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
+# elif __has_extension(c_static_assert)
+# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition)
+# endif
+#elif defined(__GNUC__)
+# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
+# endif
+#elif defined(_MSC_VER)
+# if _MSC_VER >= 1600 /* MSVC 10 */
+# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
+# endif
+#endif
+#ifndef CV_StaticAssert
+# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
+# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
+# else
+namespace cv {
+  template <bool x> struct CV_StaticAssert_failed;
+  template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
+  template <int x> struct CV_StaticAssert_test {};
+}
+# define CV_StaticAssert(condition, reason)\
+ typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
+# endif
+#endif
+
+// Suppress warning "-Wdeprecated-declarations" / C4996
+#if defined(_MSC_VER)
+    #define CV_DO_PRAGMA(x) __pragma(x)
+#elif defined(__GNUC__)
+    #define CV_DO_PRAGMA(x) _Pragma (#x)
+#else
+    #define CV_DO_PRAGMA(x)
+#endif
+
+#ifdef _MSC_VER
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(warning(push)) \
+    CV_DO_PRAGMA(warning(disable: 4996))
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
+#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(GCC diagnostic push) \
+    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
+#else
+#define CV_SUPPRESS_DEPRECATED_START
+#define CV_SUPPRESS_DEPRECATED_END
+#endif
+
+#define CV_UNUSED(name) (void)name
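A minimal usage sketch (editorial note, not part of the patch): whichever fallback branch defined CV_StaticAssert above, call sites look the same, and CV_UNUSED portably silences unused-parameter warnings. sanityChecks is hypothetical.

    static void sanityChecks(int reserved)
    {
        CV_StaticAssert(sizeof(short) == 2, "short must be 16 bits");
        CV_UNUSED(reserved);
    }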
+//! @endcond
+
+// undef problematic defines sometimes defined by system headers (windows.h in particular)
+#undef small
+#undef min
+#undef max
+#undef abs
+#undef Complex
+
+#if defined __cplusplus
+#include <limits>
+#else
+#include <limits.h>
+#endif
+
+#include "opencv2/core/hal/interface.h"
+
+#if defined __ICL
+# define CV_ICC __ICL
+#elif defined __ICC
+# define CV_ICC __ICC
+#elif defined __ECL
+# define CV_ICC __ECL
+#elif defined __ECC
+# define CV_ICC __ECC
+#elif defined __INTEL_COMPILER
+# define CV_ICC __INTEL_COMPILER
+#endif
+
+#ifndef CV_INLINE
+# if defined __cplusplus
+# define CV_INLINE static inline
+# elif defined _MSC_VER
+# define CV_INLINE __inline
+# else
+# define CV_INLINE static
+# endif
+#endif
+
+#ifndef CV_ALWAYS_INLINE
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define CV_ALWAYS_INLINE __forceinline
+#else
+#define CV_ALWAYS_INLINE inline
+#endif
+#endif
+
+#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
+# define CV_ENABLE_UNROLLED 0
+#else
+# define CV_ENABLE_UNROLLED 1
+#endif
+
+#ifdef __GNUC__
+# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+#elif defined _MSC_VER
+# define CV_DECL_ALIGNED(x) __declspec(align(x))
+#else
+# define CV_DECL_ALIGNED(x)
+#endif
+
+/* CPU features and intrinsics support */
+#define CV_CPU_NONE 0
+#define CV_CPU_MMX 1
+#define CV_CPU_SSE 2
+#define CV_CPU_SSE2 3
+#define CV_CPU_SSE3 4
+#define CV_CPU_SSSE3 5
+#define CV_CPU_SSE4_1 6
+#define CV_CPU_SSE4_2 7
+#define CV_CPU_POPCNT 8
+#define CV_CPU_FP16 9
+#define CV_CPU_AVX 10
+#define CV_CPU_AVX2 11
+#define CV_CPU_FMA3 12
+
+#define CV_CPU_AVX_512F 13
+#define CV_CPU_AVX_512BW 14
+#define CV_CPU_AVX_512CD 15
+#define CV_CPU_AVX_512DQ 16
+#define CV_CPU_AVX_512ER 17
+#define CV_CPU_AVX_512IFMA512 18 // deprecated
+#define CV_CPU_AVX_512IFMA 18
+#define CV_CPU_AVX_512PF 19
+#define CV_CPU_AVX_512VBMI 20
+#define CV_CPU_AVX_512VL 21
+#define CV_CPU_AVX_512VBMI2 22
+#define CV_CPU_AVX_512VNNI 23
+#define CV_CPU_AVX_512BITALG 24
+#define CV_CPU_AVX_512VPOPCNTDQ 25
+#define CV_CPU_AVX_5124VNNIW 26
+#define CV_CPU_AVX_5124FMAPS 27
+
+#define CV_CPU_NEON 100
+
+#define CV_CPU_MSA 150
+
+#define CV_CPU_RISCVV 170
+
+#define CV_CPU_VSX 200
+#define CV_CPU_VSX3 201
+
+#define CV_CPU_RVV 210
+
+// CPU features groups
+#define CV_CPU_AVX512_SKX 256
+#define CV_CPU_AVX512_COMMON 257
+#define CV_CPU_AVX512_KNL 258
+#define CV_CPU_AVX512_KNM 259
+#define CV_CPU_AVX512_CNL 260
+#define CV_CPU_AVX512_CLX 261
+#define CV_CPU_AVX512_ICL 262
+
+// when adding to this list remember to update the following enum
+#define CV_HARDWARE_MAX_FEATURE 512
+
+/** @brief Available CPU features.
+*/ +enum CpuFeatures { + CPU_MMX = 1, + CPU_SSE = 2, + CPU_SSE2 = 3, + CPU_SSE3 = 4, + CPU_SSSE3 = 5, + CPU_SSE4_1 = 6, + CPU_SSE4_2 = 7, + CPU_POPCNT = 8, + CPU_FP16 = 9, + CPU_AVX = 10, + CPU_AVX2 = 11, + CPU_FMA3 = 12, + + CPU_AVX_512F = 13, + CPU_AVX_512BW = 14, + CPU_AVX_512CD = 15, + CPU_AVX_512DQ = 16, + CPU_AVX_512ER = 17, + CPU_AVX_512IFMA512 = 18, // deprecated + CPU_AVX_512IFMA = 18, + CPU_AVX_512PF = 19, + CPU_AVX_512VBMI = 20, + CPU_AVX_512VL = 21, + CPU_AVX_512VBMI2 = 22, + CPU_AVX_512VNNI = 23, + CPU_AVX_512BITALG = 24, + CPU_AVX_512VPOPCNTDQ= 25, + CPU_AVX_5124VNNIW = 26, + CPU_AVX_5124FMAPS = 27, + + CPU_NEON = 100, + + CPU_MSA = 150, + + CPU_RISCVV = 170, + + CPU_VSX = 200, + CPU_VSX3 = 201, + + CPU_RVV = 210, + + CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL + CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512 + CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF + CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ + CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI + CPU_AVX512_CLX = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI + CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ + + CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE +}; + + +#include "cv_cpu_dispatch.h" + +#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64)) +// int*, int64* should be propertly aligned pointers on ARMv7 +#define CV_STRONG_ALIGNMENT 1 +#endif +#if !defined(CV_STRONG_ALIGNMENT) +#define CV_STRONG_ALIGNMENT 0 +#endif + +/* fundamental constants */ +#define CV_PI 3.1415926535897932384626433832795 +#define CV_2PI 6.283185307179586476925286766559 +#define CV_LOG2 0.69314718055994530941723212145818 + +#if defined __ARM_FP16_FORMAT_IEEE \ + && !defined __CUDACC__ +# define CV_FP16_TYPE 1 +#else +# define CV_FP16_TYPE 0 +#endif + +typedef union Cv16suf +{ + short i; + ushort u; +#if CV_FP16_TYPE + __fp16 h; +#endif +} +Cv16suf; + +typedef union Cv32suf +{ + int i; + unsigned u; + float f; +} +Cv32suf; + +typedef union Cv64suf +{ + int64 i; + uint64 u; + double f; +} +Cv64suf; + +#ifndef OPENCV_ABI_COMPATIBILITY +#define OPENCV_ABI_COMPATIBILITY 400 +#endif + +#ifdef __OPENCV_BUILD +# define DISABLE_OPENCV_3_COMPATIBILITY +# define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY +#endif + +#ifndef CV_EXPORTS +# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS) +# define CV_EXPORTS __declspec(dllexport) +# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__)) +# define CV_EXPORTS __attribute__ ((visibility ("default"))) +# endif +#endif + +#ifndef CV_EXPORTS +# define CV_EXPORTS +#endif + +#ifdef _MSC_VER +# define CV_EXPORTS_TEMPLATE +#else +# define CV_EXPORTS_TEMPLATE CV_EXPORTS +#endif + +#ifndef CV_DEPRECATED +# if defined(__GNUC__) +# define CV_DEPRECATED __attribute__ ((deprecated)) +# elif defined(_MSC_VER) +# define CV_DEPRECATED __declspec(deprecated) +# else +# define CV_DEPRECATED +# endif +#endif + +#ifndef CV_DEPRECATED_EXTERNAL +# if defined(__OPENCV_BUILD) +# define CV_DEPRECATED_EXTERNAL /* nothing */ +# else +# define CV_DEPRECATED_EXTERNAL CV_DEPRECATED +# endif +#endif + + +#ifndef CV_EXTERN_C +# ifdef __cplusplus +# define CV_EXTERN_C extern "C" +# else +# define CV_EXTERN_C +# endif +#endif + +/* special informative macros for wrapper generators */ +#define CV_EXPORTS_W 
CV_EXPORTS +#define CV_EXPORTS_W_SIMPLE CV_EXPORTS +#define CV_EXPORTS_AS(synonym) CV_EXPORTS +#define CV_EXPORTS_W_MAP CV_EXPORTS +#define CV_IN_OUT +#define CV_OUT +#define CV_PROP +#define CV_PROP_RW +#define CV_WRAP +#define CV_WRAP_AS(synonym) +#define CV_WRAP_MAPPABLE(mappable) +#define CV_WRAP_PHANTOM(phantom_header) +#define CV_WRAP_DEFAULT(val) + +/****************************************************************************************\ +* Matrix type (Mat) * +\****************************************************************************************/ + +#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) +#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) +#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1) +#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK) +#define CV_MAT_CONT_FLAG_SHIFT 14 +#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT) +#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG) +#define CV_IS_CONT_MAT CV_IS_MAT_CONT +#define CV_SUBMAT_FLAG_SHIFT 15 +#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT) +#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG) + +/** Size of each channel item, + 0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ +#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15) + +#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type)) + +#ifndef MIN +# define MIN(a,b) ((a) > (b) ? (b) : (a)) +#endif + +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + +///////////////////////////////////////// Enum operators /////////////////////////////////////// + +/** + +Provides compatibility operators for both classical and C++11 enum classes, +as well as exposing the C++11 enum class members for backwards compatibility + +@code + // Provides operators required for flag enums + CV_ENUM_FLAGS(AccessFlag) + + // Exposes the listed members of the enum class AccessFlag to the current namespace + CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]); +@endcode +*/ + +#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST) \ +static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST; \ + +#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...) \ +__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \ +__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__)); \ + +#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...) 
\
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__)); \
+
+#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...) \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__)); \
+
+#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType) \
+static inline bool operator!(const EnumType& val) \
+{ \
+    typedef std::underlying_type<EnumType>::type UnderlyingType; \
+    return !static_cast<UnderlyingType>(val); \
+} \
+
+#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type) \
+static inline bool operator!=(const Arg1Type& a, const Arg2Type& b) \
+{ \
+    return static_cast<int>(a) != static_cast<int>(b); \
+} \
+
+#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type) \
+static inline bool operator==(const Arg1Type& a, const Arg2Type& b) \
+{ \
+    return static_cast<int>(a) == static_cast<int>(b); \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType) \
+static inline EnumType operator~(const EnumType& val) \
+{ \
+    typedef std::underlying_type<EnumType>::type UnderlyingType; \
+    return static_cast<EnumType>(~static_cast<UnderlyingType>(val)); \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type) \
+static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b) \
+{ \
+    typedef std::underlying_type<EnumType>::type UnderlyingType; \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) | static_cast<UnderlyingType>(b)); \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type) \
+static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b) \
+{ \
+    typedef std::underlying_type<EnumType>::type UnderlyingType; \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) & static_cast<UnderlyingType>(b)); \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type) \
+static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b) \
+{ \
+    typedef std::underlying_type<EnumType>::type UnderlyingType; \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) ^ static_cast<UnderlyingType>(b)); \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type) \
+static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val) \
+{ \
+    _this = static_cast<EnumType>(static_cast<int>(_this) | static_cast<int>(val)); \
+    return _this; \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type) \
+static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val) \
+{ \
+    _this = static_cast<EnumType>(static_cast<int>(_this) & static_cast<int>(val)); \
+    return _this; \
+} \
+
+#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type) \
+static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val) \
+{ \
+    _this = static_cast<EnumType>(static_cast<int>(_this) ^ static_cast<int>(val)); \
+    return _this; \
+} \
+
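+// Illustrative sketch (not part of the vendored header): applying CV_ENUM_FLAGS
+// (defined below) to a hypothetical C++11 flag enum generates all of the
+// operators defined above, so flags combine and test like plain integers while
+// staying type-safe:
+//
+//   enum struct DemoFlag : int { DEMO_NONE = 0, DEMO_A = 1, DEMO_B = 2 };
+//   CV_ENUM_FLAGS(DemoFlag)
+//
+//   DemoFlag f = DemoFlag::DEMO_A | DemoFlag::DEMO_B;  // operator|(enum, enum)
+//   f &= ~DemoFlag::DEMO_A;                            // operator~ and operator&=
+//   bool only_b = !(f == 0) && f == 2;                 // operator! and operator==(enum, int)
+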
+#define CV_ENUM_CLASS_EXPOSE(EnumType, ...) \
+__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \
+
+#define CV_ENUM_FLAGS(EnumType) \
+__CV_ENUM_FLAGS_LOGICAL_NOT      (EnumType) \
+__CV_ENUM_FLAGS_LOGICAL_EQ       (EnumType, int) \
+__CV_ENUM_FLAGS_LOGICAL_NOT_EQ   (EnumType, int) \
+                                                 \
+__CV_ENUM_FLAGS_BITWISE_NOT      (EnumType) \
+__CV_ENUM_FLAGS_BITWISE_OR       (EnumType, EnumType, EnumType) \
+__CV_ENUM_FLAGS_BITWISE_AND      (EnumType, EnumType, EnumType) \
+__CV_ENUM_FLAGS_BITWISE_XOR      (EnumType, EnumType, EnumType) \
+                                                 \
+__CV_ENUM_FLAGS_BITWISE_OR_EQ    (EnumType, EnumType) \
+__CV_ENUM_FLAGS_BITWISE_AND_EQ   (EnumType, EnumType) \
+__CV_ENUM_FLAGS_BITWISE_XOR_EQ   (EnumType, EnumType) \
+
+/****************************************************************************************\
+*                                    static analysis                                     *
+\****************************************************************************************/
+
+// In practice, some macros are not processed correctly (noreturn is not detected).
+// We need to use simplified definitions for them.
+#ifndef CV_STATIC_ANALYSIS
+# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
+#   define CV_STATIC_ANALYSIS 1
+# endif
+#else
+# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1)  // defined and not empty
+#   if 0 == CV_STATIC_ANALYSIS
+#     undef CV_STATIC_ANALYSIS
+#   endif
+# endif
+#endif
+
+/****************************************************************************************\
+*                                    Thread sanitizer                                    *
+\****************************************************************************************/
+#ifndef CV_THREAD_SANITIZER
+# if defined(__has_feature)
+#   if __has_feature(thread_sanitizer)
+#     define CV_THREAD_SANITIZER
+#   endif
+# endif
+#endif
+
+/****************************************************************************************\
+*          exchange-add operation for atomic operations on reference counters            *
+\****************************************************************************************/
+
+#ifdef CV_XADD
+  // allow to use user-defined macro
+#elif defined __GNUC__ || defined __clang__
+#  if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__) && !defined __INTEL_COMPILER
+#    ifdef __ATOMIC_ACQ_REL
+#      define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
+#    endif
+#  else
+#    if defined __ATOMIC_ACQ_REL && !defined __clang__
+       // version for gcc >= 4.7
+#      define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
+#    endif
+#  endif
+#elif defined _MSC_VER && !defined RC_INVOKED
+#  include <intrin.h>
+#  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
+#else
+  #ifdef OPENCV_FORCE_UNSAFE_XADD
+    CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
+  #else
+    #error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
+  #endif
+#endif
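+
+// Illustrative sketch (not part of the vendored header): CV_XADD atomically
+// adds 'delta' to *addr and returns the PREVIOUS value, which is exactly what
+// shared-buffer reference counting needs to detect the last owner:
+//
+//   int refcount = 1;                  // one owner
+//   CV_XADD(&refcount, 1);             // another owner appears
+//   if (CV_XADD(&refcount, -1) == 1)   // old value was 1 -> this was the last owner
+//   { /* deallocate the shared buffer here */ }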
+
+
+/****************************************************************************************\
+*                                  CV_NORETURN attribute                                 *
+\****************************************************************************************/
+
+#ifndef CV_NORETURN
+#  if defined(__GNUC__)
+#    define CV_NORETURN __attribute__((__noreturn__))
+#  elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+#    define CV_NORETURN __declspec(noreturn)
+#  else
+#    define CV_NORETURN /* nothing by default */
+#  endif
+#endif
+
+/****************************************************************************************\
+*                          CV_NODISCARD_STD attribute (C++17)                            *
+* encourages the compiler to issue a warning if the return value is discarded            *
+\****************************************************************************************/
+#ifndef CV_NODISCARD_STD
+#  ifndef __has_cpp_attribute
+//   workaround preprocessor non-compliance https://reviews.llvm.org/D57851
+#    define __has_cpp_attribute(__x) 0
+#  endif
+#  if __has_cpp_attribute(nodiscard)
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif __cplusplus >= 201703L
+//   available when compiler is C++17 compliant
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif defined(__INTEL_COMPILER)
+     // see above, available when C++17 is enabled
+#  elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L
+//   available with VS2017 v15.3+ with /std:c++17 or higher; works on functions and classes
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 700) && (__cplusplus >= 201103L)
+//   available with GCC 7.0+; works on functions, works or silently fails on classes
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 408) && (__cplusplus >= 201103L)
+//   available with GCC 4.8+ but it usually does nothing and can fail noisily -- therefore not used
+//   define CV_NODISCARD_STD [[gnu::warn_unused_result]]
+#  endif
+#endif
+#ifndef CV_NODISCARD_STD
+#  define CV_NODISCARD_STD /* nothing by default */
+#endif
+
+
+/****************************************************************************************\
+*                      CV_NODISCARD attribute (deprecated, GCC only)                     *
+* DON'T USE: use instead the standard CV_NODISCARD_STD macro above                       *
+*            this legacy method silently fails to issue a warning until some version     *
+*            after gcc 6.3.0. Yet with gcc 7+ you can use the above standard method,     *
+*            which makes this method useless. Don't use it.                              *
+* @deprecated use instead CV_NODISCARD_STD                                               *
+\****************************************************************************************/
+#ifndef CV_NODISCARD
+#  if defined(__GNUC__)
+#    define CV_NODISCARD __attribute__((__warn_unused_result__))
+#  elif defined(__clang__) && defined(__has_attribute)
+#    if __has_attribute(__warn_unused_result__)
+#      define CV_NODISCARD __attribute__((__warn_unused_result__))
+#    endif
+#  endif
+#endif
+#ifndef CV_NODISCARD
+#  define CV_NODISCARD /* nothing by default */
+#endif
+
+
+/****************************************************************************************\
+*                                       C++ 11                                           *
+\****************************************************************************************/
+#ifndef CV_CXX11
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
+#    define CV_CXX11 1
+#  endif
+#else
+#  if CV_CXX11 == 0
+#    undef CV_CXX11
+#  endif
+#endif
+#ifndef CV_CXX11
+#  error "OpenCV 4.x+ requires enabled C++11 support"
+#endif
+
+#define CV_CXX_MOVE_SEMANTICS 1
+#define CV_CXX_MOVE(x) std::move(x)
+#define CV_CXX_STD_ARRAY 1
+#include <array>
+#ifndef CV_OVERRIDE
+#  define CV_OVERRIDE override
+#endif
+#ifndef CV_FINAL
+#  define CV_FINAL final
+#endif
+
+#ifndef CV_NOEXCEPT
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_NOEXCEPT noexcept
+#  endif
+#endif
+#ifndef CV_NOEXCEPT
+#  define CV_NOEXCEPT
+#endif
+
+#ifndef CV_CONSTEXPR
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_CONSTEXPR constexpr
+#  endif
+#endif
+#ifndef CV_CONSTEXPR
+#  define CV_CONSTEXPR
+#endif
+
+// Integer types portability
+#ifdef OPENCV_STDINT_HEADER
+#include OPENCV_STDINT_HEADER
+#elif defined(__cplusplus)
+#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
+namespace cv {
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+}
+#elif defined(_MSC_VER) || __cplusplus >= 201103L
+#include <cstdint>
+namespace cv {
+using std::int8_t;
+using std::uint8_t;
+using std::int16_t;
+using std::uint16_t;
+using std::int32_t;
+using std::uint32_t;
+using std::int64_t;
+using std::uint64_t;
+}
+#else
+#include <stdint.h>
+namespace cv {
+typedef ::int8_t int8_t;
+typedef ::uint8_t uint8_t;
+typedef ::int16_t int16_t;
+typedef ::uint16_t uint16_t;
+typedef ::int32_t int32_t;
+typedef ::uint32_t uint32_t;
+typedef ::int64_t int64_t;
+typedef ::uint64_t uint64_t;
+}
+#endif
+#else // pure C
+#include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+namespace cv
+{
+
+class float16_t
+{
+public:
+#if CV_FP16_TYPE
+
+    float16_t() : h(0) {}
+    explicit float16_t(float x) { h = (__fp16)x; }
+    operator float() const { return (float)h; }
+    static float16_t fromBits(ushort w)
+    {
+        Cv16suf u;
+        u.u = w;
+        float16_t result;
+        result.h = u.h;
+        return result;
+    }
+    static float16_t zero()
+    {
+        float16_t result;
+        result.h = (__fp16)0;
+        return result;
+    }
+    ushort bits() const
+    {
+        Cv16suf u;
+        u.h = h;
+        return u.u;
+    }
+protected:
+    __fp16 h;
+
+#else
+    float16_t() : w(0) {}
+    explicit float16_t(float x)
+    {
+    #if CV_FP16
+        __m128 v = _mm_load_ss(&x);
+        w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
+    #else
+        Cv32suf in;
+        in.f = x;
+        unsigned sign = in.u & 0x80000000;
+        in.u ^= sign;
+
+        if( in.u >= 0x47800000 )
+            w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
+        else
+        {
+            if (in.u < 0x38800000)
+            {
+                in.f += 0.5f;
+                w = (ushort)(in.u - 0x3f000000);
+            }
+            else
+            {
+                unsigned t = in.u + 0xc8000fff;
+                w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
+            }
+        }
+
+        w = (ushort)(w | (sign >> 16));
+    #endif
+    }
+
+    operator float() const
+    {
+    #if CV_FP16
+        float f;
+        _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
+        return f;
+    #else
+        Cv32suf out;
+
+        unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
+        unsigned sign = (w & 0x8000) << 16;
+        unsigned e = w & 0x7c00;
+
+        out.u = t + (1 << 23);
+        out.u = (e >= 0x7c00 ? t + 0x38000000 :
+                 e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
+        return out.f;
+    #endif
+    }
+
+    static float16_t fromBits(ushort b)
+    {
+        float16_t result;
+        result.w = b;
+        return result;
+    }
+    static float16_t zero()
+    {
+        float16_t result;
+        result.w = (ushort)0;
+        return result;
+    }
+    ushort bits() const { return w; }
+protected:
+    ushort w;
+
+#endif
+};
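+
+// Illustrative sketch (not part of the vendored header): round-tripping a value
+// through the IEEE 754 binary16 representation implemented above.
+//
+//   cv::float16_t h(0.5f);       // encode; 0.5f is exactly representable
+//   float back = (float)h;       // decode, yields 0.5f again
+//   ushort raw = h.bits();       // raw half-float bit pattern (0x3800)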
+
+}
+#endif
+
+//! @}
+
+#ifndef __cplusplus
+#include "opencv2/core/fast_math.hpp" // define cvRound(double)
+#endif
+
+#endif // OPENCV_CORE_CVDEF_H
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd.hpp
new file mode 100644
index 0000000..6ce9e4b
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd.hpp
@@ -0,0 +1,190 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTD_HPP
+#define OPENCV_CORE_CVSTD_HPP
+
+#ifndef __cplusplus
+#  error cvstd.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include <cstddef>
+#include <cstring>
+#include <cctype>
+
+#include <string>
+
+// import useful primitives from stl
+#  include <algorithm>
+#  include <utility>
+#  include <cstdlib> //for abs(int)
+#  include <cmath>
+
+namespace cv
+{
+    static inline uchar abs(uchar a) { return a; }
+    static inline ushort abs(ushort a) { return a; }
+    static inline unsigned abs(unsigned a) { return a; }
+    static inline uint64 abs(uint64 a) { return a; }
+
+    using std::min;
+    using std::max;
+    using std::abs;
+    using std::swap;
+    using std::sqrt;
+    using std::exp;
+    using std::pow;
+    using std::log;
+}
+
+#include "cvstd_wrapper.hpp"
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+//////////////////////////// memory management functions ////////////////////////////
+
+/** @brief Allocates an aligned memory buffer.
+
+The function allocates the buffer of the specified size and returns it. When the buffer size is 16
+bytes or more, the returned buffer is aligned to 16 bytes.
+@param bufSize Allocated buffer size.
+ */
+CV_EXPORTS void* fastMalloc(size_t bufSize);
+
+/** @brief Deallocates a memory buffer.
+
+The function deallocates the buffer allocated with fastMalloc . If NULL pointer is passed, the
+function does nothing. C version of the function clears the pointer *pptr* to avoid problems with
+double memory deallocation.
+@param ptr Pointer to the allocated buffer.
+ */
+CV_EXPORTS void fastFree(void* ptr);
+
+/*!
+  The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree()
+*/
+template<typename _Tp> class Allocator
+{
+public:
+    typedef _Tp value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    template<typename U> class rebind { typedef Allocator<U> other; };
+
+    explicit Allocator() {}
+    ~Allocator() {}
+    explicit Allocator(Allocator const&) {}
+    template<typename U>
+    explicit Allocator(Allocator<U> const&) {}
+
+    // address
+    pointer address(reference r) { return &r; }
+    const_pointer address(const_reference r) { return &r; }
+
+    pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); }
+    void deallocate(pointer p, size_type) { fastFree(p); }
+
+    void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); }
+    void destroy(pointer p) { p->~_Tp(); }
+
+    size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); }
+};
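+
+// Illustrative sketch (not part of the vendored header, assumes <vector> is
+// included): Allocator plugs into standard containers, so their element storage
+// comes from fastMalloc and is 16-byte aligned for allocations of 16+ bytes:
+//
+//   std::vector<float, cv::Allocator<float> > buf(1024);  // backed by fastMalloc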
+
+//! @} core_utils
+
+//! @endcond
+
+//! @addtogroup core_basic
+//! @{
+
+//////////////////////////////// string class ////////////////////////////////
+
+class CV_EXPORTS FileNode; //for string constructor from FileNode
+
+typedef std::string String;
+
+#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
+
+//! @cond IGNORED
+namespace details {
+// std::tolower is int->int
+static inline char char_tolower(char ch)
+{
+    return (char)std::tolower((int)ch);
+}
+// std::toupper is int->int
+static inline char char_toupper(char ch)
+{
+    return (char)std::toupper((int)ch);
+}
+} // namespace details
+//! @endcond
+
+static inline std::string toLowerCase(const std::string& str)
+{
+    std::string result(str);
+    std::transform(result.begin(), result.end(), result.begin(), details::char_tolower);
+    return result;
+}
+
+static inline std::string toUpperCase(const std::string& str)
+{
+    std::string result(str);
+    std::transform(result.begin(), result.end(), result.begin(), details::char_toupper);
+    return result;
+}
+
+#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
+
+//! @} core_basic
+} // cv
+
+#endif //OPENCV_CORE_CVSTD_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd.inl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd.inl.hpp
new file mode 100644
index 0000000..37ad1e6
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd.inl.hpp
@@ -0,0 +1,197 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTDINL_HPP
+#define OPENCV_CORE_CVSTDINL_HPP
+
+#include <complex>
+#include <ostream>
+#include <sstream>
+
+//! @cond IGNORED
+
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
+namespace cv
+{
+
+template<typename _Tp> class DataType< std::complex<_Tp> >
+{
+public:
+    typedef std::complex<_Tp>  value_type;
+    typedef value_type         work_type;
+    typedef _Tp                channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels) };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+static inline
+std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
+{
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        out << str;
+    return out;
+}
+
+static inline
+std::ostream& operator << (std::ostream& out, const Mat& mtx)
+{
+    return out << Formatter::get()->format(mtx);
+}
+
+static inline
+std::ostream& operator << (std::ostream& out, const UMat& m)
+{
+    return out << m.getMat(ACCESS_READ);
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
+{
+    return out << "(" << c.re << "," << c.im << ")";
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
+{
+    return out << Formatter::get()->format(Mat(vec));
+}
+
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
+{
+    return out << Formatter::get()->format(Mat(vec));
+}
+
+
+template<typename _Tp, int m, int n> static inline
+std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
+{
+    return out << Formatter::get()->format(Mat(matx));
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
+{
+    out << "[" << p.x << ", " << p.y << "]";
+    return out;
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
+{
+    out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
+    return out;
+}
+
+template<typename _Tp, int n> static inline
+std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
+{
+    out << "[";
+    if (cv::traits::Depth<_Tp>::value <= CV_32S)
+    {
+        for (int i = 0; i < n - 1; ++i) {
+            out << (int)vec[i] << ", ";
+        }
+        out << (int)vec[n-1] << "]";
+    }
+    else
+    {
+        for (int i = 0; i < n - 1; ++i) {
+            out << vec[i] << ", ";
+        }
+        out << vec[n-1] << "]";
+    }
+
+    return out;
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
+{
+    return out << "[" << size.width << " x " << size.height << "]";
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
+{
+    return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
+}
+
+static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
+{
+    int i, dims = msize.dims();
+    for( i = 0; i < dims; i++ )
+    {
+        out << msize[i];
+        if( i < dims-1 )
+            out << " x ";
+    }
+    return out;
+}
+
+static inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
+{
+    return s << "[" << r.start << " : " << r.end << ")";
+}
+
+} // cv
+
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+//! @endcond
+
+#endif // OPENCV_CORE_CVSTDINL_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd_wrapper.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd_wrapper.hpp
new file mode 100644
index 0000000..25e0041
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/cvstd_wrapper.hpp
@@ -0,0 +1,154 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP
+#define OPENCV_CORE_CVSTD_WRAPPER_HPP
+
+#include "opencv2/core/cvdef.h"
+
+#include <string>
+#include <memory>  // std::shared_ptr
+#include <type_traits>  // std::enable_if
+
+namespace cv {
+
+using std::nullptr_t;
+
+//! @addtogroup core_basic
+//! @{
+
+#ifdef CV_DOXYGEN
+
+template <typename _Tp> using Ptr = std::shared_ptr<_Tp>;  // In ideal world it should look like this, but we need some compatibility workarounds below
+
+template<typename _Tp, typename ... A1> static inline
+Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); }
+
+#else  // cv::Ptr with compatibility workarounds
+
+// It should be defined for C-API types only.
+// C++ types should use regular "delete" operator.
+template<typename Y> struct DefaultDeleter;
+#if 0
+{
+    void operator()(Y* p) const;
+};
+#endif
+
+namespace sfinae {
+template<typename C, typename Ret, typename... Args>
+struct has_parenthesis_operator
+{
+private:
+    template<typename T>
+    static CV_CONSTEXPR std::true_type has_parenthesis_operator_check(typename std::is_same<typename std::decay<decltype(std::declval<T>().operator()(std::declval<Args>()...))>::type, Ret>::type*);
+
+    template<typename> static CV_CONSTEXPR std::false_type has_parenthesis_operator_check(...);
+
+    typedef decltype(has_parenthesis_operator_check<C>(0)) type;
+
+public:
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+    static CV_CONSTEXPR bool value = type::value;
+#else
+    // support MSVS 2013
+    static const int value = type::value;
+#endif
+};
+} // namespace sfinae
+
+template <typename T, typename = void>
+struct has_custom_delete
+        : public std::false_type {};
+
+// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files
+#ifndef __CUDACC__
+template <typename T>
+struct has_custom_delete<T, typename std::enable_if< sfinae::has_parenthesis_operator<DefaultDeleter<T>, void, T*>::value >::type >
+        : public std::true_type {};
+#endif
+
+template<typename T>
+struct Ptr : public std::shared_ptr<T>
+{
+#if 0
+    using std::shared_ptr<T>::shared_ptr;  // GCC 5.x can't handle this
+#else
+    inline Ptr() CV_NOEXCEPT : std::shared_ptr<T>() {}
+    inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr<T>(nullptr) {}
+    template<typename Y, typename D> inline Ptr(Y* p, D d) : std::shared_ptr<T>(p, d) {}
+    template<typename D> inline Ptr(nullptr_t, D d) : std::shared_ptr<T>(nullptr, d) {}
+
+    template<typename Y> inline Ptr(const Ptr<Y>& r, T* ptr) CV_NOEXCEPT : std::shared_ptr<T>(r, ptr) {}
+
+    inline Ptr(const Ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    inline Ptr(Ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+
+    template<typename Y> inline Ptr(const Ptr<Y>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    template<typename Y> inline Ptr(Ptr<Y>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+#endif
+    inline Ptr(const std::shared_ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    inline Ptr(std::shared_ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+
+    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
+    template<typename Y>
+    inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr<T>(ptr, DefaultDeleter<Y>()) {}
+
+    // Overload without custom deleter: Ptr<std::string>(...);
+    template<typename Y>
+    inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr<T>(ptr) {}
+
+    template<typename Y>
+    inline Ptr(Y* ptr) : Ptr(has_custom_delete<Y>(), ptr) {}
+
+    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
+    template<typename Y>
+    inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr, DefaultDeleter<Y>()); }
+
+    // Overload without custom deleter: Ptr<std::string>(...);
+    template<typename Y>
+    inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr); }
+
+    template<typename Y>
+    inline void reset(Y* ptr) { Ptr<T>::reset(has_custom_delete<Y>(), ptr); }
+
+    template<typename Y, typename Deleter>
+    void reset(Y* ptr, Deleter d) { std::shared_ptr<T>::reset(ptr, d); }
+
+    void reset() CV_NOEXCEPT { std::shared_ptr<T>::reset(); }
+
+    Ptr& operator=(const Ptr& o) { std::shared_ptr<T>::operator =(o); return *this; }
+    template<typename Y> inline Ptr& operator=(const Ptr<Y>& o) { std::shared_ptr<T>::operator =(o); return *this; }
+
+    T* operator->() const CV_NOEXCEPT { return std::shared_ptr<T>::get();}
+    typename std::add_lvalue_reference<T>::type operator*() const CV_NOEXCEPT { return *std::shared_ptr<T>::get(); }
+
+    // OpenCV 3.x methods (not a part of standard C++ library)
+    inline void release() { std::shared_ptr<T>::reset(); }
+    inline operator T* () const { return std::shared_ptr<T>::get(); }
+    inline bool empty() const { return std::shared_ptr<T>::get() == nullptr; }
+
+    template<typename Y> inline
+    Ptr<Y> staticCast() const CV_NOEXCEPT { return std::static_pointer_cast<Y>(*this); }
+
+    template<typename Y> inline
+    Ptr<Y> constCast() const CV_NOEXCEPT { return std::const_pointer_cast<Y>(*this); }
+
+    template<typename Y> inline
+    Ptr<Y> dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast<Y>(*this); }
+};
+
+template<typename _Tp, typename ... A1> static inline
+Ptr<_Tp> makePtr(const A1&... a1)
+{
+    static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr<T>() with custom DefaultDeleter");
+    return (Ptr<_Tp>)std::make_shared<_Tp>(a1...);
+}
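+
+// Illustrative sketch (not part of the vendored header): Ptr<T> behaves like
+// std::shared_ptr<T> and keeps the OpenCV 3.x helpers for source compatibility:
+//
+//   cv::Ptr<std::string> s = cv::makePtr<std::string>("hello");
+//   if (!s.empty())     // OpenCV 3.x spelling of (bool)s
+//       s->append("!");
+//   s.release();        // OpenCV 3.x spelling of s.reset()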
+
+#endif // CV_DOXYGEN
+
+//! @} core_basic
+} // cv
+
+#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/async_promise.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/async_promise.hpp
new file mode 100644
index 0000000..6eb3fb5
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/async_promise.hpp
@@ -0,0 +1,71 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP
+#define OPENCV_CORE_ASYNC_PROMISE_HPP
+
+#include "../async.hpp"
+
+#include "exception_ptr.hpp"
+
+namespace cv {
+
+/** @addtogroup core_async
+@{
+*/
+
+
+/** @brief Provides result of asynchronous operations
+
+*/
+class CV_EXPORTS AsyncPromise
+{
+public:
+    ~AsyncPromise() CV_NOEXCEPT;
+    AsyncPromise() CV_NOEXCEPT;
+    explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT;
+    AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT;
+    void release() CV_NOEXCEPT;
+
+    /** Returns associated AsyncArray
+    @note Can be called once
+    */
+    AsyncArray getArrayResult();
+
+    /** Stores asynchronous result.
+    @param[in] value result
+    */
+    void setValue(InputArray value);
+
+    // TODO "move" setters
+
+#if CV__EXCEPTION_PTR
+    /** Stores exception.
+    @param[in] exception exception to be raised in AsyncArray
+    */
+    void setException(std::exception_ptr exception);
+#endif
+
+    /** Stores exception.
+    @param[in] exception exception to be raised in AsyncArray
+    */
+    void setException(const cv::Exception& exception);
+
+#ifdef CV_CXX11
+    explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
+    AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
+#endif
+
+
+    // PImpl
+    typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl;
+    inline void* _getImpl() const CV_NOEXCEPT { return p; }
+protected:
+    Impl* p;
+};
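+
+// Illustrative sketch (not part of the vendored header): a producer fulfills
+// the promise while the consumer holds the matching AsyncArray:
+//
+//   cv::AsyncPromise promise;
+//   cv::AsyncArray result = promise.getArrayResult();  // may be called once
+//   promise.setValue(cv::Mat::eye(3, 3, CV_32F));      // producer side
+//   cv::Mat m; result.get(m);                          // consumer side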
+
+
+//! @}
+} // namespace
+#endif // OPENCV_CORE_ASYNC_PROMISE_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/dispatch_helper.impl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/dispatch_helper.impl.hpp
new file mode 100644
index 0000000..d6ec676
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/dispatch_helper.impl.hpp
@@ -0,0 +1,49 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
+#define OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
+
+//! @cond IGNORED
+
+namespace cv {
+namespace detail {
+
+template<template<typename ...> class Functor, typename... Args>
+static inline void depthDispatch(const int depth, Args&&... args)
+{
+    switch (depth)
+    {
+        case CV_8U:
+            Functor<uint8_t>{}(std::forward<Args>(args)...);
+            break;
+        case CV_8S:
+            Functor<int8_t>{}(std::forward<Args>(args)...);
+            break;
+        case CV_16U:
+            Functor<uint16_t>{}(std::forward<Args>(args)...);
+            break;
+        case CV_16S:
+            Functor<int16_t>{}(std::forward<Args>(args)...);
+            break;
+        case CV_32S:
+            Functor<int32_t>{}(std::forward<Args>(args)...);
+            break;
+        case CV_32F:
+            Functor<float>{}(std::forward<Args>(args)...);
+            break;
+        case CV_64F:
+            Functor<double>{}(std::forward<Args>(args)...);
+            break;
+        case CV_16F:
+        default:
+            CV_Error(cv::Error::BadDepth, "Unsupported matrix type.");
+    };
+}
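+
+// Illustrative sketch (not part of the vendored header): 'PrintElem' is a
+// hypothetical functor template (assumes <iostream>); depthDispatch
+// instantiates it with the C++ type matching the runtime depth value:
+//
+//   template<typename T> struct PrintElem {
+//       void operator()(const void* p) const { std::cout << *static_cast<const T*>(p) << '\n'; }
+//   };
+//   float v = 1.5f;
+//   cv::detail::depthDispatch<PrintElem>(CV_32F, &v);  // runs PrintElem<float>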
+
+}}
+
+//! @endcond
+
+#endif //OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/exception_ptr.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/exception_ptr.hpp
new file mode 100644
index 0000000..d98ffc4
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/detail/exception_ptr.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
+#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
+
+#ifndef CV__EXCEPTION_PTR
+#  if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
+#    define CV__EXCEPTION_PTR 0  // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
+#  elif defined(CV_CXX11)
+#    define CV__EXCEPTION_PTR 1
+#  elif defined(_MSC_VER)
+#    define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
+#  elif defined(__clang__)
+#    define CV__EXCEPTION_PTR 0  // C++11 only (see above)
+#  elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
+#    define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
+#  endif
+#endif
+#ifndef CV__EXCEPTION_PTR
+#  define CV__EXCEPTION_PTR 0
+#elif CV__EXCEPTION_PTR
+#  include <exception>  // std::exception_ptr
+#endif
+
+#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/directx.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/directx.hpp
new file mode 100644
index 0000000..056a85a
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/directx.hpp
@@ -0,0 +1,184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_DIRECTX_HPP
+#define OPENCV_CORE_DIRECTX_HPP
+
+#include "mat.hpp"
+#include "ocl.hpp"
+
+#if !defined(__d3d11_h__)
+struct ID3D11Device;
+struct ID3D11Texture2D;
+#endif
+
+#if !defined(__d3d10_h__)
+struct ID3D10Device;
+struct ID3D10Texture2D;
+#endif
+
+#if !defined(_D3D9_H_)
+struct IDirect3DDevice9;
+struct IDirect3DDevice9Ex;
+struct IDirect3DSurface9;
+#endif
+
+
+namespace cv { namespace directx {
+
+namespace ocl {
+using namespace cv::ocl;
+
+//! @addtogroup core_directx
+// This section describes OpenCL and DirectX interoperability.
+//
+// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is
+// supported only on Windows.
+//
+// To use OpenCL functionality, you should first initialize the OpenCL context from a DirectX resource.
+//
+//! @{
+
+// TODO static functions in the Context class
+//! @brief Creates OpenCL context from D3D11 device
+//
+//! @param pD3D11Device - pointer to D3D11 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
+
+//! @brief Creates OpenCL context from D3D10 device
+//
+//! @param pD3D10Device - pointer to D3D10 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
+
+//! @brief Creates OpenCL context from Direct3DDevice9Ex device
+//
+//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
+
+//! @brief Creates OpenCL context from Direct3DDevice9 device
+//
+//! @param pDirect3DDevice9 - pointer to Direct3DDevice9 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
+
+//! @}
+
+} // namespace cv::directx::ocl
+
+//! @addtogroup core_directx
+//! @{
+
+//! @brief Converts InputArray to ID3D11Texture2D. If the destination texture format is DXGI_FORMAT_NV12,
+//! the input UMat is expected to be in BGR format and the data will be downsampled and color-converted to NV12.
+//
+//! @note The destination texture must be allocated by the application. The function performs a memory
+//! copy from src to pD3D11Texture2D.
+//
+//! @param src - source InputArray
+//! @param pD3D11Texture2D - destination D3D11 texture
+CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
+
+//! @brief Converts ID3D11Texture2D to OutputArray. If the input texture format is DXGI_FORMAT_NV12,
+//! the data will be upsampled and color-converted to BGR format.
+//
+//! @note The destination matrix will be re-allocated if it does not have enough memory to match the texture size.
+//! The function performs a memory copy from pD3D11Texture2D to dst.
+//
+//! @param pD3D11Texture2D - source D3D11 texture
+//! @param dst - destination OutputArray
+CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to ID3D10Texture2D
+//
+//! @note The function performs a memory copy from src to pD3D10Texture2D.
+//
+//! @param src - source InputArray
+//! @param pD3D10Texture2D - destination D3D10 texture
+CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
+
+//! @brief Converts ID3D10Texture2D to OutputArray
+//
+//! @note The function performs a memory copy from pD3D10Texture2D to dst.
+//
+//! @param pD3D10Texture2D - source D3D10 texture
+//! @param dst - destination OutputArray
+CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to IDirect3DSurface9
+//
+//! @note The function performs a memory copy from src to pDirect3DSurface9.
+//
+//! @param src - source InputArray
+//! @param pDirect3DSurface9 - destination D3D9 surface
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
+
+//! @brief Converts IDirect3DSurface9 to OutputArray
+//
+//! @note The function performs a memory copy from pDirect3DSurface9 to dst.
+//
+//! @param pDirect3DSurface9 - source D3D9 surface
+//! @param dst - destination OutputArray
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iD3DFORMAT - enum D3DTYPE for D3D9
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
+
+//! @}
+
+} } // namespace cv::directx
+
+#endif // OPENCV_CORE_DIRECTX_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/dualquaternion.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/dualquaternion.hpp
new file mode 100644
index 0000000..1f644e9
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/dualquaternion.hpp
@@ -0,0 +1,979 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+//                       License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong
+//         Longbu Wang
+#ifndef OPENCV_CORE_DUALQUATERNION_HPP
+#define OPENCV_CORE_DUALQUATERNION_HPP
+
+#include <opencv2/core/quaternion.hpp>
+#include <opencv2/core/affine.hpp>
+
+namespace cv{
+//! @addtogroup core
+//! @{
+
+template <typename _Tp> class DualQuat;
+template <typename _Tp> std::ostream& operator<<(std::ostream&, const DualQuat<_Tp>&);
+
+/**
+ * Dual quaternions were introduced to describe rotation together with translation while ordinary
+ * quaternions can only describe rotation. They can be used for shortest path pose interpolation,
+ * local pose optimization or volumetric deformation. More details can be found
+ * - https://en.wikipedia.org/wiki/Dual_quaternion
+ * - ["A beginners guide to dual-quaternions: what they are, how they work, and how to use them for 3D character hierarchies", Ben Kenwright, 2012](https://borodust.org/public/shared/beginner_dual_quats.pdf)
+ * - ["Dual Quaternions", Yan-Bin Jia, 2013](http://web.cs.iastate.edu/~cs577/handouts/dual-quaternion.pdf)
+ * - ["Geometric Skinning with Approximate Dual Quaternion Blending", Kavan, 2008](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric)
+ * - http://rodolphe-vaillant.fr/?e=29
+ *
+ * A unit dual quaternion can be classically represented as:
+ * \f[
+ * \begin{equation}
+ * \begin{split}
+ * \sigma &= \left(r+\frac{\epsilon}{2}tr\right)\\
+ * &= [w, x, y, z, w\_, x\_, y\_, z\_]
+ * \end{split}
+ * \end{equation}
+ * \f]
+ * where \f$r, t\f$ represent the rotation (ordinary unit quaternion) and translation (pure ordinary quaternion) respectively.
+ *
+ * A general dual quaternion, which consists of two quaternions, is usually represented in the form:
+ * \f[
+ * \sigma = p + \epsilon q
+ * \f]
+ * where the introduced dual unit \f$\epsilon\f$ satisfies \f$\epsilon^2 = \epsilon^3 =...=0\f$, and \f$p, q\f$ are quaternions.
+ *
+ * Alternatively, dual quaternions can also be interpreted as four components which are all [dual numbers](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric):
+ * \f[
+ * \sigma = \hat{q}_w + \hat{q}_xi + \hat{q}_yj + \hat{q}_zk
+ * \f]
+ * If we set \f$\hat{q}_x, \hat{q}_y\f$ and \f$\hat{q}_z\f$ equal to 0, a dual quaternion is transformed to a dual number. See normalize().
+ *
+ * If you want to create a dual quaternion, you can use:
+ *
+ * ```
+ * using namespace cv;
+ * double angle = CV_PI;
+ *
+ * // create from eight numbers
+ * DualQuatd dq1(1, 2, 3, 4, 5, 6, 7, 8); //p = [1,2,3,4]. q=[5,6,7,8]
+ *
+ * // create from a Vec
+ * Vec<double, 8> v{1,2,3,4,5,6,7,8};
+ * DualQuatd dq_v{v};
+ *
+ * // create from two quaternions
+ * Quatd p(1, 2, 3, 4);
+ * Quatd q(5, 6, 7, 8);
+ * DualQuatd dq2 = DualQuatd::createFromQuat(p, q);
+ *
+ * // create from an angle, an axis and a translation
+ * Vec3d axis{0, 0, 1};
+ * Vec3d trans{3, 4, 5};
+ * DualQuatd dq3 = DualQuatd::createFromAngleAxisTrans(angle, axis, trans);
+ *
+ * // If you already have an instance of class Affine3, then you can use
+ * Affine3d R = dq3.toAffine3();
+ * DualQuatd dq4 = DualQuatd::createFromAffine3(R);
+ *
+ * // or create directly from an affine transformation matrix Rt
+ * // see createFromMat() in detail for the form of Rt
+ * Matx44d Rt = dq3.toMat();
+ * DualQuatd dq5 = DualQuatd::createFromMat(Rt);
+ *
+ * // Any rotation + translation movement can
+ * // be expressed as a rotation + translation around the same line in space (expressed by Plucker
+ * // coords), and here's a way to represent it this way.
+ * axis = Vec3d{1, 1, 1}; // axis will be normalized in createFromPitch
+ * trans = Vec3d{3, 4, 5};
+ * axis = axis / std::sqrt(axis.dot(axis)); // the moment formula below requires a normalized axis
+ * Vec3d moment = 1.0 / 2 * (trans.cross(axis) + axis.cross(trans.cross(axis)) *
+ *                std::cos(angle / 2) / std::sin(angle / 2));
+ * double d = trans.dot(axis);
+ * DualQuatd dq6 = DualQuatd::createFromPitch(angle, d, axis, moment);
+ * ```
+ *
+ * A point \f$v=(x, y, z)\f$ in the form of a dual quaternion is \f$[1+\epsilon v]=[1,0,0,0,0,x,y,z]\f$.
+ * The transformation of a point \f$v_1\f$ to another point \f$v_2\f$ under the dual quaternion \f$\sigma\f$ is
+ * \f[
+ * 1 + \epsilon v_2 = \sigma * (1 + \epsilon v_1) * \sigma^{\star}
+ * \f]
+ * where \f$\sigma^{\star}=p^*-\epsilon q^*.\f$
+ *
+ * A line in the \f$Pl\ddot{u}cker\f$ coordinates \f$(\hat{l}, m)\f$ is defined by the dual quaternion \f$l=\hat{l}+\epsilon m\f$.
+ * To transform a line, \f[l_2 = \sigma * l_1 * \sigma^*,\f] where \f$\sigma=r+\frac{\epsilon}{2}rt\f$ and
+ * \f$\sigma^*=p^*+\epsilon q^*\f$.
+ *
+ * To extract the Vec<double, 8> or Vec<float, 8>, see toVec();
+ *
+ * To extract the affine transformation matrix, see toMat();
+ *
+ * To extract the instance of Affine3, see toAffine3();
+ *
+ * If two dual quaternions \f$q_0, q_1\f$ need to be interpolated, you can use sclerp()
+ * ```
+ * DualQuatd::sclerp(q0, q1, t)
+ * ```
+ * or dqblend().
+ * ```
+ * DualQuatd::dqblend(q0, q1, t)
+ * ```
+ * With more than two dual quaternions to be blended, you can use generalized linear dual quaternion blending
+ * with the corresponding weights, i.e. gdqblend().
+ *
+ */
+template <typename _Tp>
+class CV_EXPORTS DualQuat{
+    static_assert(std::is_floating_point<_Tp>::value, "Dual quaternion only make sense with type of float or double");
+    using value_type = _Tp;
+
+public:
+    static constexpr _Tp CV_DUAL_QUAT_EPS = (_Tp)1.e-6;
+
+    DualQuat();
+
+    /**
+     * @brief create from eight numbers of the same type.
+     */
+    DualQuat(const _Tp w, const _Tp x, const _Tp y, const _Tp z, const _Tp w_, const _Tp x_, const _Tp y_, const _Tp z_);
+
+    /**
+     * @brief create from a double or float vector.
+     */
+    DualQuat(const Vec<_Tp, 8> &q);
+
+    _Tp w, x, y, z, w_, x_, y_, z_;
+
+    /**
+     * @brief create a Dual Quaternion from two quaternions of the same type, p and q.
+     * A Dual Quaternion \f$\sigma\f$ has the form:
+     * \f[\sigma = p + \epsilon q\f]
+     * where p and q are defined as follows:
+     * \f[\begin{equation}
+     *    \begin{split}
+     *    p &= w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\\
+     *    q &= w\_ + x\_\boldsymbol{i} + y\_\boldsymbol{j} + z\_\boldsymbol{k}.
+     *    \end{split}
+     *    \end{equation}
+     * \f]
+     * The p and q are the real part and dual part respectively.
+     * @param realPart a quaternion, real part of dual quaternion.
+     * @param dualPart a quaternion, dual part of dual quaternion.
+     * @sa Quat
+     */
+    static DualQuat<_Tp> createFromQuat(const Quat<_Tp> &realPart, const Quat<_Tp> &dualPart);
+
+    /**
+     * @brief create a dual quaternion from a rotation angle \f$\theta\f$, a rotation axis
+     * \f$\boldsymbol{u}\f$ and a translation \f$\boldsymbol{t}\f$.
+     * It generates a dual quaternion \f$\sigma\f$ in the form of
+     * \f[\begin{equation}
+     *    \begin{split}
+     *    \sigma &= r + \frac{\epsilon}{2}\boldsymbol{t}r \\
+     *           &= [\cos(\frac{\theta}{2}), \boldsymbol{u}\sin(\frac{\theta}{2})]
+     *           + \frac{\epsilon}{2}[0, \boldsymbol{t}][\cos(\frac{\theta}{2}),
+     *           \boldsymbol{u}\sin(\frac{\theta}{2})]\\
+     *           &= \cos(\frac{\theta}{2}) + \boldsymbol{u}\sin(\frac{\theta}{2})
+     *           + \frac{\epsilon}{2}(-(\boldsymbol{t} \cdot \boldsymbol{u})\sin(\frac{\theta}{2})
+     *           + \boldsymbol{t}\cos(\frac{\theta}{2}) + \boldsymbol{u} \times \boldsymbol{t} \sin(\frac{\theta}{2})).
+     *    \end{split}
+     *    \end{equation}\f]
+     * @param angle rotation angle.
+     * @param axis rotation axis.
+     * @param translation a vector of length 3.
+     * @note The axis will be normalized in this function, and the translation is applied
+     * after the rotation. Use @ref createFromQuat(r, r * t / 2) to create a dual quaternion
+     * whose translation is applied before rotation.
+     * @sa Quat
+     */
+    static DualQuat<_Tp> createFromAngleAxisTrans(const _Tp angle, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &translation);
+
+    /**
+     * @brief Transform this dual quaternion to an affine transformation matrix \f$M\f$.
+     * A dual quaternion consists of a rotation \f$r=[a,b,c,d]\f$ and a translation \f$t=[\Delta x,\Delta y,\Delta z]\f$. The
+     * affine transformation matrix \f$M\f$ has the form
+     * \f[
+     * \begin{bmatrix}
+     * 1-2(e_2^2 +e_3^2) &2(e_1e_2-e_0e_3) &2(e_0e_2+e_1e_3) &\Delta x\\
+     * 2(e_0e_3+e_1e_2)  &1-2(e_1^2+e_3^2) &2(e_2e_3-e_0e_1) &\Delta y\\
+     * 2(e_1e_3-e_0e_2)  &2(e_0e_1+e_2e_3) &1-2(e_1^2+e_2^2) &\Delta z\\
+     * 0&0&0&1
+     * \end{bmatrix}
+     * \f]
+     * If A is a matrix consisting of n points to be transformed, this can be achieved by
+     * \f[
+     * new\_A = M * A
+     * \f]
+     * where A has the form
+     * \f[
+     * \begin{bmatrix}
+     * x_0& x_1& x_2&...&x_n\\
+     * y_0& y_1& y_2&...&y_n\\
+     * z_0& z_1& z_2&...&z_n\\
+     * 1&1&1&...&1
+     * \end{bmatrix}
+     * \f]
+     * where the same subscript represents the same point. The size of A should be \f$[4,n]\f$,
+     * and matrix new_A has the same size.
+     * @param _R 4x4 matrix that represents rotation and translation.
+     * @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create
+     * a dual quaternion whose translation is applied before rotation.
+     */
+    static DualQuat<_Tp> createFromMat(InputArray _R);
+
+    /**
+     * @brief create a dual quaternion from an affine matrix. For the definition of the affine matrix, see createFromMat().
+     */
+    static DualQuat<_Tp> createFromAffine3(const Affine3<_Tp> &R);
+
+    /**
+     * @brief A dual quaternion is a vector in the form of
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma &=\boldsymbol{p} + \epsilon \boldsymbol{q}\\
+     * &= \cos\frac{\hat{\theta}}{2}+\overline{\hat{l}}\sin\frac{\hat{\theta}}{2}
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$\hat{\theta}\f$ is the dual angle and \f$\overline{\hat{l}}\f$ is the dual axis:
+     * \f[
+     * \hat{\theta}=\theta + \epsilon d,\\
+     * \overline{\hat{l}}= \hat{l} +\epsilon m.
+     * \f]
+     * In this representation, \f$\theta\f$ is the rotation angle, \f$(\hat{l},m)\f$ is the screw axis and d is the translation distance along the axis.
+     *
+     * @param angle rotation angle.
+     * @param d translation along the rotation axis.
+     * @param axis rotation axis represented by a quaternion with w = 0.
+     * @param moment the moment of the line; it should be orthogonal to the axis.
+     * @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create
+     * a dual quaternion whose translation is applied before rotation.
+     */
+    static DualQuat<_Tp> createFromPitch(const _Tp angle, const _Tp d, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &moment);
+
+    /**
+     * @brief return a quaternion which represents the real part of the dual quaternion.
+     * The definition of the real part is in createFromQuat().
+     * @sa createFromQuat, getDualPart
+     */
+    Quat<_Tp> getRealPart() const;
+
+    /**
+     * @brief return a quaternion which represents the dual part of the dual quaternion.
+     * The definition of the dual part is in createFromQuat().
+     * @sa createFromQuat, getRealPart
+     */
+    Quat<_Tp> getDualPart() const;
+
+    /**
+     * @brief return the conjugate of a dual quaternion.
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma^* &= (p + \epsilon q)^*\\
+     *          &= (p^* + \epsilon q^*)
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * @param dq a dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> conjugate(const DualQuat<T> &dq);
+
+    /**
+     * @brief return the conjugate of a dual quaternion.
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma^* &= (p + \epsilon q)^*\\
+     *          &= (p^* + \epsilon q^*)
+     * \end{split}
+     * \end{equation}
+     * \f]
+     */
+    DualQuat<_Tp> conjugate() const;
+
+    /**
+     * @brief return the rotation in quaternion form.
+     */
+    Quat<_Tp> getRotation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the translation vector.
+     * The rotation \f$r\f$ in this dual quaternion \f$\sigma\f$ is applied before the translation \f$t\f$.
+     * The dual quaternion \f$\sigma\f$ is defined as
+     * \f[\begin{equation}
+     *    \begin{split}
+     *    \sigma &= p + \epsilon q \\
+     *           &= r + \frac{\epsilon}{2}{t}r.
+     *    \end{split}
+     *    \end{equation}\f]
+     * Thus, the translation can be obtained as follows
+     * \f[t = 2qp^*.\f]
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     * @note This dual quaternion's translation is applied after the rotation.
+     */
+    Vec<_Tp, 3> getTranslation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the norm \f$||\sigma||\f$ of the dual quaternion \f$\sigma = p + \epsilon q\f$.
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * ||\sigma|| &= \sqrt{\sigma * \sigma^*} \\
+     *            &= ||p|| + \epsilon \frac{p \cdot q}{||p||}.
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * Generally speaking, the norm of a non-unit dual
+     * quaternion is a dual number. For convenience, we return it in the form of a dual quaternion,
+     * i.e.
+     * \f[ ||\sigma|| = [||p||, 0, 0, 0, \frac{p \cdot q}{||p||}, 0, 0, 0].\f]
+     *
+     * @note The data type of the dual number is dual quaternion.
+     */
+    DualQuat<_Tp> norm() const;
+
+    /**
+     * @brief return a normalized dual quaternion.
+     * A dual quaternion can be expressed as
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma &= p + \epsilon q\\
+     *        &=||\sigma||\left(r+\frac{1}{2}tr\right)
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$r, t\f$ represent the rotation (ordinary quaternion) and translation (pure ordinary quaternion) respectively,
+     * and \f$||\sigma||\f$ is the norm of the dual quaternion (a dual number).
+     * A dual quaternion is unit if and only if
+     * \f[
+     * ||p||=1, p \cdot q=0
+     * \f]
+     * where \f$\cdot\f$ means dot product.
+     * The process of normalization is
+     * \f[
+     * \sigma_{u}=\frac{\sigma}{||\sigma||}
+     * \f]
+     * Next, we simply prove that \f$\sigma_u\f$ is a unit dual quaternion:
+     * \f[
+     * \renewcommand{\Im}{\operatorname{Im}}
+     * \begin{equation}
+     * \begin{split}
+     * \sigma_{u}=\frac{\sigma}{||\sigma||}&=\frac{p + \epsilon q}{||p||+\epsilon\frac{p\cdot q}{||p||}}\\
+     * &=\frac{p}{||p||}+\epsilon\left(\frac{q}{||p||}-p\frac{p\cdot q}{||p||^3}\right)\\
+     * &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\left(qp^{*}-p\cdot q\right)\frac{p}{||p||}\\
+     * &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\Im(qp^*)\frac{p}{||p||}.\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * As expected, the real part is a rotation and the dual part is a pure quaternion.
+     */
+    DualQuat<_Tp> normalize() const;
+
+    /**
+     * @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion and p is not zero,
+     * the inverse dual quaternion is
+     * \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f]
+     * or equivalently,
+     * \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f]
+     * @param dq a dual quaternion.
+     * @param dq a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the dual quaternion dq is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion and \f$p\f$ is not zero,
+     * the inverse dual quaternion is
+     * \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f]
+     * or equivalently,
+     * \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f]
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the dot product of two dual quaternions.
+     * @param p other dual quaternion.
+     */
+    _Tp dot(DualQuat<_Tp> p) const;
+
+    /**
+     * @brief return the value of \f$p^t\f$ where p is a dual quaternion.
+     * This could be calculated as:
+     * \f[
+     * p^t = \exp(t\ln p)
+     * \f]
+     * @param dq a dual quaternion.
+     * @param t index of power function.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the dual quaternion dq is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief return the value of \f$p^t\f$ where p is this dual quaternion.
+     * This could be calculated as:
+     * \f[
+     * p^t = \exp(t\ln p)
+     * \f]
+     *
+     * @param t index of power function.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> power(const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the value of \f$p^q\f$ where p and q are dual quaternions.
+     * This could be calculated as:
+     * \f[
+     * p^q = \exp(q\ln p)
+     * \f]
+     * @param p a dual quaternion.
+     * @param q a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the dual quaternion p is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> power(const DualQuat<T>& p, const DualQuat<T>& q, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief return the value of \f$p^q\f$ where p is this dual quaternion and q is another dual quaternion.
+     * This could be calculated as:
+     * \f[
+     * p^q = \exp(q\ln p)
+     * \f]
+     *
+     * @param q a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> power(const DualQuat<_Tp>& q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the value of the exponential function.
+     * @param dq a dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> exp(const DualQuat<T> &dq);
+
+    /**
+     * @brief return the value of the exponential function.
+     */
+    DualQuat<_Tp> exp() const;
+
+    /**
+     * @brief return the value of the logarithm function.
+     *
+     * @param dq a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the dual quaternion dq is assumed to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief return the value of the logarithm function.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion
+     * and this function will save some computations.
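+     *
+     * A minimal usage sketch: log() and exp() are mutual inverses, so a unit dual
+     * quaternion survives the round trip (up to floating-point error).
+     * ```
+     * DualQuatd q = DualQuatd::createFromAngleAxisTrans(CV_PI / 2, Vec3d{0, 0, 1}, Vec3d{0, 0, 3});
+     * DualQuatd r = q.log(QUAT_ASSUME_UNIT).exp(); // r is approximately equal to q
+     * ```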
+     */
+    DualQuat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief Transform this dual quaternion to a vector.
+     */
+    Vec<_Tp, 8> toVec() const;
+
+    /**
+     * @brief Transform this dual quaternion to an affine transformation matrix;
+     * for the form of the matrix, see createFromMat().
+     */
+    Matx<_Tp, 4, 4> toMat(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief Transform this dual quaternion to an instance of Affine3.
+     */
+    Affine3<_Tp> toAffine3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief The screw linear interpolation (ScLERP) is an extension of spherical linear interpolation to dual quaternions.
+     * If \f$\sigma_1\f$ and \f$\sigma_2\f$ are two dual quaternions representing the initial and final pose,
+     * the ScLERP function can be defined as:
+     * \f[
+     * ScLERP(t;\sigma_1,\sigma_2) = \sigma_1 * (\sigma_1^{-1} * \sigma_2)^t, t\in[0,1]
+     * \f]
+     *
+     * @param q1 a dual quaternion representing the initial pose.
+     * @param q2 a dual quaternion representing the final pose.
+     * @param t interpolation parameter.
+     * @param directChange if true, the shortest path is always returned.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the input dual quaternions are assumed to be unit dual quaternions
+     * and this function will save some computations.
+     *
+     * For example
+     * ```
+     * double angle1 = CV_PI / 2;
+     * Vec3d axis{0, 0, 1};
+     * Vec3d t(0, 0, 3);
+     * DualQuatd initial = DualQuatd::createFromAngleAxisTrans(angle1, axis, t);
+     * double angle2 = CV_PI;
+     * DualQuatd final = DualQuatd::createFromAngleAxisTrans(angle2, axis, t);
+     * DualQuatd inter = DualQuatd::sclerp(initial, final, 0.5);
+     * ```
+     */
+    static DualQuat<_Tp> sclerp(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t,
+                                bool directChange=true, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief Dual Quaternion linear Blending (DQB) computes a blend of the two transformations represented by the
+     * dual quaternions \f$q_1\f$ and \f$q_2\f$ and can be defined as:
+     * \f[
+     * DQB(t;{\boldsymbol{q}}_1,{\boldsymbol{q}}_2)=
+     * \frac{(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2}{||(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2||}.
+     * \f]
+     * where \f$q_1\f$ and \f$q_2\f$ are unit dual quaternions representing the input transformations.
+     * If you want a DQB that works for more than two rigid transformations, see @ref gdqblend.
+     *
+     * @param q1 a unit dual quaternion representing the first input transformation.
+     * @param q2 a unit dual quaternion representing the second input transformation.
+     * @param t parameter \f$t\in[0,1]\f$.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the input dual quaternions are assumed to be unit dual quaternions
+     * and this function will save some computations.
+     *
+     * @sa gdqblend
+     */
+    static DualQuat<_Tp> dqblend(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t,
+                                 QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations.
+     * If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights
+     * \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply
+     * \f[
+     * gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n}
+     * {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}.
+     * \f]
+     * @param dualquat vector of dual quaternions.
+     * @param weights vector of weights; its size should be the same as dualquat, and the weights should
+     * satisfy \f$\sum_{i=1}^{n} w_{i} = 1\f$ and \f$w_i>0\f$.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the dual quaternions are assumed to be unit dual quaternions
+     * and this function will save some computations.
+     * @note the element type of weights should be the same as the data type of the dual quaternions inside dualquat.
+     */
+    template <int cn>
+    static DualQuat<_Tp> gdqblend(const Vec<DualQuat<_Tp>, cn> &dualquat, InputArray weights,
+                                  QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations.
+     * If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights
+     * \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply
+     * \f[
+     * gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n}
+     * {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}.
+     * \f]
+     * @param dualquat The dual quaternions, stored as an array with 8 channels and 1 row or 1 column.
+     * @param weights vector of weights; its size should be the same as dualquat, and the weights should
+     * satisfy \f$\sum_{i=1}^{n} w_{i} = 1\f$ and \f$w_i>0\f$.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, the dual quaternions are assumed to be unit dual quaternions
+     * and this function will save some computations.
+     * @note the element type of weights should be the same as the data type of the dual quaternions inside dualquat.
+     */
+    static DualQuat<_Tp> gdqblend(InputArray dualquat, InputArray weights,
+                                  QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief Return the opposite dual quaternion \f$-p\f$,
+     * which satisfies \f$p + (-p) = 0.\f$
+     *
+     * For example
+     * ```
+     * DualQuatd q{1, 2, 3, 4, 5, 6, 7, 8};
+     * std::cout << -q << std::endl; // [-1, -2, -3, -4, -5, -6, -7, -8]
+     * ```
+     */
+    DualQuat<_Tp> operator-() const;
+
+    /**
+     * @brief return true if two dual quaternions p and q are nearly equal, i.e. when the absolute
+     * value of each \f$p_i - q_i\f$ is less than CV_DUAL_QUAT_EPS.
+     */
+    bool operator==(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Subtraction operator of two dual quaternions p and q.
+     * It returns a new dual quaternion whose each element is the sum of \f$p_i\f$ and \f$-q_i\f$.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p - q << std::endl; //[-4, -4, -4, -4, -4, -4, -4, -4]
+     * ```
+     */
+    DualQuat<_Tp> operator-(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Subtraction assignment operator of two dual quaternions p and q.
+     * It subtracts the right operand from the left operand and assigns the result to the left operand.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p -= q; // equivalent to p = p - q
+     * std::cout << p << std::endl; //[-4, -4, -4, -4, -4, -4, -4, -4]
+     * ```
+     */
+    DualQuat<_Tp>& operator-=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Addition operator of two dual quaternions p and q.
+     * It returns a new dual quaternion whose each element is the sum of \f$p_i\f$ and \f$q_i\f$.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p + q << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20]
+     * ```
+     */
+    DualQuat<_Tp> operator+(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Addition assignment operator of two dual quaternions p and q.
+     * It adds the right operand to the left operand and assigns the result to the left operand.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p += q; // equivalent to p = p + q
+     * std::cout << p << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20]
+     * ```
+     */
+    DualQuat<_Tp>& operator+=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Multiplication assignment operator of two dual quaternions.
+     * It multiplies the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of dual quaternion multiplication:
+     * A dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [A, B][C, D]\\
+     *       &= [AC, AD + BC]
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p *= q;
+     * std::cout << p << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120]
+     * ```
+     */
+    DualQuat<_Tp>& operator*=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Multiplication assignment operator of a dual quaternion and a scalar.
+     * It multiplies the left operand by the scalar and assigns the result to the left operand.
+     *
+     * Rule of dual quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+     *       &= [w s, x s, y s, z s, w\_ s, x\_ s, y\_ s, z\_ s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * p *= s;
+     * std::cout << p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    DualQuat<_Tp> operator*=(const _Tp s);
+
+    /**
+     * @brief Multiplication operator of two dual quaternions q and p.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of dual quaternion multiplication:
+     * A dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [A, B][C, D]\\
+     *       &= [AC, AD + BC]
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p * q << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120]
+     * ```
+     */
+    DualQuat<_Tp> operator*(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Division operator of a dual quaternion and a scalar.
+     * It divides the left operand by the scalar and returns the result.
+     *
+     * Rule of dual quaternion division by a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z, w\_, x\_, y\_, z\_] / s\\
+     *       &= [w/s, x/s, y/s, z/s, w\_/s, x\_/s, y\_/s, z\_/s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * std::cout << p / s << std::endl; //[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    DualQuat<_Tp> operator/(const _Tp s) const;
+
+    /**
+     * @brief Division operator of two dual quaternions p and q.
+     * Divides the left hand operand by the right hand operand.
+     *
+     * Rule of dual quaternion division by a dual quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p / q << std::endl; // equivalent to p * q.inv()
+     * ```
+     */
+    DualQuat<_Tp> operator/(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Division assignment operator of two dual quaternions p and q.
+     * It divides the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of dual quaternion division by a dual quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p /= q; // equivalent to p = p * q.inv()
+     * std::cout << p << std::endl;
+     * ```
+     */
+    DualQuat<_Tp>& operator/=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Division assignment operator of a dual quaternion and a scalar.
+     * It divides the left operand by the scalar and assigns the result to the left operand.
+     *
+     * Rule of dual quaternion division by a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z, w\_, x\_, y\_, z\_] / s\\
+     *       &= [w/s, x/s, y/s, z/s, w\_/s, x\_/s, y\_/s, z\_/s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * p /= s; // equivalent to p = p / s
+     * std::cout << p << std::endl; //[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    DualQuat<_Tp>& operator/=(const _Tp s);
+
+    /**
+     * @brief Addition operator of a scalar and a dual quaternion.
+     * Adds the scalar to the real w component of the dual quaternion.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator+(const T s, const DualQuat<T>&);
+
+    /**
+     * @brief Addition operator of a dual quaternion and a scalar.
+     * Adds the scalar to the real w component of the dual quaternion.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator+(const DualQuat<T>&, const T s);
+
+    /**
+     * @brief Multiplication operator of a scalar and a dual quaternion.
+     * Multiplies each element of the dual quaternion by the scalar.
+     *
+     * Rule of dual quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+     *       &= [w s, x s, y s, z s, w\_ s, x\_ s, y\_ s, z\_ s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * std::cout << s * p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator*(const T s, const DualQuat<T>&);
+
+    /**
+     * @brief Subtraction operator of a dual quaternion and a scalar.
+     * Subtracts the scalar from the real w component of the dual quaternion.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << p - scalar << std::endl; //[-1, 2, 3, 4, 5, 6, 7, 8]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator-(const DualQuat<T>&, const T s);
+
+    /**
+     * @brief Subtraction operator of a scalar and a dual quaternion.
+     * Subtracts the right hand operand from the left hand operand, i.e. returns \f$s - p\f$.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4, -5, -6, -7, -8]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator-(const T s, const DualQuat<T>&);
+
+    /**
+     * @brief Multiplication operator of a dual quaternion and a scalar.
+     * Multiplies each element of the dual quaternion by the scalar.
+     *
+     * Rule of dual quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+     *       &= [w s, x s, y s, z s, w\_ s, x\_ s, y\_ s, z\_ s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * std::cout << p * s << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+     * ```
+     * @note the type of the scalar should match the element type of the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator*(const DualQuat<T>&, const T s);
+
+    template <typename S>
+    friend std::ostream& cv::operator<<(std::ostream&, const DualQuat<S>&);
+
+};
+
+using DualQuatd = DualQuat<double>;
+using DualQuatf = DualQuat<float>;
+
+//! @} core
+}//namespace
+
+#include "dualquaternion.inl.hpp"
+
+#endif /* OPENCV_CORE_DUALQUATERNION_HPP */
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/dualquaternion.inl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/dualquaternion.inl.hpp
new file mode 100644
index 0000000..6abb159
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/dualquaternion.inl.hpp
@@ -0,0 +1,487 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+//                       License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong
+//         Longbu Wang
+
+#ifndef OPENCV_CORE_DUALQUATERNION_INL_HPP
+#define OPENCV_CORE_DUALQUATERNION_INL_HPP
+
+#ifndef OPENCV_CORE_DUALQUATERNION_HPP
+#error This is not a standalone header. Include dualquaternion.hpp instead.
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////
+//Implementation
+namespace cv {
+
+template <typename T>
+DualQuat<T>::DualQuat():w(0), x(0), y(0), z(0), w_(0), x_(0), y_(0), z_(0){};
+
+template <typename T>
+DualQuat<T>::DualQuat(const T vw, const T vx, const T vy, const T vz, const T _w, const T _x, const T _y, const T _z):
+    w(vw), x(vx), y(vy), z(vz), w_(_w), x_(_x), y_(_y), z_(_z){};
+
+template <typename T>
+DualQuat<T>::DualQuat(const Vec<T, 8> &q):w(q[0]), x(q[1]), y(q[2]), z(q[3]),
+                                          w_(q[4]), x_(q[5]), y_(q[6]), z_(q[7]){};
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromQuat(const Quat<T> &realPart, const Quat<T> &dualPart)
+{
+    T w = realPart.w;
+    T x = realPart.x;
+    T y = realPart.y;
+    T z = realPart.z;
+    T w_ = dualPart.w;
+    T x_ = dualPart.x;
+    T y_ = dualPart.y;
+    T z_ = dualPart.z;
+    return DualQuat<T>(w, x, y, z, w_, x_, y_, z_);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromAngleAxisTrans(const T angle, const Vec<T, 3> &axis, const Vec<T, 3> &trans)
+{
+    Quat<T> r = Quat<T>::createFromAngleAxis(angle, axis);
+    Quat<T> t{0, trans[0], trans[1], trans[2]};
+    return createFromQuat(r, t * r * T(0.5));
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromMat(InputArray _R)
+{
+    CV_CheckTypeEQ(_R.type(), cv::traits::Type<T>::value, "");
+    if (_R.size() != Size(4, 4))
+    {
+        CV_Error(Error::StsBadArg, "The input matrix must have 4 columns and 4 rows");
+    }
+    Mat R = _R.getMat();
+    Quat<T> r = Quat<T>::createFromRotMat(R.colRange(0, 3).rowRange(0, 3));
+    Quat<T> trans(0, R.at<T>(0, 3), R.at<T>(1, 3), R.at<T>(2, 3));
+    return createFromQuat(r, trans * r * T(0.5));
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromAffine3(const Affine3<T> &R)
+{
+    return createFromMat(R.matrix);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromPitch(const T angle, const T d, const Vec<T, 3> &axis, const Vec<T, 3> &moment)
+{
+    T half_angle = angle * T(0.5), half_d = d * T(0.5);
+    Quat<T> qaxis = Quat<T>(0, axis[0], axis[1], axis[2]).normalize();
+    Quat<T> qmoment = Quat<T>(0, moment[0], moment[1], moment[2]);
+    qmoment -= qaxis * axis.dot(moment);
+    Quat<T> dual = -half_d * std::sin(half_angle) + std::sin(half_angle) * qmoment +
+                   half_d * std::cos(half_angle) * qaxis;
+    return createFromQuat(Quat<T>::createFromAngleAxis(angle, axis), dual);
+}
+
+template <typename T>
+inline bool DualQuat<T>::operator==(const DualQuat<T> &q) const
+{
+    return (abs(w - q.w) < CV_DUAL_QUAT_EPS && abs(x - q.x) < CV_DUAL_QUAT_EPS &&
+            abs(y - q.y) < CV_DUAL_QUAT_EPS && abs(z - q.z) < CV_DUAL_QUAT_EPS &&
+            abs(w_ - q.w_) < CV_DUAL_QUAT_EPS && abs(x_ - q.x_) < CV_DUAL_QUAT_EPS &&
+            abs(y_ - q.y_) < CV_DUAL_QUAT_EPS && abs(z_ - q.z_) < CV_DUAL_QUAT_EPS);
+}
+
+template <typename T>
+inline Quat<T> DualQuat<T>::getRealPart() const
+{
+    return Quat<T>(w, x, y, z);
+}
+
+template <typename T>
+inline Quat<T> DualQuat<T>::getDualPart() const
+{
+    return Quat<T>(w_, x_, y_, z_);
+}
+
+template <typename T>
+inline DualQuat<T> conjugate(const DualQuat<T> &dq)
+{
+    return dq.conjugate();
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::conjugate() const
+{
+    return DualQuat<T>(w, -x, -y, -z, w_, -x_, -y_, -z_);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::norm() const
+{
+    Quat<T> real = getRealPart();
+    T realNorm = real.norm();
+    Quat<T> dual = getDualPart();
+    if (realNorm < CV_DUAL_QUAT_EPS){
+        return DualQuat<T>(0, 0, 0, 0, 0, 0, 0, 0);
+    }
+    return DualQuat<T>(realNorm, 0, 0, 0, real.dot(dual) / realNorm, 0, 0, 0);
+}
+
+template <typename T>
+inline Quat<T> DualQuat<T>::getRotation(QuatAssumeType assumeUnit) const
+{
+    if (assumeUnit)
+    {
+        return getRealPart();
+    }
+    return getRealPart().normalize();
+}
+
+template <typename T>
+inline Vec<T, 3> DualQuat<T>::getTranslation(QuatAssumeType assumeUnit) const
+{
+    Quat<T> trans = T(2.0) * (getDualPart() * getRealPart().inv(assumeUnit));
+    return Vec<T, 3>{trans[1], trans[2], trans[3]};
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::normalize() const
+{
+    Quat<T> p = getRealPart();
+    Quat<T> q = getDualPart();
+    T p_norm = p.norm();
+    if (p_norm < CV_DUAL_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "Cannot normalize this dual quaternion: the norm is too small.");
+    }
+    Quat<T> p_nr = p / p_norm;
+    Quat<T> q_nr = q / p_norm;
+    return createFromQuat(p_nr, q_nr - p_nr * p_nr.dot(q_nr));
+}
+
+template <typename T>
+inline T DualQuat<T>::dot(DualQuat<T> q) const
+{
+    return q.w * w + q.x * x + q.y * y + q.z * z + q.w_ * w_ + q.x_ * x_ + q.y_ * y_ + q.z_ * z_;
+}
+
+template <typename T>
+inline DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return dq.inv(assumeUnit);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::inv(QuatAssumeType assumeUnit) const
+{
+    Quat<T> real = getRealPart();
+    Quat<T> dual = getDualPart();
+    return createFromQuat(real.inv(assumeUnit), -real.inv(assumeUnit) * dual * real.inv(assumeUnit));
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator-(const DualQuat<T> &q) const
+{
+    return DualQuat<T>(w - q.w, x - q.x, y - q.y, z - q.z, w_ - q.w_, x_ - q.x_, y_ - q.y_, z_ - q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator-() const
+{
+    return DualQuat<T>(-w, -x, -y, -z, -w_, -x_, -y_, -z_);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator+(const DualQuat<T> &q) const
+{
+    return DualQuat<T>(w + q.w, x + q.x, y + q.y, z + q.z, w_ + q.w_, x_ + q.x_, y_ + q.y_, z_ + q.z_);
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator+=(const DualQuat<T> &q)
+{
+    *this = *this + q;
+    return *this;
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator*(const DualQuat<T> &q) const
+{
+    Quat<T> A = getRealPart();
+    Quat<T> B = getDualPart();
+    Quat<T> C = q.getRealPart();
+    Quat<T> D = q.getDualPart();
+    return DualQuat<T>::createFromQuat(A * C, A * D + B * C);
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator*=(const DualQuat<T> &q)
+{
+    *this = *this * q;
+    return *this;
+}
+
+template <typename T>
+inline DualQuat<T> operator+(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(a + q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> operator+(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(a + q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> operator-(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(q.w - a, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator-=(const DualQuat<T> &q)
+{
+    *this = *this - q;
+    return *this;
+}
+
+template <typename T>
+inline DualQuat<T> operator-(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(a - q.w, -q.x, -q.y, -q.z, -q.w_, -q.x_, -q.y_, -q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> operator*(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a, q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
+}
+
+template <typename T>
+inline DualQuat<T> operator*(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(q.w * a, q.x * a,
+                       q.y * a, q.z * a, q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator/(const T a) const
+{
+    return DualQuat<T>(w / a, x / a, y / a, z / a, w_ / a, x_ / a, y_ / a, z_ / a);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator/(const DualQuat<T> &q) const
+{
+    return *this * q.inv();
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator/=(const DualQuat<T> &q)
+{
+    *this = *this / q;
+    return *this;
+}
+
+template <typename T>
+std::ostream & operator<<(std::ostream &os, const DualQuat<T> &q)
+{
+    os << "DualQuat " << Vec<T, 8>{q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_};
+    return os;
+}
+
+template <typename T>
+inline DualQuat<T> exp(const DualQuat<T> &dq)
+{
+    return dq.exp();
+}
+
+namespace detail {
+
+template <typename _Tp>
+Matx<_Tp, 4, 4> jacob_exp(const Quat<_Tp> &q)
+{
+    _Tp nv = std::sqrt(q.x * q.x + q.y * q.y + q.z * q.z);
+    _Tp sinc_nv = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS ? _Tp(1.0) - nv * nv * _Tp(1.0/6.0) : std::sin(nv) / nv;
+    _Tp csiii_nv = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS ? -_Tp(1.0/3.0) : (std::cos(nv) - sinc_nv) / nv / nv;
+    Matx<_Tp, 4, 4> J_exp_quat {
+        std::cos(nv), -sinc_nv * q.x, -sinc_nv * q.y, -sinc_nv * q.z,
+        sinc_nv * q.x, csiii_nv * q.x * q.x + sinc_nv, csiii_nv * q.x * q.y, csiii_nv * q.x * q.z,
+        sinc_nv * q.y, csiii_nv * q.y * q.x, csiii_nv * q.y * q.y + sinc_nv, csiii_nv * q.y * q.z,
+        sinc_nv * q.z, csiii_nv * q.z * q.x, csiii_nv * q.z * q.y, csiii_nv * q.z * q.z + sinc_nv
+    };
+    return std::exp(q.w) * J_exp_quat;
+}
+
+} // namespace detail
+
+template <typename T>
+DualQuat<T> DualQuat<T>::exp() const
+{
+    Quat<T> real = getRealPart();
+    return createFromQuat(real.exp(), Quat<T>(detail::jacob_exp(real) * getDualPart().toVec()));
+}
+
+template <typename T>
+DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return dq.log(assumeUnit);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::log(QuatAssumeType assumeUnit) const
+{
+    Quat<T> plog = getRealPart().log(assumeUnit);
+    Matx<T, 4, 4> jacob = detail::jacob_exp(plog);
+    return createFromQuat(plog, Quat<T>(jacob.inv() * getDualPart().toVec()));
+}
+
+template <typename T>
+inline DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return dq.power(t, assumeUnit);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::power(const T t, QuatAssumeType assumeUnit) const
+{
+    return (t * log(assumeUnit)).exp();
+}
+
+template <typename T>
+inline DualQuat<T> power(const DualQuat<T> &p, const DualQuat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return p.power(q, assumeUnit);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::power(const DualQuat<T> &q, QuatAssumeType assumeUnit) const
+{
+    return (q * log(assumeUnit)).exp();
+}
+
+template <typename T>
+inline Vec<T, 8> DualQuat<T>::toVec() const
+{
+    return Vec<T, 8>(w, x, y, z, w_, x_, y_, z_);
+}
+
+template <typename T>
+Affine3<T> DualQuat<T>::toAffine3(QuatAssumeType assumeUnit) const
+{
+    return Affine3<T>(toMat(assumeUnit));
+}
+
+template <typename T>
+Matx<T, 4, 4> DualQuat<T>::toMat(QuatAssumeType assumeUnit) const
+{
+    Matx<T, 4, 4> rot44 = getRotation(assumeUnit).toRotMat4x4();
+    Vec<T, 3> translation = getTranslation(assumeUnit);
+    rot44(0, 3) = translation[0];
+    rot44(1, 3) = translation[1];
+    rot44(2, 3) = translation[2];
+    return rot44;
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::sclerp(const DualQuat<T> &q0, const DualQuat<T> &q1, const T t, bool directChange, QuatAssumeType assumeUnit)
+{
+    DualQuat<T> v0(q0), v1(q1);
+    if (!assumeUnit)
+    {
+        v0 = v0.normalize();
+        v1 = v1.normalize();
+    }
+    Quat<T> v0Real = v0.getRealPart();
+    Quat<T> v1Real = v1.getRealPart();
+    if (directChange && v1Real.dot(v0Real) < 0)
+    {
+        v0 = -v0;
+    }
+    DualQuat<T> v0inv1 = v0.inv() * v1;
+    return v0 *
+           v0inv1.power(t, QUAT_ASSUME_UNIT);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::dqblend(const DualQuat<T> &q1, const DualQuat<T> &q2, const T t, QuatAssumeType assumeUnit)
+{
+    DualQuat<T> v1(q1), v2(q2);
+    if (!assumeUnit)
+    {
+        v1 = v1.normalize();
+        v2 = v2.normalize();
+    }
+    if (v1.getRotation(assumeUnit).dot(v2.getRotation(assumeUnit)) < 0)
+    {
+        return ((1 - t) * v1 - t * v2).normalize();
+    }
+    return ((1 - t) * v1 + t * v2).normalize();
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::gdqblend(InputArray _dualquat, InputArray _weight, QuatAssumeType assumeUnit)
+{
+    CV_CheckTypeEQ(_weight.type(), cv::traits::Type<T>::value, "");
+    CV_CheckTypeEQ(_dualquat.type(), CV_MAKETYPE(CV_MAT_DEPTH(cv::traits::Type<T>::value), 8), "");
+    Size dq_s = _dualquat.size();
+    if (dq_s != _weight.size() || (dq_s.height != 1 && dq_s.width != 1))
+    {
+        CV_Error(Error::StsBadArg, "The size of weight must be the same as dualquat, both of them should be (1, n) or (n, 1)");
+    }
+    Mat dualquat = _dualquat.getMat(), weight = _weight.getMat();
+    const int cn = std::max(dq_s.width, dq_s.height);
+    if (!assumeUnit)
+    {
+        for (int i = 0; i < cn; ++i)
+        {
+            dualquat.at<Vec<T, 8>>(i) = DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.normalize().toVec();
+        }
+    }
+    Vec<T, 8> dq_blend = dualquat.at<Vec<T, 8>>(0) * weight.at<T>(0);
+    Quat<T> q0 = DualQuat<T>{dualquat.at<Vec<T, 8>>(0)}.getRotation(assumeUnit);
+    for (int i = 1; i < cn; ++i)
+    {
+        T k = q0.dot(DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.getRotation(assumeUnit)) < 0 ? -1 : 1;
+        dq_blend = dq_blend + dualquat.at<Vec<T, 8>>(i) * k * weight.at<T>(i);
+    }
+    return DualQuat<T>{dq_blend}.normalize();
+}
+
+template <typename T>
+template <int cn>
+DualQuat<T> DualQuat<T>::gdqblend(const Vec<DualQuat<T>, cn> &_dualquat, InputArray _weight, QuatAssumeType assumeUnit)
+{
+    Vec<DualQuat<T>, cn> dualquat(_dualquat);
+    if (cn == 0)
+    {
+        return DualQuat<T>(1, 0, 0, 0, 0, 0, 0, 0);
+    }
+    Mat dualquat_mat(cn, 1, CV_64FC(8));
+    for (int i = 0; i < cn ; ++i)
+    {
+        dualquat_mat.at<Vec<T, 8>>(i) = dualquat[i].toVec();
+    }
+    return gdqblend(dualquat_mat, _weight, assumeUnit);
+}
+
+} //namespace cv
+
+#endif /*OPENCV_CORE_DUALQUATERNION_INL_HPP*/
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/eigen.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/eigen.hpp
new file mode 100644
index 0000000..51f4147
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/eigen.hpp
@@ -0,0 +1,402 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#ifndef OPENCV_CORE_EIGEN_HPP +#define OPENCV_CORE_EIGEN_HPP + +#ifndef EIGEN_WORLD_VERSION +#error "Wrong usage of OpenCV's Eigen utility header. Include Eigen's headers first. See https://github.com/opencv/opencv/issues/17366" +#endif + +#include "opencv2/core.hpp" + +#if defined _MSC_VER && _MSC_VER >= 1200 +#define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548 +#pragma warning( disable: 4714 ) //__forceinline is not inlined +#pragma warning( disable: 4127 ) //conditional expression is constant +#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data +#endif + +#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT) +#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \ + && defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY) +#include +#define OPENCV_EIGEN_TENSOR_SUPPORT 1 +#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 +#endif // !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT) + +namespace cv +{ + +/** @addtogroup core_eigen +These functions are provided for OpenCV-Eigen interoperability. They convert `Mat` +objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen +documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for +information about the `Matrix` template type. + +@note Using these functions requires the `Eigen/Dense` or similar header to be +included before this header. +*/ +//! @{ + +#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN) +/** @brief Converts an Eigen::Tensor to a cv::Mat. 
+ +The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where: + H = number of rows + W = number of columns + C = number of channels + +Usage: +\code +Eigen::Tensor a_tensor(...); +// populate tensor with values +Mat a_mat; +eigen2cv(a_tensor, a_mat); +\endcode +*/ +template static inline +void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst ) +{ + if( !(_layout & Eigen::RowMajorBit) ) + { + const std::array shuffle{2, 1, 0}; + Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle); + Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data()); + _src.copyTo(dst); + } + else + { + Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data()); + _src.copyTo(dst); + } +} + +/** @brief Converts a cv::Mat to an Eigen::Tensor. + +The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where: + H = number of rows + W = number of columns + C = number of channels + +Usage: +\code +Mat a_mat(...); +// populate Mat with values +Eigen::Tensor a_tensor(...); +cv2eigen(a_mat, a_tensor); +\endcode +*/ +template static inline +void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst ) +{ + if( !(_layout & Eigen::RowMajorBit) ) + { + Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels()); + Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data()); + if (src.type() == _dst.type()) + src.copyTo(_dst); + else + src.convertTo(_dst, _dst.type()); + const std::array shuffle{2, 1, 0}; + dst = row_major_tensor.swap_layout().shuffle(shuffle); + } + else + { + dst.resize(src.rows, src.cols, src.channels()); + Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data()); + if (src.type() == _dst.type()) + src.copyTo(_dst); + else + src.convertTo(_dst, _dst.type()); + } +} + +/** @brief Maps cv::Mat data to an Eigen::TensorMap. + +The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where: + H = number of rows + W = number of columns + C = number of channels + +Explicit instantiation of the return type is required. + +@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures. +The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated. 
+ +The example below initializes a cv::Mat and produces an Eigen::TensorMap: +\code +float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; +Mat a_mat(2, 2, CV_32FC3, arr); +Eigen::TensorMap> a_tensormap = cv2eigen_tensormap(a_mat); +\endcode +*/ +template static inline +Eigen::TensorMap> cv2eigen_tensormap(InputArray src) +{ + Mat mat = src.getMat(); + CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), ""); + return Eigen::TensorMap>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels()); +} +#endif // OPENCV_EIGEN_TENSOR_SUPPORT + +template static inline +void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst ) +{ + if( !(src.Flags & Eigen::RowMajorBit) ) + { + Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value, + (void*)src.data(), src.outerStride()*sizeof(_Tp)); + transpose(_src, dst); + } + else + { + Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value, + (void*)src.data(), src.outerStride()*sizeof(_Tp)); + _src.copyTo(dst); + } +} + +// Matx case +template static inline +void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, + Matx<_Tp, _rows, _cols>& dst ) +{ + if( !(src.Flags & Eigen::RowMajorBit) ) + { + dst = Matx<_Tp, _cols, _rows>(static_cast(src.data())).t(); + } + else + { + dst = Matx<_Tp, _rows, _cols>(static_cast(src.data())); + } +} + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst ) +{ + CV_DbgAssert(src.rows == _rows && src.cols == _cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else if( src.cols == src.rows ) + { + src.convertTo(_dst, _dst.type()); + transpose(_dst, _dst); + } + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +// Matx case +template static inline +void cv2eigen( const Matx<_Tp, _rows, _cols>& src, + Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst ) +{ + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + Mat(src).copyTo(_dst); + } +} + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst ) +{ + dst.resize(src.rows, src.cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else if( src.cols == src.rows ) + { + src.convertTo(_dst, _dst.type()); + transpose(_dst, _dst); + } + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +// Matx case +template static inline +void cv2eigen( const Matx<_Tp, _rows, _cols>& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst ) +{ + dst.resize(_rows, _cols); + if( 
!(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(_cols, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(_rows, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + Mat(src).copyTo(_dst); + } +} + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst ) +{ + CV_Assert(src.cols == 1); + dst.resize(src.rows); + + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +// Matx case +template static inline +void cv2eigen( const Matx<_Tp, _rows, 1>& src, + Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst ) +{ + dst.resize(_rows); + + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(1, _rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(_rows, 1, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.copyTo(_dst); + } +} + + +template static inline +void cv2eigen( const Mat& src, + Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst ) +{ + CV_Assert(src.rows == 1); + dst.resize(src.cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + if( src.type() == _dst.type() ) + transpose(src, _dst); + else + Mat(src.t()).convertTo(_dst, _dst.type()); + } + else + { + const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + src.convertTo(_dst, _dst.type()); + } +} + +//Matx +template static inline +void cv2eigen( const Matx<_Tp, 1, _cols>& src, + Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst ) +{ + dst.resize(_cols); + if( !(dst.Flags & Eigen::RowMajorBit) ) + { + const Mat _dst(_cols, 1, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + transpose(src, _dst); + } + else + { + const Mat _dst(1, _cols, traits::Type<_Tp>::value, + dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp))); + Mat(src).copyTo(_dst); + } +} + +//! @} + +} // cv + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/fast_math.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/fast_math.hpp new file mode 100644 index 0000000..eb4fbe2 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/fast_math.hpp @@ -0,0 +1,411 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_FAST_MATH_HPP +#define OPENCV_CORE_FAST_MATH_HPP + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils +//! @{ + +/****************************************************************************************\ +* fast math * +\****************************************************************************************/ + +#ifdef __cplusplus +# include +#else +# ifdef __BORLANDC__ +# include +# else +# include +# endif +#endif + +#if defined(__CUDACC__) + // nothing, intrinsics/asm code is not supported +#else + #if ((defined _MSC_VER && defined _M_X64) \ + || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \ + && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H) + #include + #endif + + #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \ + && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H) + #include + #undef vector + #undef bool + #undef pixel + #endif + + #if defined(CV_INLINE_ROUND_FLT) + // user-specified version + // CV_INLINE_ROUND_DBL should be defined too + #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ + // 1. general scheme + #define ARM_ROUND(_value, _asm_string) \ + int res; \ + float temp; \ + CV_UNUSED(temp); \ + __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ + return res + // 2. version for double + #ifdef __clang__ + #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") + #else + #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") + #endif + // 3. 
version for float + #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") + #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 + // P8 and newer machines can convert fp32/64 to int quickly. + #define CV_INLINE_ROUND_DBL(value) \ + int out; \ + double temp; \ + __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \ + return out; + + // FP32 also works with FP64 routine above + #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value) + #endif + + #ifdef CV_INLINE_ISINF_FLT + // user-specified version + // CV_INLINE_ISINF_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); + #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) + #endif + + #ifdef CV_INLINE_ISNAN_FLT + // user-specified version + // CV_INLINE_ISNAN_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40); + #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) + #endif + + #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ + && ( \ + defined(__x86_64__) || defined(__i686__) \ + || defined(__arm__) \ + || defined(__PPC64__) \ + ) + /* Let builtin C math functions when available. Dedicated hardware is available to + round and convert FP values. */ + #define OPENCV_USE_FASTMATH_BUILTINS 1 + #endif + + /* Enable builtin math functions if possible, desired, and available. + Note, not all math functions inline equally. E.g lrint will not inline + without the -fno-math-errno option. 
*/ + #if defined(CV_ICC) + // nothing + #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS + #if defined(__clang__) + #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value); + #endif + #elif defined(__GNUC__) + #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value); + #endif + #elif defined(_MSC_VER) + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return isinf(value); + #endif + #endif + #endif + +#endif // defined(__CUDACC__) + +/** @brief Rounds floating-point number to the nearest integer + + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int +cvRound( double value ) +{ +#if defined CV_INLINE_ROUND_DBL + CV_INLINE_ROUND_DBL(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) + __m128d t = _mm_set_sd( value ); + return _mm_cvtsd_si32(t); +#elif defined _MSC_VER && defined _M_IX86 + int t; + __asm + { + fld value; + fistp t; + } + return t; +#elif defined CV_ICC || defined __GNUC__ + return (int)(lrint(value)); +#else + /* it's ok if round does not comply with IEEE754 standard; + the tests should allow +/-1 difference when the tested functions use round */ + return (int)(value + (value >= 0 ? 0.5 : -0.5)); +#endif +} + + +/** @brief Rounds floating-point number to the nearest integer not larger than the original. + + The function computes an integer i such that: + \f[i \le \texttt{value} < i+1\f] + @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int cvFloor( double value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_floor(value); +#else + int i = (int)value; + return i - (i > value); +#endif +} + +/** @brief Rounds floating-point number to the nearest integer not smaller than the original. + + The function computes an integer i such that: + \f[i \le \texttt{value} < i+1\f] + @param value floating-point number. 
If the value is outside of INT_MIN ... INT_MAX range, the + result is not defined. + */ +CV_INLINE int cvCeil( double value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_ceil(value); +#else + int i = (int)value; + return i + (i < value); +#endif +} + +/** @brief Determines if the argument is Not A Number. + + @param value The input floating-point value + + The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0 + otherwise. */ +CV_INLINE int cvIsNaN( double value ) +{ +#if defined CV_INLINE_ISNAN_DBL + CV_INLINE_ISNAN_DBL(value); +#else + Cv64suf ieee754; + ieee754.f = value; + return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) + + ((unsigned)ieee754.u != 0) > 0x7ff00000; +#endif +} + +/** @brief Determines if the argument is Infinity. + + @param value The input floating-point value + + The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard) + and 0 otherwise. */ +CV_INLINE int cvIsInf( double value ) +{ +#if defined CV_INLINE_ISINF_DBL + CV_INLINE_ISINF_DBL(value); +#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__) + Cv64suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff00000000) == + 0x7ff0000000000000; +#else + Cv64suf ieee754; + ieee754.f = value; + return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && + (unsigned)ieee754.u == 0; +#endif +} + +#ifdef __cplusplus + +/** @overload */ +CV_INLINE int cvRound(float value) +{ +#if defined CV_INLINE_ROUND_FLT + CV_INLINE_ROUND_FLT(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) + __m128 t = _mm_set_ss( value ); + return _mm_cvtss_si32(t); +#elif defined _MSC_VER && defined _M_IX86 + int t; + __asm + { + fld value; + fistp t; + } + return t; +#elif defined CV_ICC || defined __GNUC__ + return (int)(lrintf(value)); +#else + /* it's ok if round does not comply with IEEE754 standard; + the tests should allow +/-1 difference when the tested functions use round */ + return (int)(value + (value >= 0 ? 
0.5f : -0.5f)); +#endif +} + +/** @overload */ +CV_INLINE int cvRound( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvFloor( float value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_floorf(value); +#else + int i = (int)value; + return i - (i > value); +#endif +} + +/** @overload */ +CV_INLINE int cvFloor( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvCeil( float value ) +{ +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) + return __builtin_ceilf(value); +#else + int i = (int)value; + return i + (i < value); +#endif +} + +/** @overload */ +CV_INLINE int cvCeil( int value ) +{ + return value; +} + +/** @overload */ +CV_INLINE int cvIsNaN( float value ) +{ +#if defined CV_INLINE_ISNAN_FLT + CV_INLINE_ISNAN_FLT(value); +#else + Cv32suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff) > 0x7f800000; +#endif +} + +/** @overload */ +CV_INLINE int cvIsInf( float value ) +{ +#if defined CV_INLINE_ISINF_FLT + CV_INLINE_ISINF_FLT(value); +#else + Cv32suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff) == 0x7f800000; +#endif +} + +#endif // __cplusplus + +//! @} core_utils + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/hal.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/hal.hpp new file mode 100644 index 0000000..0d68078 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/hal.hpp @@ -0,0 +1,256 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_HPP +#define OPENCV_HAL_HPP + +#include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" +#include "opencv2/core/hal/interface.h" + +namespace cv { namespace hal { + +//! @addtogroup core_hal_functions +//! @{ + +CV_EXPORTS int normHamming(const uchar* a, int n); +CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n); + +CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize); +CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize); + +CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags); +CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors); +CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors); + +CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step, + float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step, + double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step, + float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); +CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step, + double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step, + int m_a, int n_a, int n_d, int flags); + +CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n); +CV_EXPORTS float normL1_(const float* a, const float* b, int n); +CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n); + +CV_EXPORTS void exp32f(const float* src, float* dst, int n); +CV_EXPORTS void exp64f(const double* src, double* dst, int n); +CV_EXPORTS void log32f(const float* src, float* dst, int n); +CV_EXPORTS void log64f(const double* src, double* dst, int n); + +CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees); +CV_EXPORTS void 
magnitude32f(const float* x, const float* y, float* dst, int n); +CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n); +CV_EXPORTS void sqrt32f(const float* src, float* dst, int len); +CV_EXPORTS void sqrt64f(const double* src, double* dst, int len); +CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len); +CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len); + +CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn ); +CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn ); +CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn ); +CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn ); + +CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn ); +CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn ); +CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn ); +CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn ); + +CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); 
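+// [Editorial note -- not part of upstream opencv-mobile] The element-wise +// primitives declared here share one calling convention: source pointers with +// per-row strides in bytes, a destination pointer with its stride, a width in +// elements, a height in rows, and an opaque trailing argument (unused for +// add/sub/min/max, a scale factor for mul/div). A cv::Mat therefore maps onto +// them directly; a minimal sketch, assuming single-channel CV_8U inputs: +// +// #include <opencv2/core.hpp> +// #include <opencv2/core/hal/hal.hpp> +// void demo_add8u(const cv::Mat& a, const cv::Mat& b, cv::Mat& sum) +// { +// CV_Assert(a.type() == CV_8UC1 && a.size() == b.size()); +// sum.create(a.size(), CV_8UC1); +// cv::hal::add8u(a.ptr(), a.step, b.ptr(), b.step, // saturating add +// sum.ptr(), sum.step, a.cols, a.rows, nullptr); +// }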
+CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); +CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* ); + +CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int 
width, int height, void* _cmpop); +CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); +CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop); + +CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32s( 
const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); + +CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); +CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); +CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); + +CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len ); +CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len ); + +CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ); +CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ); + +struct CV_EXPORTS DFT1D +{ + static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0); + virtual void apply(const uchar *src, uchar *dst) = 0; + virtual ~DFT1D() {} +}; + +struct CV_EXPORTS DFT2D +{ + static Ptr<DFT2D> create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows = 0); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DFT2D() {} +}; + +struct CV_EXPORTS DCT2D +{ + static Ptr<DCT2D> create(int width, int height, int depth, int flags); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DCT2D() {} +}; + +//! @} core_hal + +//============================================================================= +// for binary compatibility with 3.0 + +//!
@cond IGNORED + +CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n); +CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n); + +CV_EXPORTS void exp(const float* src, float* dst, int n); +CV_EXPORTS void exp(const double* src, double* dst, int n); +CV_EXPORTS void log(const float* src, float* dst, int n); +CV_EXPORTS void log(const double* src, double* dst, int n); + +CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); +CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n); +CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n); +CV_EXPORTS void sqrt(const float* src, float* dst, int len); +CV_EXPORTS void sqrt(const double* src, double* dst, int len); +CV_EXPORTS void invSqrt(const float* src, float* dst, int len); +CV_EXPORTS void invSqrt(const double* src, double* dst, int len); + +//! @endcond + +}} //cv::hal + +#endif //OPENCV_HAL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/interface.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/interface.h new file mode 100644 index 0000000..6f0a83d --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/interface.h @@ -0,0 +1,190 @@ +#ifndef OPENCV_CORE_HAL_INTERFACE_H +#define OPENCV_CORE_HAL_INTERFACE_H + +//! @addtogroup core_hal_interface +//! @{ + +//! @name Return codes +//! @{ +#define CV_HAL_ERROR_OK 0 +#define CV_HAL_ERROR_NOT_IMPLEMENTED 1 +#define CV_HAL_ERROR_UNKNOWN -1 +//! @} + +#ifdef __cplusplus +#include <cstddef> +#else +#include <stddef.h> +#include <stdbool.h> +#endif + +//! @name Data types +//! primitive types +//! - schar - signed 1 byte integer +//! - uchar - unsigned 1 byte integer +//! - short - signed 2 byte integer +//! - ushort - unsigned 2 byte integer +//! - int - signed 4 byte integer +//! - uint - unsigned 4 byte integer +//! - int64 - signed 8 byte integer +//! - uint64 - unsigned 8 byte integer +//!
@{ +#if !defined _MSC_VER && !defined __BORLANDC__ +# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__ +# include <cstdint> +# ifdef __NEWLIB__ + typedef unsigned int uint; +# else + typedef std::uint32_t uint; +# endif +# else +# include <stdint.h> + typedef uint32_t uint; +# endif +#else + typedef unsigned uint; +#endif + +typedef signed char schar; + +#ifndef __IPL_H__ + typedef unsigned char uchar; + typedef unsigned short ushort; +#endif + +#if defined _MSC_VER || defined __BORLANDC__ + typedef __int64 int64; + typedef unsigned __int64 uint64; +# define CV_BIG_INT(n) n##I64 +# define CV_BIG_UINT(n) n##UI64 +#else + typedef int64_t int64; + typedef uint64_t uint64; +# define CV_BIG_INT(n) n##LL +# define CV_BIG_UINT(n) n##ULL +#endif + +#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0" + +#define CV_CN_MAX 512 +#define CV_CN_SHIFT 3 +#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) + +#define CV_8U 0 +#define CV_8S 1 +#define CV_16U 2 +#define CV_16S 3 +#define CV_32S 4 +#define CV_32F 5 +#define CV_64F 6 +#define CV_16F 7 + +#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) +#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) + +#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) +#define CV_MAKE_TYPE CV_MAKETYPE + +#define CV_8UC1 CV_MAKETYPE(CV_8U,1) +#define CV_8UC2 CV_MAKETYPE(CV_8U,2) +#define CV_8UC3 CV_MAKETYPE(CV_8U,3) +#define CV_8UC4 CV_MAKETYPE(CV_8U,4) +#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) + +#define CV_8SC1 CV_MAKETYPE(CV_8S,1) +#define CV_8SC2 CV_MAKETYPE(CV_8S,2) +#define CV_8SC3 CV_MAKETYPE(CV_8S,3) +#define CV_8SC4 CV_MAKETYPE(CV_8S,4) +#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) + +#define CV_16UC1 CV_MAKETYPE(CV_16U,1) +#define CV_16UC2 CV_MAKETYPE(CV_16U,2) +#define CV_16UC3 CV_MAKETYPE(CV_16U,3) +#define CV_16UC4 CV_MAKETYPE(CV_16U,4) +#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) + +#define CV_16SC1 CV_MAKETYPE(CV_16S,1) +#define CV_16SC2 CV_MAKETYPE(CV_16S,2) +#define CV_16SC3 CV_MAKETYPE(CV_16S,3) +#define CV_16SC4 CV_MAKETYPE(CV_16S,4) +#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) + +#define CV_32SC1 CV_MAKETYPE(CV_32S,1) +#define CV_32SC2 CV_MAKETYPE(CV_32S,2) +#define CV_32SC3 CV_MAKETYPE(CV_32S,3) +#define CV_32SC4 CV_MAKETYPE(CV_32S,4) +#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) + +#define CV_32FC1 CV_MAKETYPE(CV_32F,1) +#define CV_32FC2 CV_MAKETYPE(CV_32F,2) +#define CV_32FC3 CV_MAKETYPE(CV_32F,3) +#define CV_32FC4 CV_MAKETYPE(CV_32F,4) +#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) + +#define CV_64FC1 CV_MAKETYPE(CV_64F,1) +#define CV_64FC2 CV_MAKETYPE(CV_64F,2) +#define CV_64FC3 CV_MAKETYPE(CV_64F,3) +#define CV_64FC4 CV_MAKETYPE(CV_64F,4) +#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) + +#define CV_16FC1 CV_MAKETYPE(CV_16F,1) +#define CV_16FC2 CV_MAKETYPE(CV_16F,2) +#define CV_16FC3 CV_MAKETYPE(CV_16F,3) +#define CV_16FC4 CV_MAKETYPE(CV_16F,4) +#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n)) +//! @} + +//! @name Comparison operation +//! @sa cv::CmpTypes +//! @{ +#define CV_HAL_CMP_EQ 0 +#define CV_HAL_CMP_GT 1 +#define CV_HAL_CMP_GE 2 +#define CV_HAL_CMP_LT 3 +#define CV_HAL_CMP_LE 4 +#define CV_HAL_CMP_NE 5 +//! @} + +//! @name Border processing modes +//! @sa cv::BorderTypes +//! @{ +#define CV_HAL_BORDER_CONSTANT 0 +#define CV_HAL_BORDER_REPLICATE 1 +#define CV_HAL_BORDER_REFLECT 2 +#define CV_HAL_BORDER_WRAP 3 +#define CV_HAL_BORDER_REFLECT_101 4 +#define CV_HAL_BORDER_TRANSPARENT 5 +#define CV_HAL_BORDER_ISOLATED 16 +//! @} + +//! @name DFT flags +//!
@{ +#define CV_HAL_DFT_INVERSE 1 +#define CV_HAL_DFT_SCALE 2 +#define CV_HAL_DFT_ROWS 4 +#define CV_HAL_DFT_COMPLEX_OUTPUT 16 +#define CV_HAL_DFT_REAL_OUTPUT 32 +#define CV_HAL_DFT_TWO_STAGE 64 +#define CV_HAL_DFT_STAGE_COLS 128 +#define CV_HAL_DFT_IS_CONTINUOUS 512 +#define CV_HAL_DFT_IS_INPLACE 1024 +//! @} + +//! @name SVD flags +//! @{ +#define CV_HAL_SVD_NO_UV 1 +#define CV_HAL_SVD_SHORT_UV 2 +#define CV_HAL_SVD_MODIFY_A 4 +#define CV_HAL_SVD_FULL_UV 8 +//! @} + +//! @name Gemm flags +//! @{ +#define CV_HAL_GEMM_1_T 1 +#define CV_HAL_GEMM_2_T 2 +#define CV_HAL_GEMM_3_T 4 +//! @} + +//! @} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin.hpp new file mode 100644 index 0000000..ac331f2 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin.hpp @@ -0,0 +1,706 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_HAL_INTRIN_HPP +#define OPENCV_HAL_INTRIN_HPP + +#include <cmath> +#include <float.h> +#include <stdlib.h> +#include "opencv2/core/cvdef.h" + +#define OPENCV_HAL_ADD(a, b) ((a) + (b)) +#define OPENCV_HAL_AND(a, b) ((a) & (b)) +#define OPENCV_HAL_NOP(a) (a) +#define OPENCV_HAL_1ST(a, b) (a) + +namespace { +inline unsigned int trailingZeros32(unsigned int value) { +#if defined(_MSC_VER) +#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64) + unsigned long index = 0; + _BitScanForward(&index, value); + return (unsigned int)index; +#elif defined(__clang__) + // clang-cl doesn't export _tzcnt_u32 for non BMI systems + return value ? __builtin_ctz(value) : 32; +#else + return _tzcnt_u32(value); +#endif +#elif defined(__GNUC__) || defined(__GNUG__) + return __builtin_ctz(value); +#elif defined(__ICC) || defined(__INTEL_COMPILER) + return _bit_scan_forward(value); +#elif defined(__clang__) + return llvm.cttz.i32(value, true); +#else + static const int MultiplyDeBruijnBitPosition[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; + return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27]; +#endif +} +} + +// unlike HAL API, which is in cv::hal, +// we put intrinsics into cv namespace to make its +// access from within opencv code more accessible +namespace cv { + +namespace hal { + +enum StoreMode +{ + STORE_UNALIGNED = 0, + STORE_ALIGNED = 1, + STORE_ALIGNED_NOCACHE = 2 +}; + +} + +// TODO FIXIT: Don't use "God" traits. Split on separate cases. +template<typename _Tp> struct V_TypeTraits +{ +}; + +#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \ + template<> struct V_TypeTraits<type> \ + { \ + typedef type value_type; \ + typedef int_type_ int_type; \ + typedef abs_type_ abs_type; \ + typedef uint_type_ uint_type; \ + typedef w_type_ w_type; \ + typedef q_type_ q_type; \ + typedef sum_type_ sum_type; \ + \ + static inline int_type reinterpret_int(type x) \ + { \ + union { type l; int_type i; } v; \ + v.l = x; \ + return v.i; \ + } \ + \ + static inline type reinterpret_from_int(int_type x) \ + { \ + union { type l; int_type i; } v; \ + v.i = x; \ + return v.l; \ + } \ + } + +#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \ + template<> struct V_TypeTraits<type> \ + { \ + typedef type value_type; \ + typedef int_type_ int_type; \ + typedef abs_type_ abs_type; \ + typedef uint_type_ uint_type; \ + typedef w_type_ w_type; \ + typedef sum_type_ sum_type; \ + \ + static inline int_type reinterpret_int(type x) \ + { \ + union { type l; int_type i; } v; \ + v.l = x; \ + return v.i; \ + } \ + \ + static inline type reinterpret_from_int(int_type x) \ + { \ + union { type l; int_type i; } v; \ + v.i = x; \ + return v.l; \ + } \ + } + +CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned); +CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int); +CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned); +CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
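+// [Editorial note -- not part of upstream opencv-mobile] V_TypeTraits maps a +// lane type to its companion types (wider w_type for accumulation, uint_type, +// abs_type, sum_type) and provides bit-exact reinterpretation between a lane +// type and its integer twin. A minimal sketch of what the traits above give +// you, assuming the header is included via opencv2/core/hal/intrin.hpp: +// +// #include <cassert> +// void traits_demo() +// { +// // float <-> int bit reinterpretation round-trips exactly +// int bits = cv::V_TypeTraits<float>::reinterpret_int(1.0f); +// float back = cv::V_TypeTraits<float>::reinterpret_from_int(bits); +// assert(back == 1.0f); +// // short accumulates into its wider companion type (int) +// typedef cv::V_TypeTraits<short>::w_type wide; +// wide acc = (wide)32000 + 32000; // would overflow short, fits in w_type +// assert(acc == 64000); +// }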
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double); + +#ifndef CV_DOXYGEN + +#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE +#ifdef CV_FORCE_SIMD128_CPP + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#elif defined(CV_CPU_DISPATCH_MODE) + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#else + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline { + #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#endif +#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; +#endif +} + +#ifdef CV_DOXYGEN +# undef CV_AVX2 +# undef CV_SSE2 +# undef CV_NEON +# undef CV_VSX +# undef CV_FP16 +# undef CV_MSA +# undef CV_RVV +#endif + +#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP) +#define CV__SIMD_FORWARD 128 +#include "opencv2/core/hal/intrin_forward.hpp" +#endif + +#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_sse_em.hpp" +#include "opencv2/core/hal/intrin_sse.hpp" + +#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_neon.hpp" + +#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP) +#define CV_SIMD128_CPP 0 +#include "opencv2/core/hal/intrin_rvv071.hpp" + +#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_vsx.hpp" + +#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP) + +#include "opencv2/core/hal/intrin_msa.hpp" + +#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP) +#include "opencv2/core/hal/intrin_wasm.hpp" + +#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP) +#include "opencv2/core/hal/intrin_rvv.hpp" + +#else + +#include "opencv2/core/hal/intrin_cpp.hpp" + +#endif + +// AVX2 can be used together with SSE2, so +// we define those two sets of intrinsics at once. +// Most of the intrinsics do not conflict (the proper overloaded variant is +// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2), +// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load(). +// Correspondingly, the wide intrinsics (which are mapped to the "widest" +// available instruction set) will get vx_ prefix +// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load()) +#if CV_AVX2 + +#define CV__SIMD_FORWARD 256 +#include "opencv2/core/hal/intrin_forward.hpp" +#include "opencv2/core/hal/intrin_avx.hpp" + +#endif + +// AVX512 can be used together with SSE2 and AVX2, so +// we define those sets of intrinsics at once. +// For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load(). +// Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load()) +#if CV_AVX512_SKX + +#define CV__SIMD_FORWARD 512 +#include "opencv2/core/hal/intrin_forward.hpp" +#include "opencv2/core/hal/intrin_avx512.hpp" + +#endif + +//! 
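@cond IGNORED +// [Editorial note -- not part of upstream opencv-mobile] The "widest +// available" vx_ forms declared further below let one loop body serve SSE2, +// AVX2 and AVX-512 builds alike. A minimal sketch, assuming CV_SIMD != 0 +// (v_fma and the vx_ loads/stores are part of the universal intrinsics API): +// +// void muladd(const float* a, const float* b, float* dst, int n, float k) +// { +// int i = 0; +// v_float32 vk = vx_setall_f32(k); // lane count tracks CV_SIMD_WIDTH +// for (; i <= n - v_float32::nlanes; i += v_float32::nlanes) +// v_store(dst + i, v_fma(vx_load(a + i), vk, vx_load(b + i))); +// vx_cleanup(); +// for (; i < n; i++) // scalar tail +// dst[i] = a[i] * k + b[i]; +// } +//! @endcond + +//!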
@cond IGNORED + +namespace cv { + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + +#ifndef CV_SIMD128 +#define CV_SIMD128 0 +#endif + +#ifndef CV_SIMD128_CPP +#define CV_SIMD128_CPP 0 +#endif + +#ifndef CV_SIMD128_64F +#define CV_SIMD128_64F 0 +#endif + +#ifndef CV_SIMD256 +#define CV_SIMD256 0 +#endif + +#ifndef CV_SIMD256_64F +#define CV_SIMD256_64F 0 +#endif + +#ifndef CV_SIMD512 +#define CV_SIMD512 0 +#endif + +#ifndef CV_SIMD512_64F +#define CV_SIMD512_64F 0 +#endif + +#ifndef CV_SIMD128_FP16 +#define CV_SIMD128_FP16 0 +#endif + +#ifndef CV_SIMD256_FP16 +#define CV_SIMD256_FP16 0 +#endif + +#ifndef CV_SIMD512_FP16 +#define CV_SIMD512_FP16 0 +#endif + +//================================================================================================== + +template<typename _Tp> struct V_RegTraits +{ +}; + +#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \ + template<> struct V_RegTraits<_reg> \ + { \ + typedef _reg reg; \ + typedef _u_reg u_reg; \ + typedef _w_reg w_reg; \ + typedef _q_reg q_reg; \ + typedef _int_reg int_reg; \ + typedef _round_reg round_reg; \ + } + +#if CV_SIMD128 || CV_SIMD128_CPP + CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void); + CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void); + CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void); + CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void); + CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void); + CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void); +#if CV_SIMD128_64F || CV_SIMD128_CPP + CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4); +#else + CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4); +#endif + CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void); + CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void); +#if CV_SIMD128_64F + CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4); +#endif +#endif + +#if CV_SIMD256 + CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void); + CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void); + CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void); + CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void); + CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void); + CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void); + CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8); + CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void); + CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void); + CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8); +#endif + +#if CV_SIMD512 + CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void); + CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, 
v_int16x32, v_int32x16, v_int8x64, void); + CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void); + CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void); + CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void); + CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void); + CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16); + CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void); + CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void); + CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16); +#endif +//! @endcond + +#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) +#define CV__SIMD_NAMESPACE simd512 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD512_64F + #define CV_SIMD_FP16 CV_SIMD512_FP16 + #define CV_SIMD_WIDTH 64 +//! @addtogroup core_hal_intrin +//! @{ + //! @brief Maximum available vector register capacity 8-bit unsigned integer values + typedef v_uint8x64 v_uint8; + //! @brief Maximum available vector register capacity 8-bit signed integer values + typedef v_int8x64 v_int8; + //! @brief Maximum available vector register capacity 16-bit unsigned integer values + typedef v_uint16x32 v_uint16; + //! @brief Maximum available vector register capacity 16-bit signed integer values + typedef v_int16x32 v_int16; + //! @brief Maximum available vector register capacity 32-bit unsigned integer values + typedef v_uint32x16 v_uint32; + //! @brief Maximum available vector register capacity 32-bit signed integer values + typedef v_int32x16 v_int32; + //! @brief Maximum available vector register capacity 64-bit unsigned integer values + typedef v_uint64x8 v_uint64; + //! @brief Maximum available vector register capacity 64-bit signed integer values + typedef v_int64x8 v_int64; + //! @brief Maximum available vector register capacity 32-bit floating point values (single precision) + typedef v_float32x16 v_float32; + #if CV_SIMD512_64F + //! @brief Maximum available vector register capacity 64-bit floating point values (double precision) + typedef v_float64x8 v_float64; + #endif +//! @} + + #define VXPREFIX(func) v512##func +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) +#define CV__SIMD_NAMESPACE simd256 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD256_64F + #define CV_SIMD_FP16 CV_SIMD256_FP16 + #define CV_SIMD_WIDTH 32 +//! @addtogroup core_hal_intrin +//! @{ + //! @brief Maximum available vector register capacity 8-bit unsigned integer values + typedef v_uint8x32 v_uint8; + //! @brief Maximum available vector register capacity 8-bit signed integer values + typedef v_int8x32 v_int8; + //! @brief Maximum available vector register capacity 16-bit unsigned integer values + typedef v_uint16x16 v_uint16; + //! @brief Maximum available vector register capacity 16-bit signed integer values + typedef v_int16x16 v_int16; + //! @brief Maximum available vector register capacity 32-bit unsigned integer values + typedef v_uint32x8 v_uint32; + //! @brief Maximum available vector register capacity 32-bit signed integer values + typedef v_int32x8 v_int32; + //! 
@brief Maximum available vector register capacity 64-bit unsigned integer values + typedef v_uint64x4 v_uint64; + //! @brief Maximum available vector register capacity 64-bit signed integer values + typedef v_int64x4 v_int64; + //! @brief Maximum available vector register capacity 32-bit floating point values (single precision) + typedef v_float32x8 v_float32; + #if CV_SIMD256_64F + //! @brief Maximum available vector register capacity 64-bit floating point values (double precision) + typedef v_float64x4 v_float64; + #endif +//! @} + + #define VXPREFIX(func) v256##func +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) +#if defined CV_SIMD128_CPP +#define CV__SIMD_NAMESPACE simd128_cpp +#else +#define CV__SIMD_NAMESPACE simd128 +#endif +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD CV_SIMD128 + #define CV_SIMD_64F CV_SIMD128_64F + #define CV_SIMD_WIDTH 16 +//! @addtogroup core_hal_intrin +//! @{ + //! @brief Maximum available vector register capacity 8-bit unsigned integer values + typedef v_uint8x16 v_uint8; + //! @brief Maximum available vector register capacity 8-bit signed integer values + typedef v_int8x16 v_int8; + //! @brief Maximum available vector register capacity 16-bit unsigned integer values + typedef v_uint16x8 v_uint16; + //! @brief Maximum available vector register capacity 16-bit signed integer values + typedef v_int16x8 v_int16; + //! @brief Maximum available vector register capacity 32-bit unsigned integer values + typedef v_uint32x4 v_uint32; + //! @brief Maximum available vector register capacity 32-bit signed integer values + typedef v_int32x4 v_int32; + //! @brief Maximum available vector register capacity 64-bit unsigned integer values + typedef v_uint64x2 v_uint64; + //! @brief Maximum available vector register capacity 64-bit signed integer values + typedef v_int64x2 v_int64; + //! @brief Maximum available vector register capacity 32-bit floating point values (single precision) + typedef v_float32x4 v_float32; + #if CV_SIMD128_64F + //! @brief Maximum available vector register capacity 64-bit floating point values (double precision) + typedef v_float64x2 v_float64; + #endif +//! @} + + #define VXPREFIX(func) v##func +} // namespace +using namespace CV__SIMD_NAMESPACE; +#endif + +namespace CV__SIMD_NAMESPACE { +//! @addtogroup core_hal_intrin +//! @{ + //! @name Wide init with value + //! @{ + //! @brief Create maximum available capacity vector with elements set to a specific value + inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); } + inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); } + inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); } + inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); } + inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); } + inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); } + inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); } + inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); } + inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); } +#if CV_SIMD_64F + inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); } +#endif + //! @} + + //! @name Wide init with zero + //! @{ + //! 
@brief Create maximum available capacity vector with elements set to zero + inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); } + inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); } + inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); } + inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); } + inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); } + inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); } + inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); } + inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); } + inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); } +#if CV_SIMD_64F + inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); } +#endif + //! @} + + //! @name Wide load from memory + //! @{ + //! @brief Load maximum available capacity register contents from memory + inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); } + inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); } + inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); } + inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); } + inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); } +#if CV_SIMD_64F + inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); } +#endif + //! @} + + //! @name Wide load from memory(aligned) + //! @{ + //! @brief Load maximum available capacity register contents from memory(aligned) + inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); } +#if CV_SIMD_64F + inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); } +#endif + //! @} + + //! @name Wide load lower half from memory + //! @{ + //! 
@brief Load lower half of maximum available capacity register from memory + inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); } +#if CV_SIMD_64F + inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); } +#endif + //! @} + + //! @name Wide load halfs from memory + //! @{ + //! @brief Load maximum available capacity register contents from two memory blocks + inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } +#if CV_SIMD_64F + inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } +#endif + //! @} + + //! @name Wide LUT of elements + //! @{ + //! @brief Load maximum available capacity register contents with array elements by provided indexes + inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } +#if CV_SIMD_64F + inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } +#endif + //! @} + + //! @name Wide LUT of element pairs + //! @{ + //! 
@brief Load maximum available capacity register contents with array element pairs by provided indexes + inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } +#if CV_SIMD_64F + inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } +#endif + //! @} + + //! @name Wide LUT of element quads + //! @{ + //! @brief Load maximum available capacity register contents with array element quads by provided indexes + inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + //! @} + + //! @name Wide load with double expansion + //! @{ + //! @brief Load maximum available capacity register contents from memory with double expand + inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); } + inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); } + //! @} + + //! @name Wide load with quad expansion + //! @{ + //! @brief Load maximum available capacity register contents from memory with quad expand + inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); } + inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); } + //! @} + + /** @brief SIMD processing state cleanup call */ + inline void vx_cleanup() { VXPREFIX(_cleanup)(); } + + +//! 
@cond IGNORED + + // backward compatibility + template<typename _Tp, typename _Tvec> static inline + void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); } + // backward compatibility + template<typename _Tp, typename _Tvec> static inline + void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); } + +//! @endcond + + +//! @} + #undef VXPREFIX +} // namespace + +//! @cond IGNORED +#ifndef CV_SIMD_64F +#define CV_SIMD_64F 0 +#endif + +#ifndef CV_SIMD_FP16 +#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types +#endif + +#ifndef CV_SIMD +#define CV_SIMD 0 +#endif + +#include "simd_utils.impl.hpp" + +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif + +} // cv:: + +//! @endcond + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_avx.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_avx.hpp new file mode 100644 index 0000000..979b616 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_avx.hpp @@ -0,0 +1,3177 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_AVX_HPP +#define OPENCV_HAL_INTRIN_AVX_HPP + +#define CV_SIMD256 1 +#define CV_SIMD256_64F 1 +#define CV_SIMD256_FP16 0 // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1) + +namespace cv +{ + +//! @cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Utils //////////// + +inline __m256i _v256_combine(const __m128i& lo, const __m128i& hi) +{ return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1); } + +inline __m256 _v256_combine(const __m128& lo, const __m128& hi) +{ return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1); } + +inline __m256d _v256_combine(const __m128d& lo, const __m128d& hi) +{ return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1); } + +inline int _v_cvtsi256_si32(const __m256i& a) +{ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a)); } + +inline __m256i _v256_shuffle_odd_64(const __m256i& v) +{ return _mm256_permute4x64_epi64(v, _MM_SHUFFLE(3, 1, 2, 0)); } + +inline __m256d _v256_shuffle_odd_64(const __m256d& v) +{ return _mm256_permute4x64_pd(v, _MM_SHUFFLE(3, 1, 2, 0)); } + +template<int imm> +inline __m256i _v256_permute2x128(const __m256i& a, const __m256i& b) +{ return _mm256_permute2x128_si256(a, b, imm); } + +template<int imm> +inline __m256 _v256_permute2x128(const __m256& a, const __m256& b) +{ return _mm256_permute2f128_ps(a, b, imm); } + +template<int imm> +inline __m256d _v256_permute2x128(const __m256d& a, const __m256d& b) +{ return _mm256_permute2f128_pd(a, b, imm); } + +template<int imm, typename _Tpvec> +inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_v256_permute2x128<imm>(a.val, b.val)); } + +template<int imm> +inline __m256i _v256_permute4x64(const __m256i& a) +{ return _mm256_permute4x64_epi64(a, imm); } + +template<int imm> +inline __m256d _v256_permute4x64(const __m256d& a) +{ return _mm256_permute4x64_pd(a, imm); } + +template<int imm, typename _Tpvec> +inline _Tpvec v256_permute4x64(const _Tpvec& a) +{ return _Tpvec(_v256_permute4x64<imm>(a.val)); } + +inline __m128i _v256_extract_high(const __m256i& v) +{ return _mm256_extracti128_si256(v, 1); } + +inline __m128 _v256_extract_high(const __m256& v) +{ return 
_mm256_extractf128_ps(v, 1); } + +inline __m128d _v256_extract_high(const __m256d& v) +{ return _mm256_extractf128_pd(v, 1); } + +inline __m128i _v256_extract_low(const __m256i& v) +{ return _mm256_castsi256_si128(v); } + +inline __m128 _v256_extract_low(const __m256& v) +{ return _mm256_castps256_ps128(v); } + +inline __m128d _v256_extract_low(const __m256d& v) +{ return _mm256_castpd256_pd128(v); } + +inline __m256i _v256_packs_epu32(const __m256i& a, const __m256i& b) +{ + const __m256i m = _mm256_set1_epi32(65535); + __m256i am = _mm256_min_epu32(a, m); + __m256i bm = _mm256_min_epu32(b, m); + return _mm256_packus_epi32(am, bm); +} + +template<int i> +inline int _v256_extract_epi8(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi8(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 4)); + return _mm_extract_epi8(b, i & 15); // SSE4.1 +#endif +} + +template<int i> +inline int _v256_extract_epi16(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi16(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 3)); + return _mm_extract_epi16(b, i & 7); // SSE2 +#endif +} + +template<int i> +inline int _v256_extract_epi32(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi32(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 2)); + return _mm_extract_epi32(b, i & 3); // SSE4.1 +#endif +} + +template<int i> +inline int64 _v256_extract_epi64(const __m256i& a) +{ +#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/)) + return _mm256_extract_epi64(a, i); +#else + __m128i b = _mm256_extractf128_si256(a, ((i) >> 1)); + return _mm_extract_epi64(b, i & 1); // SSE4.1 +#endif +} + +///////// Types //////////// + +struct v_uint8x32 +{ + typedef uchar lane_type; + enum { nlanes = 32 }; + __m256i val; + + explicit v_uint8x32(__m256i v) : val(v) {} + v_uint8x32(uchar v0, uchar v1, uchar v2, uchar v3, + uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, + uchar v12, uchar v13, uchar v14, uchar v15, + uchar v16, uchar v17, uchar v18, uchar v19, + uchar v20, uchar v21, uchar v22, uchar v23, + uchar v24, uchar v25, uchar v26, uchar v27, + uchar v28, uchar v29, uchar v30, uchar v31) + { + val = _mm256_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3, + (char)v4, (char)v5, (char)v6 , (char)v7, (char)v8, (char)v9, + (char)v10, (char)v11, (char)v12, (char)v13, (char)v14, (char)v15, + (char)v16, (char)v17, (char)v18, (char)v19, (char)v20, (char)v21, + (char)v22, (char)v23, (char)v24, (char)v25, (char)v26, (char)v27, + (char)v28, (char)v29, (char)v30, (char)v31); + } + /* coverity[uninit_ctor]: suppress warning */ + v_uint8x32() {} + + uchar get0() const { return (uchar)_v_cvtsi256_si32(val); } +}; + +struct v_int8x32 +{ + typedef schar lane_type; + enum { nlanes = 32 }; + __m256i val; + + explicit v_int8x32(__m256i v) : val(v) {} + v_int8x32(schar v0, schar v1, schar v2, schar v3, + schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, + schar v12, schar v13, schar v14, schar v15, + schar v16, schar v17, schar v18, schar v19, + schar v20, schar v21, schar v22, schar v23, + schar v24, schar v25, schar v26, schar v27, + schar v28, schar v29, schar v30, 
schar v31) + { + val = _mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, + v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31); + } + /* coverity[uninit_ctor]: suppress warning */ + v_int8x32() {} + + schar get0() const { return (schar)_v_cvtsi256_si32(val); } +}; + +struct v_uint16x16 +{ + typedef ushort lane_type; + enum { nlanes = 16 }; + __m256i val; + + explicit v_uint16x16(__m256i v) : val(v) {} + v_uint16x16(ushort v0, ushort v1, ushort v2, ushort v3, + ushort v4, ushort v5, ushort v6, ushort v7, + ushort v8, ushort v9, ushort v10, ushort v11, + ushort v12, ushort v13, ushort v14, ushort v15) + { + val = _mm256_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3, + (short)v4, (short)v5, (short)v6, (short)v7, (short)v8, (short)v9, + (short)v10, (short)v11, (short)v12, (short)v13, (short)v14, (short)v15); + } + /* coverity[uninit_ctor]: suppress warning */ + v_uint16x16() {} + + ushort get0() const { return (ushort)_v_cvtsi256_si32(val); } +}; + +struct v_int16x16 +{ + typedef short lane_type; + enum { nlanes = 16 }; + __m256i val; + + explicit v_int16x16(__m256i v) : val(v) {} + v_int16x16(short v0, short v1, short v2, short v3, + short v4, short v5, short v6, short v7, + short v8, short v9, short v10, short v11, + short v12, short v13, short v14, short v15) + { + val = _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15); + } + /* coverity[uninit_ctor]: suppress warning */ + v_int16x16() {} + + short get0() const { return (short)_v_cvtsi256_si32(val); } +}; + +struct v_uint32x8 +{ + typedef unsigned lane_type; + enum { nlanes = 8 }; + __m256i val; + + explicit v_uint32x8(__m256i v) : val(v) {} + v_uint32x8(unsigned v0, unsigned v1, unsigned v2, unsigned v3, + unsigned v4, unsigned v5, unsigned v6, unsigned v7) + { + val = _mm256_setr_epi32((unsigned)v0, (unsigned)v1, (unsigned)v2, + (unsigned)v3, (unsigned)v4, (unsigned)v5, (unsigned)v6, (unsigned)v7); + } + /* coverity[uninit_ctor]: suppress warning */ + v_uint32x8() {} + + unsigned get0() const { return (unsigned)_v_cvtsi256_si32(val); } +}; + +struct v_int32x8 +{ + typedef int lane_type; + enum { nlanes = 8 }; + __m256i val; + + explicit v_int32x8(__m256i v) : val(v) {} + v_int32x8(int v0, int v1, int v2, int v3, + int v4, int v5, int v6, int v7) + { + val = _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7); + } + /* coverity[uninit_ctor]: suppress warning */ + v_int32x8() {} + + int get0() const { return _v_cvtsi256_si32(val); } +}; + +struct v_float32x8 +{ + typedef float lane_type; + enum { nlanes = 8 }; + __m256 val; + + explicit v_float32x8(__m256 v) : val(v) {} + v_float32x8(float v0, float v1, float v2, float v3, + float v4, float v5, float v6, float v7) + { + val = _mm256_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7); + } + /* coverity[uninit_ctor]: suppress warning */ + v_float32x8() {} + + float get0() const { return _mm_cvtss_f32(_mm256_castps256_ps128(val)); } +}; + +struct v_uint64x4 +{ + typedef uint64 lane_type; + enum { nlanes = 4 }; + __m256i val; + + explicit v_uint64x4(__m256i v) : val(v) {} + v_uint64x4(uint64 v0, uint64 v1, uint64 v2, uint64 v3) + { val = _mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3); } + /* coverity[uninit_ctor]: suppress warning */ + v_uint64x4() {} + + uint64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = 
_mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } +}; + +struct v_int64x4 +{ + typedef int64 lane_type; + enum { nlanes = 4 }; + __m256i val; + + explicit v_int64x4(__m256i v) : val(v) {} + v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3) + { val = _mm256_setr_epi64x(v0, v1, v2, v3); } + /* coverity[uninit_ctor]: suppress warning */ + v_int64x4() {} + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } +}; + +struct v_float64x4 +{ + typedef double lane_type; + enum { nlanes = 4 }; + __m256d val; + + explicit v_float64x4(__m256d v) : val(v) {} + v_float64x4(double v0, double v1, double v2, double v3) + { val = _mm256_setr_pd(v0, v1, v2, v3); } + /* coverity[uninit_ctor]: suppress warning */ + v_float64x4() {} + + double get0() const { return _mm_cvtsd_f64(_mm256_castpd256_pd128(val)); } +}; + +//////////////// Load and store operations /////////////// + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE(_Tpvec, _Tp) \ + inline _Tpvec v256_load(const _Tp* ptr) \ + { return _Tpvec(_mm256_loadu_si256((const __m256i*)ptr)); } \ + inline _Tpvec v256_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm256_load_si256((const __m256i*)ptr)); } \ + inline _Tpvec v256_load_low(const _Tp* ptr) \ + { \ + __m128i v128 = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpvec(_mm256_castsi128_si256(v128)); \ + } \ + inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + __m128i vlo = _mm_loadu_si128((const __m128i*)ptr0); \ + __m128i vhi = _mm_loadu_si128((const __m128i*)ptr1); \ + return _Tpvec(_v256_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm256_store_si256((__m256i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_si256((__m256i*)ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_si256((__m256i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_si256((__m256i*)ptr, a.val); \ + else \ + _mm256_store_si256((__m256i*)ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_si128((__m128i*)ptr, _v256_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint8x32, uchar) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int8x32, schar) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint16x16, ushort) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int16x16, short) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint32x8, unsigned) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int32x8, int) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint64x4, uint64) +OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int64x4, int64) + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \ + inline _Tpvec v256_load(const _Tp* ptr) \ + { return _Tpvec(_mm256_loadu_##suffix(ptr)); } \ + inline _Tpvec v256_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm256_load_##suffix(ptr)); } \ + inline _Tpvec v256_load_low(const _Tp* ptr) \ 
+ { \ + return _Tpvec(_mm256_cast##suffix##128_##suffix##256 \ + (_mm_loadu_##suffix(ptr))); \ + } \ + inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + halfreg vlo = _mm_loadu_##suffix(ptr0); \ + halfreg vhi = _mm_loadu_##suffix(ptr1); \ + return _Tpvec(_v256_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm256_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm256_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm256_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm256_stream_##suffix(ptr, a.val); \ + else \ + _mm256_store_##suffix(ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm_storeu_##suffix(ptr, _v256_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float32x8, float, ps, __m128) +OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float64x4, double, pd, __m128d) + +#define OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, _Tpvecf, suffix, cast) \ + inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \ + { return _Tpvec(cast(a.val)); } + +#define OPENCV_HAL_IMPL_AVX_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s) \ + inline _Tpvec v256_setzero_##suffix() \ + { return _Tpvec(_mm256_setzero_si256()); } \ + inline _Tpvec v256_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); } \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float32x8, suffix, _mm256_castps_si256) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float64x4, suffix, _mm256_castpd_si256) + +OPENCV_HAL_IMPL_AVX_INIT(v_uint8x32, uchar, u8, epi8, char) +OPENCV_HAL_IMPL_AVX_INIT(v_int8x32, schar, s8, epi8, char) +OPENCV_HAL_IMPL_AVX_INIT(v_uint16x16, ushort, u16, epi16, short) +OPENCV_HAL_IMPL_AVX_INIT(v_int16x16, short, s16, epi16, short) +OPENCV_HAL_IMPL_AVX_INIT(v_uint32x8, unsigned, u32, epi32, int) +OPENCV_HAL_IMPL_AVX_INIT(v_int32x8, int, s32, epi32, int) +OPENCV_HAL_IMPL_AVX_INIT(v_uint64x4, uint64, u64, epi64x, int64) +OPENCV_HAL_IMPL_AVX_INIT(v_int64x4, int64, s64, epi64x, int64) + +#define OPENCV_HAL_IMPL_AVX_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \ + inline _Tpvec v256_setzero_##suffix() \ + { return _Tpvec(_mm256_setzero_##zsuffix()); } \ + inline _Tpvec v256_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm256_set1_##zsuffix(v)); } \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8, suffix, cast) \ + 
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4, suffix, cast) \ + OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4, suffix, cast) + +OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float32x8, float, f32, ps, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float64x4, double, f64, pd, _mm256_castsi256_pd) + +inline v_float32x8 v_reinterpret_as_f32(const v_float32x8& a) +{ return a; } +inline v_float32x8 v_reinterpret_as_f32(const v_float64x4& a) +{ return v_float32x8(_mm256_castpd_ps(a.val)); } + +inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a) +{ return a; } +inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a) +{ return v_float64x4(_mm256_castps_pd(a.val)); } + +/* Recombine */ +/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(perm(a.val, b.val, 0x20)); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(perm(a.val, b.val, 0x31)); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { c = v_combine_low(a, b); d = v_combine_high(a, b); } + +#define OPENCV_HAL_IMPL_AVX_UNPACKS(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, _mm256_permute2x128_si256) \ + inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, \ + _Tpvec& b0, _Tpvec& b1) \ + { \ + __m256i v0 = _v256_shuffle_odd_64(a0.val); \ + __m256i v1 = _v256_shuffle_odd_64(a1.val); \ + b0.val = _mm256_unpacklo_##suffix(v0, v1); \ + b1.val = _mm256_unpackhi_##suffix(v0, v1); \ + } + +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACKS(v_int64x4, epi64) +OPENCV_HAL_IMPL_AVX_COMBINE(v_float32x8, _mm256_permute2f128_ps) +OPENCV_HAL_IMPL_AVX_COMBINE(v_float64x4, _mm256_permute2f128_pd) + +inline void v_zip(const v_float32x8& a0, const v_float32x8& a1, v_float32x8& b0, v_float32x8& b1) +{ + __m256 v0 = _mm256_unpacklo_ps(a0.val, a1.val); + __m256 v1 = _mm256_unpackhi_ps(a0.val, a1.val); + v_recombine(v_float32x8(v0), v_float32x8(v1), b0, b1); +} + +inline void v_zip(const v_float64x4& a0, const v_float64x4& a1, v_float64x4& b0, v_float64x4& b1) +{ + __m256d v0 = _v_shuffle_odd_64(a0.val); + __m256d v1 = _v_shuffle_odd_64(a1.val); + b0.val = _mm256_unpacklo_pd(v0, v1); + b1.val = _mm256_unpackhi_pd(v0, v1); +}*/ + +//////////////// Variant Value reordering /////////////// + +// unpacks +#define OPENCV_HAL_IMPL_AVX_UNPACK(_Tpvec, suffix) \ + inline _Tpvec v256_unpacklo(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_unpacklo_##suffix(a.val, b.val)); } \ + inline _Tpvec v256_unpackhi(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_unpackhi_##suffix(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_UNPACK(v_uint64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACK(v_int64x4, epi64) +OPENCV_HAL_IMPL_AVX_UNPACK(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_UNPACK(v_float64x4, pd) + +// blend +#define OPENCV_HAL_IMPL_AVX_BLEND(_Tpvec, suffix) \ + 
template<int m> \ + inline _Tpvec v256_blend(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_blend_##suffix(a.val, b.val, m)); } + +OPENCV_HAL_IMPL_AVX_BLEND(v_uint16x16, epi16) +OPENCV_HAL_IMPL_AVX_BLEND(v_int16x16, epi16) +OPENCV_HAL_IMPL_AVX_BLEND(v_uint32x8, epi32) +OPENCV_HAL_IMPL_AVX_BLEND(v_int32x8, epi32) +OPENCV_HAL_IMPL_AVX_BLEND(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_BLEND(v_float64x4, pd) + +template<int m> +inline v_uint64x4 v256_blend(const v_uint64x4& a, const v_uint64x4& b) +{ + enum {M0 = m}; + enum {M1 = (M0 | (M0 << 2)) & 0x33}; + enum {M2 = (M1 | (M1 << 1)) & 0x55}; + enum {MM = M2 | (M2 << 1)}; + return v_uint64x4(_mm256_blend_epi32(a.val, b.val, MM)); +} +template<int m> +inline v_int64x4 v256_blend(const v_int64x4& a, const v_int64x4& b) +{ return v_int64x4(v256_blend<m>(v_uint64x4(a.val), v_uint64x4(b.val)).val); } + +// shuffle +// todo: emulate 64bit +#define OPENCV_HAL_IMPL_AVX_SHUFFLE(_Tpvec, intrin) \ + template<int m> \ + inline _Tpvec v256_shuffle(const _Tpvec& a) \ + { return _Tpvec(_mm256_##intrin(a.val, m)); } + +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_uint32x8, shuffle_epi32) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_int32x8, shuffle_epi32) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float32x8, permute_ps) +OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float64x4, permute_pd) + +template<typename _Tpvec> +inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1) +{ + ab0 = v256_unpacklo(a, b); + ab1 = v256_unpackhi(a, b); +} + +template<typename _Tpvec> +inline _Tpvec v256_combine_diagonal(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0xf0)); } + +inline v_float32x8 v256_combine_diagonal(const v_float32x8& a, const v_float32x8& b) +{ return v256_blend<0xf0>(a, b); } + +inline v_float64x4 v256_combine_diagonal(const v_float64x4& a, const v_float64x4& b) +{ return v256_blend<0xc>(a, b); } + +template<typename _Tpvec> +inline _Tpvec v256_alignr_128(const _Tpvec& a, const _Tpvec& b) +{ return v256_permute2x128<0x21>(a, b); } + +template<typename _Tpvec> +inline _Tpvec v256_alignr_64(const _Tpvec& a, const _Tpvec& b) +{ return _Tpvec(_mm256_alignr_epi8(a.val, b.val, 8)); } +inline v_float64x4 v256_alignr_64(const v_float64x4& a, const v_float64x4& b) +{ return v_float64x4(_mm256_shuffle_pd(b.val, a.val, _MM_SHUFFLE(0, 0, 1, 1))); } +// todo: emulate float32 + +template<typename _Tpvec> +inline _Tpvec v256_swap_halves(const _Tpvec& a) +{ return v256_permute2x128<1>(a, a); } + +template<typename _Tpvec> +inline _Tpvec v256_reverse_64(const _Tpvec& a) +{ return v256_permute4x64<_MM_SHUFFLE(0, 1, 2, 3)>(a); } + +// ZIP +#define OPENCV_HAL_IMPL_AVX_ZIP(_Tpvec) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return v256_permute2x128<0x20>(a, b); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return v256_permute2x128<0x31>(a, b); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { \ + _Tpvec a1b0 = v256_alignr_128(a, b); \ + c = v256_combine_diagonal(a, a1b0); \ + d = v256_combine_diagonal(a1b0, b); \ + } \ + inline void v_zip(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& ab0, _Tpvec& ab1) \ + { \ + _Tpvec ab0ab2, ab1ab3; \ + v256_zip(a, b, ab0ab2, ab1ab3); \ + v_recombine(ab0ab2, ab1ab3, ab0, ab1); \ + } + +OPENCV_HAL_IMPL_AVX_ZIP(v_uint8x32) +OPENCV_HAL_IMPL_AVX_ZIP(v_int8x32) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint16x16) +OPENCV_HAL_IMPL_AVX_ZIP(v_int16x16) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_int32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_uint64x4) +OPENCV_HAL_IMPL_AVX_ZIP(v_int64x4) +OPENCV_HAL_IMPL_AVX_ZIP(v_float32x8) +OPENCV_HAL_IMPL_AVX_ZIP(v_float64x4) + 
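+// NOTE (editor's illustration, not part of the upstream OpenCV header): a
+// comment-only sketch of how the reordering helpers above behave, assuming
+// AVX2 is enabled and this file is reached via opencv2/core/hal/intrin.hpp.
+// With eight-lane float vectors loaded as a = {0..7} and b = {8..15}:
+//
+//   float buf[16] = { 0,1,2,3,4,5,6,7, 8,9,10,11,12,13,14,15 };
+//   v_float32x8 a = v256_load(buf);        // lanes {0,1,2,3,4,5,6,7}
+//   v_float32x8 b = v256_load(buf + 8);    // lanes {8,9,10,11,12,13,14,15}
+//   v_float32x8 lo = v_combine_low(a, b);  // {0,1,2,3, 8,9,10,11}
+//   v_float32x8 ab0, ab1;
+//   v_zip(a, b, ab0, ab1);                 // ab0 = {0,8,1,9,2,10,3,11}
+//                                          // ab1 = {4,12,5,13,6,14,7,15}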
+////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ + +/** Arithmetics **/ +#define OPENCV_HAL_IMPL_AVX_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint8x32, _mm256_adds_epu8) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint8x32, _mm256_subs_epu8) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int8x32, _mm256_adds_epi8) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int8x32, _mm256_subs_epi8) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint16x16, _mm256_adds_epu16) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint16x16, _mm256_subs_epu16) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int16x16, _mm256_adds_epi16) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int16x16, _mm256_subs_epi16) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint32x8, _mm256_add_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint32x8, _mm256_sub_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_uint32x8, _mm256_mullo_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int32x8, _mm256_add_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int32x8, _mm256_sub_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_int32x8, _mm256_mullo_epi32) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint64x4, _mm256_add_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint64x4, _mm256_sub_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int64x4, _mm256_add_epi64) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int64x4, _mm256_sub_epi64) + +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float32x8, _mm256_add_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float32x8, _mm256_sub_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float32x8, _mm256_mul_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float32x8, _mm256_div_ps) +OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float64x4, _mm256_add_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float64x4, _mm256_sub_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float64x4, _mm256_mul_pd) +OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float64x4, _mm256_div_pd) + +// saturating multiply 8-bit, 16-bit +inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b) +{ + v_uint16x16 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b) +{ + v_int16x16 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_uint16x16 operator * (const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i pl = _mm256_mullo_epi16(a.val, b.val); + __m256i ph = _mm256_mulhi_epu16(a.val, b.val); + __m256i p0 = _mm256_unpacklo_epi16(pl, ph); + __m256i p1 = _mm256_unpackhi_epi16(pl, ph); + return v_uint16x16(_v256_packs_epu32(p0, p1)); +} +inline v_int16x16 operator * (const v_int16x16& a, const v_int16x16& b) +{ + __m256i pl = _mm256_mullo_epi16(a.val, b.val); + __m256i ph = _mm256_mulhi_epi16(a.val, b.val); + __m256i p0 = _mm256_unpacklo_epi16(pl, ph); + __m256i p1 = _mm256_unpackhi_epi16(pl, ph); + return v_int16x16(_mm256_packs_epi32(p0, p1)); +} +inline v_uint8x32& operator *= (v_uint8x32& a, const v_uint8x32& b) +{ a = a * b; return a; } +inline v_int8x32& operator *= (v_int8x32& a, const v_int8x32& b) +{ a = a * b; return a; } +inline v_uint16x16& operator *= (v_uint16x16& a, const v_uint16x16& b) +{ a = a * b; return a; } +inline v_int16x16& operator *= (v_int16x16& a, const v_int16x16& b) +{ a = a * b; return a; } + +/** Non-saturating arithmetics **/ +#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \ + inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, 
b.val)); } + +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint8x32, _mm256_add_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int8x32, _mm256_add_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint16x16, _mm256_add_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int16x16, _mm256_add_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint8x32, _mm256_sub_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int8x32, _mm256_sub_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint16x16, _mm256_sub_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int16x16, _mm256_sub_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_uint16x16, _mm256_mullo_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_int16x16, _mm256_mullo_epi16) + +inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i ad = _mm256_srai_epi16(a.val, 8); + __m256i bd = _mm256_srai_epi16(b.val, 8); + __m256i p0 = _mm256_mullo_epi16(a.val, b.val); // even + __m256i p1 = _mm256_slli_epi16(_mm256_mullo_epi16(ad, bd), 8); // odd + + const __m256i b01 = _mm256_set1_epi32(0xFF00FF00); + return v_uint8x32(_mm256_blendv_epi8(p0, p1, b01)); +} +inline v_int8x32 v_mul_wrap(const v_int8x32& a, const v_int8x32& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +// Multiply and expand +inline void v_mul_expand(const v_uint8x32& a, const v_uint8x32& b, + v_uint16x16& c, v_uint16x16& d) +{ + v_uint16x16 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x32& a, const v_int8x32& b, + v_int16x16& c, v_int16x16& d) +{ + v_int16x16 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x16& a, const v_int16x16& b, + v_int32x8& c, v_int32x8& d) +{ + v_int16x16 vhi = v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); + + v_int16x16 v0, v1; + v_zip(v_mul_wrap(a, b), vhi, v0, v1); + + c = v_reinterpret_as_s32(v0); + d = v_reinterpret_as_s32(v1); +} + +inline void v_mul_expand(const v_uint16x16& a, const v_uint16x16& b, + v_uint32x8& c, v_uint32x8& d) +{ + v_uint16x16 vhi = v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); + + v_uint16x16 v0, v1; + v_zip(v_mul_wrap(a, b), vhi, v0, v1); + + c = v_reinterpret_as_u32(v0); + d = v_reinterpret_as_u32(v1); +} + +inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b, + v_uint64x4& c, v_uint64x4& d) +{ + __m256i v0 = _mm256_mul_epu32(a.val, b.val); + __m256i v1 = _mm256_mul_epu32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); + v_zip(v_uint64x4(v0), v_uint64x4(v1), c, d); +} + +inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b) { return v_int16x16(_mm256_mulhi_epi16(a.val, b.val)); } +inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val)); } + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_AVX_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \ + inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ + { return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \ + inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \ + { return _Tpsvec(srai(a.val, imm)); } \ + template<int imm> \ + inline _Tpuvec v_shl(const 
_Tpuvec& a) \ + { return _Tpuvec(_mm256_slli_##suffix(a.val, imm)); } \ + template<int imm> \ + inline _Tpsvec v_shl(const _Tpsvec& a) \ + { return _Tpsvec(_mm256_slli_##suffix(a.val, imm)); } \ + template<int imm> \ + inline _Tpuvec v_shr(const _Tpuvec& a) \ + { return _Tpuvec(_mm256_srli_##suffix(a.val, imm)); } \ + template<int imm> \ + inline _Tpsvec v_shr(const _Tpsvec& a) \ + { return _Tpsvec(srai(a.val, imm)); } + +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint16x16, v_int16x16, epi16, _mm256_srai_epi16) +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint32x8, v_int32x8, epi32, _mm256_srai_epi32) + +inline __m256i _mm256_srai_epi64xx(const __m256i a, int imm) +{ + __m256i d = _mm256_set1_epi64x((int64)1 << 63); + __m256i r = _mm256_srli_epi64(_mm256_add_epi64(a, d), imm); + return _mm256_sub_epi64(r, _mm256_srli_epi64(d, imm)); +} +OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint64x4, v_int64x4, epi64, _mm256_srai_epi64xx) + + +/** Bitwise logic **/ +#define OPENCV_HAL_IMPL_AVX_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(&, _Tpvec, _mm256_and_##suffix) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(|, _Tpvec, _mm256_or_##suffix) \ + OPENCV_HAL_IMPL_AVX_BIN_OP(^, _Tpvec, _mm256_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { return _Tpvec(_mm256_xor_##suffix(a.val, not_const)); } + +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint8x32, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int8x32, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint16x16, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int16x16, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint32x8, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int32x8, si256, _mm256_set1_epi32(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint64x4, si256, _mm256_set1_epi64x(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int64x4, si256, _mm256_set1_epi64x(-1)) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float32x8, ps, _mm256_castsi256_ps(_mm256_set1_epi32(-1))) +OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float64x4, pd, _mm256_castsi256_pd(_mm256_set1_epi32(-1))) + +/** Select **/ +#define OPENCV_HAL_IMPL_AVX_SELECT(_Tpvec, suffix) \ + inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_blendv_##suffix(b.val, a.val, mask.val)); } + +OPENCV_HAL_IMPL_AVX_SELECT(v_uint8x32, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int8x32, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_uint16x16, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int16x16, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_uint32x8, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_int32x8, epi8) +OPENCV_HAL_IMPL_AVX_SELECT(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_SELECT(v_float64x4, pd) + +/** Comparison **/ +#define OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpvec) \ + inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a == b); } \ + inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ + { return b > a; } \ + inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a < b); } \ + inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ + { return b >= a; } + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_INT(_Tpuvec, _Tpsvec, suffix, sbit) \ + inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \ + { return _Tpuvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \ + inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \ + { \ + __m256i smask = _mm256_set1_##suffix(sbit); \ + return _Tpuvec(_mm256_cmpgt_##suffix( \ + _mm256_xor_si256(a.val, smask), \ + _mm256_xor_si256(b.val, smask))); \ + } \ + inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) 
\ + { return _Tpsvec(_mm256_cmpeq_##suffix(a.val, b.val)); } \ + inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \ + { return _Tpsvec(_mm256_cmpgt_##suffix(a.val, b.val)); } \ + OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpuvec) \ + OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpsvec) + +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint8x32, v_int8x32, epi8, (char)-128) +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint16x16, v_int16x16, epi16, (short)-32768) +OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint32x8, v_int32x8, epi32, (int)0x80000000) + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(_Tpvec) \ + inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_cmpeq_epi64(a.val, b.val)); } \ + inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ + { return ~(a == b); } + +OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_uint64x4) +OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4) + +#define OPENCV_HAL_IMPL_AVX_CMP_FLT(bin_op, imm8, _Tpvec, suffix) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm256_cmp_##suffix(a.val, b.val, imm8)); } + +#define OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, suffix) \ + OPENCV_HAL_IMPL_AVX_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, suffix) + +OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd) + +inline v_float32x8 v_not_nan(const v_float32x8& a) +{ return v_float32x8(_mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q)); } +inline v_float64x4 v_not_nan(const v_float64x4& a) +{ return v_float64x4(_mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q)); } + +/** min/max **/ +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32, _mm256_min_epu8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32, _mm256_max_epu8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int8x32, _mm256_min_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int8x32, _mm256_max_epi8) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint16x16, _mm256_min_epu16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint16x16, _mm256_max_epu16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int16x16, _mm256_min_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int16x16, _mm256_max_epi16) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint32x8, _mm256_min_epu32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint32x8, _mm256_max_epu32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int32x8, _mm256_min_epi32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int32x8, _mm256_max_epi32) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float32x8, _mm256_min_ps) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float32x8, _mm256_max_ps) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float64x4, _mm256_min_pd) +OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd) + +/** Rotate **/ +template<int imm> +inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b) +{ + enum {IMM_R = (16 - imm) & 0xFF}; + enum {IMM_R2 = (32 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); + + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, IMM_R)); + return v_uint8x32(_mm256_alignr_epi8(swap, b.val, IMM_R2)); // imm < 32 +} + +template<int imm> +inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b) +{ + enum 
{IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm == 32) return b; + if (imm > 32) return v_uint8x32(); + + __m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21); + if (imm == 16) return v_uint8x32(swap); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm)); + return v_uint8x32(_mm256_alignr_epi8(b.val, swap, IMM_L)); +} + +template<int imm> +inline v_uint8x32 v_rotate_left(const v_uint8x32& a) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + enum {IMM_R = (16 - imm) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + + // ESAC control[3] ? [127:0] = 0 + __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0)); + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swapz, IMM_R)); + return v_uint8x32(_mm256_slli_si256(swapz, IMM_L)); +} + +template<int imm> +inline v_uint8x32 v_rotate_right(const v_uint8x32& a) +{ + enum {IMM_L = (imm - 16) & 0xFF}; + + if (imm == 0) return a; + if (imm > 32) return v_uint8x32(); + + // ESAC control[3] ? [127:0] = 0 + __m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1)); + if (imm == 16) return v_uint8x32(swapz); + if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swapz, a.val, imm)); + return v_uint8x32(_mm256_srli_si256(swapz, IMM_L)); +} + +#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \ + template<int imm> \ + inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a), \ + v_reinterpret_as_u8(b)); \ + return _Tpvec(cast(ret.val)); \ + } \ + template<int imm> \ + inline _Tpvec intrin(const _Tpvec& a) \ + { \ + enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \ + v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a)); \ + return _Tpvec(cast(ret.val)); \ + } + +#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \ + OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, _Tpvec, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, _Tpvec, OPENCV_HAL_NOP) + +OPENCV_HAL_IMPL_AVX_ROTATE(v_int8x32) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint16x16) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int16x16) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint32x8) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int32x8) +OPENCV_HAL_IMPL_AVX_ROTATE(v_uint64x4) +OPENCV_HAL_IMPL_AVX_ROTATE(v_int64x4) + +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float32x8, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd) +OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd) + +/** Reverse **/ +inline v_uint8x32 v_reverse(const v_uint8x32 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint8x32(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int8x32 v_reverse(const v_int8x32 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x16 v_reverse(const v_uint16x16 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint16x16(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int16x16 v_reverse(const v_int16x16 &a) +{ return 
v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x8 v_reverse(const v_uint32x8 &a) +{ + static const __m256i perm = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +inline v_int32x8 v_reverse(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x8 v_reverse(const v_float32x8 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x4 v_reverse(const v_uint64x4 &a) +{ + return v_uint64x4(_mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int64x4 v_reverse(const v_int64x4 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x4 v_reverse(const v_float64x4 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +////////// Reduce and mask ///////// + +/** Reduce **/ +inline unsigned v_reduce_sum(const v_uint8x32& a) +{ + __m256i half = _mm256_sad_epu8(a.val, _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline int v_reduce_sum(const v_int8x32& a) +{ + __m256i half = _mm256_sad_epu8(_mm256_xor_si256(a.val, _mm256_set1_epi8((schar)-128)), _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))) - 4096; +} +#define OPENCV_HAL_IMPL_AVX_REDUCE_32(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i val = intrin(_v256_extract_low(a.val), _v256_extract_high(a.val)); \ + val = intrin(val, _mm_srli_si128(val,8)); \ + val = intrin(val, _mm_srli_si128(val,4)); \ + val = intrin(val, _mm_srli_si128(val,2)); \ + val = intrin(val, _mm_srli_si128(val,1)); \ + return (sctype)_mm_cvtsi128_si32(val); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, min, _mm_min_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, min, _mm_min_epi8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, max, _mm_max_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, max, _mm_max_epi8) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i v0 = _v256_extract_low(a.val); \ + __m128i v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_srli_si128(v0, 8)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 4)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 2)); \ + return (sctype) _mm_cvtsi128_si32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, min, _mm_min_epu16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16, short, min, _mm_min_epi16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, max, _mm_max_epu16) +OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16, short, max, _mm_max_epi16) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_8(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i v0 = _v256_extract_low(a.val); \ + __m128i v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_srli_si128(v0, 8)); \ + v0 = intrin(v0, _mm_srli_si128(v0, 4)); \ + return (sctype) _mm_cvtsi128_si32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, min, _mm_min_epu32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, min, _mm_min_epi32) 
+OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, max, _mm_max_epu32) +OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, max, _mm_max_epi32) + +#define OPENCV_HAL_IMPL_AVX_REDUCE_FLT(func, intrin) \ + inline float v_reduce_##func(const v_float32x8& a) \ + { \ + __m128 v0 = _v256_extract_low(a.val); \ + __m128 v1 = _v256_extract_high(a.val); \ + v0 = intrin(v0, v1); \ + v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 3, 2))); \ + v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 0, 1))); \ + return _mm_cvtss_f32(v0); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_FLT(min, _mm_min_ps) +OPENCV_HAL_IMPL_AVX_REDUCE_FLT(max, _mm_max_ps) + +inline int v_reduce_sum(const v_int32x8& a) +{ + __m256i s0 = _mm256_hadd_epi32(a.val, a.val); + s0 = _mm256_hadd_epi32(s0, s0); + + __m128i s1 = _v256_extract_high(s0); + s1 = _mm_add_epi32(_v256_extract_low(s0), s1); + + return _mm_cvtsi128_si32(s1); +} + +inline unsigned v_reduce_sum(const v_uint32x8& a) +{ return v_reduce_sum(v_reinterpret_as_s32(a)); } + +inline int v_reduce_sum(const v_int16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline float v_reduce_sum(const v_float32x8& a) +{ + __m256 s0 = _mm256_hadd_ps(a.val, a.val); + s0 = _mm256_hadd_ps(s0, s0); + + __m128 s1 = _v256_extract_high(s0); + s1 = _mm_add_ps(_v256_extract_low(s0), s1); + + return _mm_cvtss_f32(s1); +} + +inline uint64 v_reduce_sum(const v_uint64x4& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x4& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline double v_reduce_sum(const v_float64x4& a) +{ + __m256d s0 = _mm256_hadd_pd(a.val, a.val); + return _mm_cvtsd_f64(_mm_add_pd(_v256_extract_low(s0), _v256_extract_high(s0))); +} + +inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b, + const v_float32x8& c, const v_float32x8& d) +{ + __m256 ab = _mm256_hadd_ps(a.val, b.val); + __m256 cd = _mm256_hadd_ps(c.val, d.val); + return v_float32x8(_mm256_hadd_ps(ab, cd)); +} + +inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i half = _mm256_sad_epu8(a.val, b.val); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b) +{ + __m256i half = _mm256_set1_epi8(0x7f); + half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b) +{ + v_uint32x8 l, h; + v_expand(v_add_wrap(a - b, b - a), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x16& a, const v_int16x16& b) +{ + v_uint32x8 l, h; + v_expand(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x8& a, const v_uint32x8& b) +{ + return v_reduce_sum(v_max(a, b) - 
v_min(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 m = a < b; + return v_reduce_sum(v_reinterpret_as_u32(((a - b) ^ m) - m)); +} +inline float v_reduce_sad(const v_float32x8& a, const v_float32x8& b) +{ + return v_reduce_sum((a - b) & v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)))); +} + +/** Popcount **/ +inline v_uint8x32 v_popcount(const v_uint8x32& a) +{ + __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); + __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); + return v_uint8x32(_mm256_add_epi8(_mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256( a.val , _popcnt_mask)), + _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_mm256_srli_epi16(a.val, 4), _popcnt_mask)))); +} +inline v_uint16x16 v_popcount(const v_uint16x16& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v256_setall_u16(0x00ff); +} +inline v_uint32x8 v_popcount(const v_uint32x8& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v256_setall_u32(0x000000ff); +} +inline v_uint64x4 v_popcount(const v_uint64x4& a) +{ + return v_uint64x4(_mm256_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm256_setzero_si256())); +} +inline v_uint8x32 v_popcount(const v_int8x32& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x16 v_popcount(const v_int16x16& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x8 v_popcount(const v_int32x8& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x4 v_popcount(const v_int64x4& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +/** Mask **/ +inline int v_signmask(const v_int8x32& a) +{ return _mm256_movemask_epi8(a.val); } +inline int v_signmask(const v_uint8x32& a) +{ return v_signmask(v_reinterpret_as_s8(a)); } + +inline int v_signmask(const v_int16x16& a) +{ return v_signmask(v_pack(a, a)) & 0xFFFF; } +inline int v_signmask(const v_uint16x16& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } + +inline int v_signmask(const v_float32x8& a) +{ return _mm256_movemask_ps(a.val); } +inline int v_signmask(const v_float64x4& a) +{ return _mm256_movemask_pd(a.val); } + +inline int v_signmask(const v_int32x8& a) +{ return v_signmask(v_reinterpret_as_f32(a)); } +inline int v_signmask(const v_uint32x8& a) +{ return v_signmask(v_reinterpret_as_f32(a)); } + +inline int v_signmask(const v_int64x4& a) +{ return v_signmask(v_reinterpret_as_f64(a)); } +inline int v_signmask(const v_uint64x4& a) +{ return v_signmask(v_reinterpret_as_f64(a)); } + +inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } 
+inline int v_scan_forward(const v_int64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +/** Checks **/ +#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \ + inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \ + inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; } +OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1) +OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1) +OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15) +OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15) +OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255) +OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15) + +#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \ + inline bool v_check_all(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; } \ + inline bool v_check_any(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; } +OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16) +OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16) + +////////// Other math ///////// + +/** Some frequent operations **/ +#if CV_FMA3 +#define OPENCV_HAL_IMPL_AVX_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val)); } +#else +#define OPENCV_HAL_IMPL_AVX_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_add_##suffix(_mm256_mul_##suffix(a.val, b.val), c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm256_add_##suffix(_mm256_mul_##suffix(a.val, b.val), c.val)); } +#endif + +#define OPENCV_HAL_IMPL_AVX_MISC(_Tpvec, suffix) \ + inline _Tpvec v_sqrt(const _Tpvec& x) \ + { return _Tpvec(_mm256_sqrt_##suffix(x.val)); } \ + inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_fma(a, a, b * b); } \ + inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_sqrt(v_fma(a, a, b*b)); } + +OPENCV_HAL_IMPL_AVX_MULADD(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_MULADD(v_float64x4, pd) +OPENCV_HAL_IMPL_AVX_MISC(v_float32x8, ps) +OPENCV_HAL_IMPL_AVX_MISC(v_float64x4, pd) + +inline v_int32x8 v_fma(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c) +{ + return a * b + c; +} + +inline v_int32x8 v_muladd(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x8 v_invsqrt(const v_float32x8& x) +{ + v_float32x8 half = x * v256_setall_f32(0.5); + v_float32x8 t = v_float32x8(_mm256_rsqrt_ps(x.val)); + // todo: _mm256_fnmsub_ps + t *= v256_setall_f32(1.5) - ((t * t) * half); + return t; +} + +inline v_float64x4 v_invsqrt(const v_float64x4& x) +{ + return v256_setall_f64(1.) 
/ v_sqrt(x); +} + +/** Absolute values **/ +#define OPENCV_HAL_IMPL_AVX_ABS(_Tpvec, suffix) \ + inline v_u##_Tpvec v_abs(const v_##_Tpvec& x) \ + { return v_u##_Tpvec(_mm256_abs_##suffix(x.val)); } + +OPENCV_HAL_IMPL_AVX_ABS(int8x32, epi8) +OPENCV_HAL_IMPL_AVX_ABS(int16x16, epi16) +OPENCV_HAL_IMPL_AVX_ABS(int32x8, epi32) + +inline v_float32x8 v_abs(const v_float32x8& x) +{ return x & v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); } +inline v_float64x4 v_abs(const v_float64x4& x) +{ return x & v_float64x4(_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_set1_epi64x(-1), 1))); } + +/** Absolute difference **/ +inline v_uint8x32 v_absdiff(const v_uint8x32& a, const v_uint8x32& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x16 v_absdiff(const v_uint16x16& a, const v_uint16x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x8 v_absdiff(const v_uint32x8& a, const v_uint32x8& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x32 v_absdiff(const v_int8x32& a, const v_int8x32& b) +{ + v_int8x32 d = v_sub_wrap(a, b); + v_int8x32 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} + +inline v_uint16x16 v_absdiff(const v_int16x16& a, const v_int16x16& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } + +inline v_uint32x8 v_absdiff(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 d = a - b; + v_int32x8 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +inline v_float32x8 v_absdiff(const v_float32x8& a, const v_float32x8& b) +{ return v_abs(a - b); } + +inline v_float64x4 v_absdiff(const v_float64x4& a, const v_float64x4& b) +{ return v_abs(a - b); } + +/** Saturating absolute difference **/ +inline v_int8x32 v_absdiffs(const v_int8x32& a, const v_int8x32& b) +{ + v_int8x32 d = a - b; + v_int8x32 m = a < b; + return (d ^ m) - m; +} +inline v_int16x16 v_absdiffs(const v_int16x16& a, const v_int16x16& b) +{ return v_max(a, b) - v_min(a, b); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x8 v_round(const v_float32x8& a) +{ return v_int32x8(_mm256_cvtps_epi32(a.val)); } + +inline v_int32x8 v_round(const v_float64x4& a) +{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvtpd_epi32(a.val))); } + +inline v_int32x8 v_round(const v_float64x4& a, const v_float64x4& b) +{ + __m128i ai = _mm256_cvtpd_epi32(a.val), bi = _mm256_cvtpd_epi32(b.val); + return v_int32x8(_v256_combine(ai, bi)); +} + +inline v_int32x8 v_trunc(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(a.val)); } + +inline v_int32x8 v_trunc(const v_float64x4& a) +{ return v_int32x8(_mm256_castsi128_si256(_mm256_cvttpd_epi32(a.val))); } + +inline v_int32x8 v_floor(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(_mm256_floor_ps(a.val))); } + +inline v_int32x8 v_floor(const v_float64x4& a) +{ return v_trunc(v_float64x4(_mm256_floor_pd(a.val))); } + +inline v_int32x8 v_ceil(const v_float32x8& a) +{ return v_int32x8(_mm256_cvttps_epi32(_mm256_ceil_ps(a.val))); } + +inline v_int32x8 v_ceil(const v_float64x4& a) +{ return v_trunc(v_float64x4(_mm256_ceil_pd(a.val))); } + +/** To float **/ +inline v_float32x8 v_cvt_f32(const v_int32x8& a) +{ return v_float32x8(_mm256_cvtepi32_ps(a.val)); } + +inline v_float32x8 v_cvt_f32(const v_float64x4& a) +{ return v_float32x8(_mm256_castps128_ps256(_mm256_cvtpd_ps(a.val))); } + +inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b) +{ + __m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val); + return v_float32x8(_v256_combine(af, bf)); +} + 
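+// NOTE (editor's illustration, not part of the upstream OpenCV header): the
+// float->int conversions above differ only in their rounding rule; a short
+// comment-only sketch, assuming the default round-to-nearest-even MXCSR mode:
+//
+//   v_int32x8 r = v_round(v256_setall_f32(2.5f));    // {2,...}  ties to even
+//   v_int32x8 t = v_trunc(v256_setall_f32(2.9f));    // {2,...}  toward zero
+//   v_int32x8 f = v_floor(v256_setall_f32(-1.5f));   // {-2,...} toward -inf
+//   v_int32x8 c = v_ceil(v256_setall_f32(-1.5f));    // {-1,...} toward +inf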
+inline v_float64x4 v_cvt_f64(const v_int32x8& a) +{ return v_float64x4(_mm256_cvtepi32_pd(_v256_extract_low(a.val))); } + +inline v_float64x4 v_cvt_f64_high(const v_int32x8& a) +{ return v_float64x4(_mm256_cvtepi32_pd(_v256_extract_high(a.val))); } + +inline v_float64x4 v_cvt_f64(const v_float32x8& a) +{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_low(a.val))); } + +inline v_float64x4 v_cvt_f64_high(const v_float32x8& a) +{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_high(a.val))); } + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x4 v_cvt_f64(const v_int64x4& v) +{ + // constants encoded as floating-point + __m256i magic_i_lo = _mm256_set1_epi64x(0x4330000000000000); // 2^52 + __m256i magic_i_hi32 = _mm256_set1_epi64x(0x4530000080000000); // 2^84 + 2^63 + __m256i magic_i_all = _mm256_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m256d magic_d_all = _mm256_castsi256_pd(magic_i_all); + + // Blend the 32 lowest significant bits of v with magic_int_lo + __m256i v_lo = _mm256_blend_epi32(magic_i_lo, v.val, 0x55); + // Extract the 32 most significant bits of v + __m256i v_hi = _mm256_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); + // Compute in double precision + __m256d v_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m256d result = _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo)); + return v_float64x4(result); +} + +////////////// Lookup table access //////////////////// + +inline v_int8x32 v256_lut(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_setr_epi8(tab[idx[ 0]], tab[idx[ 1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[ 8]], tab[idx[ 9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]], + tab[idx[16]], tab[idx[17]], tab[idx[18]], tab[idx[19]], tab[idx[20]], tab[idx[21]], tab[idx[22]], tab[idx[23]], + tab[idx[24]], tab[idx[25]], tab[idx[26]], tab[idx[27]], tab[idx[28]], tab[idx[29]], tab[idx[30]], tab[idx[31]])); +} +inline v_int8x32 v256_lut_pairs(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_setr_epi16(*(const short*)(tab + idx[ 0]), *(const short*)(tab + idx[ 1]), *(const short*)(tab + idx[ 2]), *(const short*)(tab + idx[ 3]), + *(const short*)(tab + idx[ 4]), *(const short*)(tab + idx[ 5]), *(const short*)(tab + idx[ 6]), *(const short*)(tab + idx[ 7]), + *(const short*)(tab + idx[ 8]), *(const short*)(tab + idx[ 9]), *(const short*)(tab + idx[10]), *(const short*)(tab + idx[11]), + *(const short*)(tab + idx[12]), *(const short*)(tab + idx[13]), *(const short*)(tab + idx[14]), *(const short*)(tab + idx[15]))); +} +inline v_int8x32 v256_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x32(_mm256_i32gather_epi32((const int*)tab, _mm256_loadu_si256((const __m256i*)idx), 1)); +} +inline v_uint8x32 v256_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut((const schar *)tab, idx)); } +inline v_uint8x32 v256_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x32 v256_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_quads((const schar *)tab, idx)); } + +inline v_int16x16 v256_lut(const short* tab, const int* idx) +{ + return v_int16x16(_mm256_setr_epi16(tab[idx[0]], tab[idx[1]], 
tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])); +} +inline v_int16x16 v256_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x16(_mm256_i32gather_epi32((const int*)tab, _mm256_loadu_si256((const __m256i*)idx), 2)); +} +inline v_int16x16 v256_lut_quads(const short* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int16x16(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 2));//Looks like intrinsic has wrong definition +#else + return v_int16x16(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 2)); +#endif +} +inline v_uint16x16 v256_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut((const short *)tab, idx)); } +inline v_uint16x16 v256_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_pairs((const short *)tab, idx)); } +inline v_uint16x16 v256_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_quads((const short *)tab, idx)); } + +inline v_int32x8 v256_lut(const int* tab, const int* idx) +{ + return v_int32x8(_mm256_i32gather_epi32(tab, _mm256_loadu_si256((const __m256i*)idx), 4)); +} +inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int32x8(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 4)); +#else + return v_int32x8(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 4)); +#endif +} +inline v_int32x8 v256_lut_quads(const int* tab, const int* idx) +{ + return v_int32x8(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1])))); +} +inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); } +inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); } +inline v_uint32x8 v256_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_quads((const int *)tab, idx)); } + +inline v_int64x4 v256_lut(const int64* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int64x4(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 8)); +#else + return v_int64x4(_mm256_i32gather_epi64(tab, _mm_loadu_si128((const __m128i*)idx), 8)); +#endif +} +inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x4(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1])))); +} +inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); } +inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); } + +inline v_float32x8 v256_lut(const float* tab, const int* idx) +{ + return v_float32x8(_mm256_i32gather_ps(tab, _mm256_loadu_si256((const __m256i*)idx), 4)); +} +inline v_float32x8 v256_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_pairs((const int *)tab, idx)); } +inline v_float32x8 v256_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_quads((const int *)tab, idx)); } + +inline v_float64x4 v256_lut(const 
double* tab, const int* idx) +{ + return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8)); +} +inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); } + +inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec) +{ + return v_int32x8(_mm256_i32gather_epi32(tab, idxvec.val, 4)); +} + +inline v_uint32x8 v_lut(const unsigned* tab, const v_int32x8& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x8 v_lut(const float* tab, const v_int32x8& idxvec) +{ + return v_float32x8(_mm256_i32gather_ps(tab, idxvec.val, 4)); +} + +inline v_float64x4 v_lut(const double* tab, const v_int32x8& idxvec) +{ + return v_float64x4(_mm256_i32gather_pd(tab, _mm256_castsi256_si128(idxvec.val), 8)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x8& idxvec, v_float32x8& x, v_float32x8& y) +{ + int CV_DECL_ALIGNED(32) idx[8]; + v_store_aligned(idx, idxvec); + __m128 z = _mm_setzero_ps(); + __m128 xy01, xy45, xy23, xy67; + xy01 = _mm_loadl_pi(z, (const __m64*)(tab + idx[0])); + xy01 = _mm_loadh_pi(xy01, (const __m64*)(tab + idx[1])); + xy45 = _mm_loadl_pi(z, (const __m64*)(tab + idx[4])); + xy45 = _mm_loadh_pi(xy45, (const __m64*)(tab + idx[5])); + __m256 xy0145 = _v256_combine(xy01, xy45); + xy23 = _mm_loadl_pi(z, (const __m64*)(tab + idx[2])); + xy23 = _mm_loadh_pi(xy23, (const __m64*)(tab + idx[3])); + xy67 = _mm_loadl_pi(z, (const __m64*)(tab + idx[6])); + xy67 = _mm_loadh_pi(xy67, (const __m64*)(tab + idx[7])); + __m256 xy2367 = _v256_combine(xy23, xy67); + + __m256 xxyy0145 = _mm256_unpacklo_ps(xy0145, xy2367); + __m256 xxyy2367 = _mm256_unpackhi_ps(xy0145, xy2367); + + x = v_float32x8(_mm256_unpacklo_ps(xxyy0145, xxyy2367)); + y = v_float32x8(_mm256_unpackhi_ps(xxyy0145, xxyy2367)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x8& idxvec, v_float64x4& x, v_float64x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_low(idx, idxvec); + __m128d xy0 = _mm_loadu_pd(tab + idx[0]); + __m128d xy2 = _mm_loadu_pd(tab + idx[2]); + __m128d xy1 = _mm_loadu_pd(tab + idx[1]); + __m128d xy3 = _mm_loadu_pd(tab + idx[3]); + __m256d xy02 = _v256_combine(xy0, xy2); + __m256d xy13 = _v256_combine(xy1, xy3); + + x = v_float64x4(_mm256_unpacklo_pd(xy02, xy13)); + y = v_float64x4(_mm256_unpackhi_pd(xy02, xy13)); +} + +inline v_int8x32 v_interleave_pairs(const v_int8x32& vec) +{ + return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200, 0x0f0d0e0c0b090a08, 0x0705060403010200))); +} +inline v_uint8x32 v_interleave_pairs(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x32 v_interleave_quads(const v_int8x32& vec) +{ + return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400, 0x0f0b0e0a0d090c08, 0x0703060205010400))); +} +inline v_uint8x32 v_interleave_quads(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x16 v_interleave_pairs(const v_int16x16& vec) +{ + return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100, 0x0f0e0b0a0d0c0908, 0x0706030205040100))); +} +inline v_uint16x16 v_interleave_pairs(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x16 
v_interleave_quads(const v_int16x16& vec) +{ + return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100, 0x0f0e07060d0c0504, 0x0b0a030209080100))); +} +inline v_uint16x16 v_interleave_quads(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x8 v_interleave_pairs(const v_int32x8& vec) +{ + return v_int32x8(_mm256_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); +} +inline v_uint32x8 v_interleave_pairs(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x8 v_interleave_pairs(const v_float32x8& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x32 v_pack_triplets(const v_int8x32& vec) +{ + return v_int8x32(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))), + _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint8x32 v_pack_triplets(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x16 v_pack_triplets(const v_int16x16& vec) +{ + return v_int16x16(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))), + _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint16x16 v_pack_triplets(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x8 v_pack_triplets(const v_int32x8& vec) +{ + return v_int32x8(_mm256_permutevar8x32_epi32(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} +inline v_uint32x8 v_pack_triplets(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } +inline v_float32x8 v_pack_triplets(const v_float32x8& vec) +{ + return v_float32x8(_mm256_permutevar8x32_ps(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); +} + +////////// Matrix operations ///////// + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b) +{ return v_int32x8(_mm256_madd_epi16(a.val, b.val)); } +inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b) +{ + __m256i even = _mm256_mul_epi32(a.val, b.val); + __m256i odd = _mm256_mul_epi32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); + return v_int64x4(_mm256_add_epi64(even, odd)); +} +inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b) +{ + __m256i even_m = _mm256_set1_epi32(0xFF00FF00); + __m256i even_a = _mm256_blendv_epi8(a.val, _mm256_setzero_si256(), even_m); + __m256i odd_a = _mm256_srli_epi16(a.val, 8); + + __m256i even_b = _mm256_blendv_epi8(b.val, _mm256_setzero_si256(), even_m); + __m256i odd_b = _mm256_srli_epi16(b.val, 8); + + __m256i prod0 = _mm256_madd_epi16(even_a, even_b); + __m256i prod1 = _mm256_madd_epi16(odd_a, odd_b); + return 
v_uint32x8(_mm256_add_epi32(prod0, prod1)); +} +inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b) +{ + __m256i even_a = _mm256_srai_epi16(_mm256_bslli_epi128(a.val, 1), 8); + __m256i odd_a = _mm256_srai_epi16(a.val, 8); + + __m256i even_b = _mm256_srai_epi16(_mm256_bslli_epi128(b.val, 1), 8); + __m256i odd_b = _mm256_srai_epi16(b.val, 8); + + __m256i prod0 = _mm256_madd_epi16(even_a, even_b); + __m256i prod1 = _mm256_madd_epi16(odd_a, odd_b); + return v_int32x8(_mm256_add_epi32(prod0, prod1)); +} +inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i mullo = _mm256_mullo_epi16(a.val, b.val); + __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); + __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi); + __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi); + + __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); + __m256i p13 = _mm256_srli_epi64(mul0, 32); + __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA); + __m256i p57 = _mm256_srli_epi64(mul1, 32); + + __m256i p15_ = _mm256_add_epi64(p02, p13); + __m256i p9d_ = _mm256_add_epi64(p46, p57); + + return v_uint64x4(_mm256_add_epi64( + _mm256_unpacklo_epi64(p15_, p9d_), + _mm256_unpackhi_epi64(p15_, p9d_) + )); +} +inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b) +{ + __m256i prod = _mm256_madd_epi16(a.val, b.val); + __m256i sign = _mm256_srai_epi32(prod, 31); + + __m256i lo = _mm256_unpacklo_epi32(prod, sign); + __m256i hi = _mm256_unpackhi_epi32(prod, sign); + + return v_int64x4(_mm256_add_epi64( + _mm256_unpacklo_epi64(lo, hi), + _mm256_unpackhi_epi64(lo, hi) + )); +} +inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b) +{ return v_dotprod(a, b); } +inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b) +{ return v_dotprod(a, b); } +inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) +{ return v_dotprod_expand(a, 
b, c); } + +// 16 >> 64 +inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i mullo = _mm256_mullo_epi16(a.val, b.val); + __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); + __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi); + __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi); + + __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); + __m256i p13 = _mm256_srli_epi64(mul0, 32); + __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA); + __m256i p57 = _mm256_srli_epi64(mul1, 32); + + __m256i p15_ = _mm256_add_epi64(p02, p13); + __m256i p9d_ = _mm256_add_epi64(p46, p57); + + return v_uint64x4(_mm256_add_epi64(p15_, p9d_)); +} +inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b) +{ + __m256i prod = _mm256_madd_epi16(a.val, b.val); + __m256i sign = _mm256_srai_epi32(prod, 31); + __m256i lo = _mm256_unpacklo_epi32(prod, sign); + __m256i hi = _mm256_unpackhi_epi32(prod, sign); + return v_int64x4(_mm256_add_epi64(lo, hi)); +} +inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_AVX_SPLAT2_PS(a, im) \ + v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) + +inline v_float32x8 v_matmul(const v_float32x8& v, const v_float32x8& m0, + const v_float32x8& m1, const v_float32x8& m2, + const v_float32x8& m3) +{ + v_float32x8 v04 = OPENCV_HAL_AVX_SPLAT2_PS(v, 0); + v_float32x8 v15 = OPENCV_HAL_AVX_SPLAT2_PS(v, 1); + v_float32x8 v26 = OPENCV_HAL_AVX_SPLAT2_PS(v, 2); + v_float32x8 v37 = OPENCV_HAL_AVX_SPLAT2_PS(v, 3); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, v37 * m3))); +} + +inline v_float32x8 v_matmuladd(const v_float32x8& v, const v_float32x8& m0, + const v_float32x8& m1, const v_float32x8& m2, + const v_float32x8& a) +{ + v_float32x8 v04 = OPENCV_HAL_AVX_SPLAT2_PS(v, 0); + v_float32x8 v15 = OPENCV_HAL_AVX_SPLAT2_PS(v, 1); + v_float32x8 v26 = OPENCV_HAL_AVX_SPLAT2_PS(v, 2); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, a))); +} + +#define OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ + inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ + { \ + __m256i t0 = cast_from(_mm256_unpacklo_##suffix(a0.val, a1.val)); \ + __m256i t1 = cast_from(_mm256_unpacklo_##suffix(a2.val, a3.val)); \ + __m256i t2 = cast_from(_mm256_unpackhi_##suffix(a0.val, a1.val)); \ + __m256i t3 = cast_from(_mm256_unpackhi_##suffix(a2.val, a3.val)); \ + b0.val = cast_to(_mm256_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm256_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm256_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm256_unpackhi_epi64(t2, t3)); \ + } + +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_uint32x8, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_int32x8, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_float32x8, ps, _mm256_castps_si256, _mm256_castsi256_ps) + 
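+// Note: the unpacklo/unpackhi intrinsics used above operate within each 128-bit lane, so v_transpose4x4 transposes two independent 4x4 blocks (one held in the low halves of a0..a3, one in the high halves), +// and v_matmul/v_matmuladd likewise compute two 4-element matrix-vector products at once: per 128-bit half, dst = v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3 (v_matmuladd replaces the last term with the additive vector a). +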
+//////////////// Value reordering /////////////// + +/* Expand */ +#define OPENCV_HAL_IMPL_AVX_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(_v256_extract_low(a.val)); \ + b1.val = intrin(_v256_extract_high(a.val)); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v256_extract_low(a.val))); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v256_extract_high(a.val))); } \ + inline _Tpwvec v256_load_expand(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint8x32, v_uint16x16, uchar, _mm256_cvtepu8_epi16) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int8x32, v_int16x16, schar, _mm256_cvtepi8_epi16) +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint16x16, v_uint32x8, ushort, _mm256_cvtepu16_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int16x16, v_int32x8, short, _mm256_cvtepi16_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND(v_uint32x8, v_uint64x4, unsigned, _mm256_cvtepu32_epi64) +OPENCV_HAL_IMPL_AVX_EXPAND(v_int32x8, v_int64x4, int, _mm256_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_AVX_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v256_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_uint32x8, uchar, _mm256_cvtepu8_epi32) +OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_int32x8, schar, _mm256_cvtepi8_epi32) + +/* pack */ +// 16 +inline v_int8x32 v_pack(const v_int16x16& a, const v_int16x16& b) +{ return v_int8x32(_v256_shuffle_odd_64(_mm256_packs_epi16(a.val, b.val))); } + +inline v_uint8x32 v_pack(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i t = _mm256_set1_epi16(255); + __m256i a1 = _mm256_min_epu16(a.val, t); + __m256i b1 = _mm256_min_epu16(b.val, t); + return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a1, b1))); +} + +inline v_uint8x32 v_pack_u(const v_int16x16& a, const v_int16x16& b) +{ + return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a.val, b.val))); +} + +inline void v_pack_store(schar* ptr, const v_int16x16& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(uchar* ptr, const v_uint16x16& a) +{ + const __m256i m = _mm256_set1_epi16(255); + __m256i am = _mm256_min_epu16(a.val, m); + am = _v256_shuffle_odd_64(_mm256_packus_epi16(am, am)); + v_store_low(ptr, v_uint8x32(am)); +} + +inline void v_pack_u_store(uchar* ptr, const v_int16x16& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + +template<int n> inline +v_uint8x32 v_rshr_pack(const v_uint16x16& a, const v_uint16x16& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
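+ // Adding delta = 1 << (n-1) before the right shift rounds half up instead of truncating; e.g. (illustrative) v_rshr_pack<2>(a, b) yields saturate_cast<uchar>((x + 2) >> 2) for every input lane x.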
+ v_uint16x16 delta = v256_setall_u16((short)(1 << (n-1))); + return v_pack_u(v_reinterpret_as_s16((a + delta) >> n), + v_reinterpret_as_s16((b + delta) >> n)); +} + +template<int n> inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x16& a) +{ + v_uint16x16 delta = v256_setall_u16((short)(1 << (n-1))); + v_pack_u_store(ptr, v_reinterpret_as_s16((a + delta) >> n)); +} + +template<int n> inline +v_uint8x32 v_rshr_pack_u(const v_int16x16& a, const v_int16x16& b) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x16& a) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template<int n> inline +v_int8x32 v_rshr_pack(const v_int16x16& a, const v_int16x16& b) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(schar* ptr, const v_int16x16& a) +{ + v_int16x16 delta = v256_setall_s16((short)(1 << (n-1))); + v_pack_store(ptr, (a + delta) >> n); +} + +// 32 +inline v_int16x16 v_pack(const v_int32x8& a, const v_int32x8& b) +{ return v_int16x16(_v256_shuffle_odd_64(_mm256_packs_epi32(a.val, b.val))); } + +inline v_uint16x16 v_pack(const v_uint32x8& a, const v_uint32x8& b) +{ return v_uint16x16(_v256_shuffle_odd_64(_v256_packs_epu32(a.val, b.val))); } + +inline v_uint16x16 v_pack_u(const v_int32x8& a, const v_int32x8& b) +{ return v_uint16x16(_v256_shuffle_odd_64(_mm256_packus_epi32(a.val, b.val))); } + +inline void v_pack_store(short* ptr, const v_int32x8& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x8& a) +{ + const __m256i m = _mm256_set1_epi32(65535); + __m256i am = _mm256_min_epu32(a.val, m); + am = _v256_shuffle_odd_64(_mm256_packus_epi32(am, am)); + v_store_low(ptr, v_uint16x16(am)); +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x8& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + + +template<int n> inline +v_uint16x16 v_rshr_pack(const v_uint32x8& a, const v_uint32x8& b) +{ + // we assume that n > 0, and so the shifted 32-bit values can be treated as signed numbers.
+ v_uint32x8 delta = v256_setall_u32(1 << (n-1)); + return v_pack_u(v_reinterpret_as_s32((a + delta) >> n), + v_reinterpret_as_s32((b + delta) >> n)); +} + +template<int n> inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x8& a) +{ + v_uint32x8 delta = v256_setall_u32(1 << (n-1)); + v_pack_u_store(ptr, v_reinterpret_as_s32((a + delta) >> n)); +} + +template<int n> inline +v_uint16x16 v_rshr_pack_u(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x8& a) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template<int n> inline +v_int16x16 v_rshr_pack(const v_int32x8& a, const v_int32x8& b) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(short* ptr, const v_int32x8& a) +{ + v_int32x8 delta = v256_setall_s32(1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// 64 +// Non-saturating pack +inline v_uint32x8 v_pack(const v_uint64x4& a, const v_uint64x4& b) +{ + __m256i a0 = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); + __m256i b0 = _mm256_shuffle_epi32(b.val, _MM_SHUFFLE(0, 0, 2, 0)); + __m256i ab = _mm256_unpacklo_epi64(a0, b0); // a0, a1, b0, b1, a2, a3, b2, b3 + return v_uint32x8(_v256_shuffle_odd_64(ab)); +} + +inline v_int32x8 v_pack(const v_int64x4& a, const v_int64x4& b) +{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } + +inline void v_pack_store(unsigned* ptr, const v_uint64x4& a) +{ + __m256i a0 = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); + v_store_low(ptr, v_uint32x8(_v256_shuffle_odd_64(a0))); +} + +inline void v_pack_store(int* ptr, const v_int64x4& b) +{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } + +template<int n> inline +v_uint32x8 v_rshr_pack(const v_uint64x4& a, const v_uint64x4& b) +{ + v_uint64x4 delta = v256_setall_u64((uint64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x4& a) +{ + v_uint64x4 delta = v256_setall_u64((uint64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +template<int n> inline +v_int32x8 v_rshr_pack(const v_int64x4& a, const v_int64x4& b) +{ + v_int64x4 delta = v256_setall_s64((int64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(int* ptr, const v_int64x4& a) +{ + v_int64x4 delta = v256_setall_s64((int64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// pack boolean +inline v_uint8x32 v_pack_b(const v_uint16x16& a, const v_uint16x16& b) +{ + __m256i ab = _mm256_packs_epi16(a.val, b.val); + return v_uint8x32(_v256_shuffle_odd_64(ab)); +} + +inline v_uint8x32 v_pack_b(const v_uint32x8& a, const v_uint32x8& b, + const v_uint32x8& c, const v_uint32x8& d) +{ + __m256i ab = _mm256_packs_epi32(a.val, b.val); + __m256i cd = _mm256_packs_epi32(c.val, d.val); + + __m256i abcd = _v256_shuffle_odd_64(_mm256_packs_epi16(ab, cd)); + return v_uint8x32(_mm256_shuffle_epi32(abcd, _MM_SHUFFLE(3, 1, 2, 0))); +} + +inline v_uint8x32 v_pack_b(const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c, + const v_uint64x4& d, const v_uint64x4& e, const v_uint64x4& f, + const v_uint64x4& g, const v_uint64x4& h) +{ + __m256i ab = _mm256_packs_epi32(a.val, b.val); + __m256i cd = _mm256_packs_epi32(c.val, d.val); + __m256i ef =
_mm256_packs_epi32(e.val, f.val); + __m256i gh = _mm256_packs_epi32(g.val, h.val); + + __m256i abcd = _mm256_packs_epi32(ab, cd); + __m256i efgh = _mm256_packs_epi32(ef, gh); + __m256i pkall = _v256_shuffle_odd_64(_mm256_packs_epi16(abcd, efgh)); + + __m256i rev = _mm256_alignr_epi8(pkall, pkall, 8); + return v_uint8x32(_mm256_unpacklo_epi16(pkall, rev)); +} + +/* Recombine */ +// it's up there with load and store operations + +/* Extract */ +#define OPENCV_HAL_IMPL_AVX_EXTRACT(_Tpvec) \ + template<int s> \ + inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ + { return v_rotate_right<s>(a, b); } + +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint8x32) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int8x32) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint16x16) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int16x16) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint64x4) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_int64x4) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8) +OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4) + +template<int i> +inline uchar v_extract_n(v_uint8x32 a) +{ + return (uchar)_v256_extract_epi8<i>(a.val); +} + +template<int i> +inline schar v_extract_n(v_int8x32 a) +{ + return (schar)v_extract_n<i>(v_reinterpret_as_u8(a)); +} + +template<int i> +inline ushort v_extract_n(v_uint16x16 a) +{ + return (ushort)_v256_extract_epi16<i>(a.val); +} + +template<int i> +inline short v_extract_n(v_int16x16 a) +{ + return (short)v_extract_n<i>(v_reinterpret_as_u16(a)); +} + +template<int i> +inline uint v_extract_n(v_uint32x8 a) +{ + return (uint)_v256_extract_epi32<i>(a.val); +} + +template<int i> +inline int v_extract_n(v_int32x8 a) +{ + return (int)v_extract_n<i>(v_reinterpret_as_u32(a)); +} + +template<int i> +inline uint64 v_extract_n(v_uint64x4 a) +{ + return (uint64)_v256_extract_epi64<i>(a.val); +} + +template<int i> +inline int64 v_extract_n(v_int64x4 v) +{ + return (int64)v_extract_n<i>(v_reinterpret_as_u64(v)); +} + +template<int i> +inline float v_extract_n(v_float32x8 v) +{ + union { uint iv; float fv; } d; + d.iv = v_extract_n<i>(v_reinterpret_as_u32(v)); + return d.fv; +} + +template<int i> +inline double v_extract_n(v_float64x4 v) +{ + union { uint64 iv; double dv; } d; + d.iv = v_extract_n<i>(v_reinterpret_as_u64(v)); + return d.dv; +} + +template<int i> +inline v_uint32x8 v_broadcast_element(v_uint32x8 a) +{ + static const __m256i perm = _mm256_set1_epi32((char)i); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +template<int i> +inline v_int32x8 v_broadcast_element(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); } + +template<int i> +inline v_float32x8 v_broadcast_element(const v_float32x8 &a) +{ return v_reinterpret_as_f32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); } + + +///////////////////// load deinterleave ///////////////////////////// + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + + const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, + 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + __m256i p0 = _mm256_shuffle_epi8(ab0, sh); + __m256i p1 = _mm256_shuffle_epi8(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint8x32(a0); + b = v_uint8x32(b0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b ) +{ +
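// Same scheme as the 8-bit case above: an in-lane shuffle splits even (a) and odd (b) elements within each 128-bit lane, the cross-lane permutes regroup the halves, and the 64-bit unpacks produce the two deinterleaved channels. +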
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + + const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); + __m256i p0 = _mm256_shuffle_epi8(ab0, sh); + __m256i p1 = _mm256_shuffle_epi8(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint16x16(a0); + b = v_uint16x16(b0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + + enum { sh = 0+2*4+1*16+3*64 }; + __m256i p0 = _mm256_shuffle_epi32(ab0, sh); + __m256i p1 = _mm256_shuffle_epi32(ab1, sh); + __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint32x8(a0); + b = v_uint32x8(b0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b ) +{ + __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + + __m256i pl = _mm256_permute2x128_si256(ab0, ab1, 0 + 2*16); + __m256i ph = _mm256_permute2x128_si256(ab0, ab1, 1 + 3*16); + __m256i a0 = _mm256_unpacklo_epi64(pl, ph); + __m256i b0 = _mm256_unpackhi_epi64(pl, ph); + a = v_uint64x4(a0); + b = v_uint64x4(b0); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1); + + __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); + __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); + __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); + + const __m256i + sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, + 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13), + sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, + 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14), + sh_r = _mm256_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, + 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + b0 = _mm256_shuffle_epi8(b0, sh_b); + g0 = _mm256_shuffle_epi8(g0, sh_g); + r0 = _mm256_shuffle_epi8(r0, sh_r); + + a = v_uint8x32(b0); + b = v_uint8x32(g0); + c = v_uint8x32(r0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i bgr2 = 
_mm256_loadu_si256((const __m256i*)(ptr + 32)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); + __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); + __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1); + __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0); + const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, + 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13); + const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + b0 = _mm256_shuffle_epi8(b0, sh_b); + g0 = _mm256_shuffle_epi8(g0, sh_g); + r0 = _mm256_shuffle_epi8(r0, sh_r); + + a = v_uint16x16(b0); + b = v_uint16x16(g0); + c = v_uint16x16(r0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + + __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16); + __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16); + + __m256i b0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_low, s02_high, 0x24), bgr1, 0x92); + __m256i g0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_high, s02_low, 0x92), bgr1, 0x24); + __m256i r0 = _mm256_blend_epi32(_mm256_blend_epi32(bgr1, s02_low, 0x24), s02_high, 0x92); + + b0 = _mm256_shuffle_epi32(b0, 0x6c); + g0 = _mm256_shuffle_epi32(g0, 0xb1); + r0 = _mm256_shuffle_epi32(r0, 0xc6); + + a = v_uint32x8(b0); + b = v_uint32x8(g0); + c = v_uint32x8(r0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + + __m256i s01 = _mm256_blend_epi32(bgr0, bgr1, 0xf0); + __m256i s12 = _mm256_blend_epi32(bgr1, bgr2, 0xf0); + __m256i s20r = _mm256_permute4x64_epi64(_mm256_blend_epi32(bgr2, bgr0, 0xf0), 0x1b); + __m256i b0 = _mm256_unpacklo_epi64(s01, s20r); + __m256i g0 = _mm256_alignr_epi8(s12, s01, 8); + __m256i r0 = _mm256_unpackhi_epi64(s20r, s12); + + a = v_uint64x4(b0); + b = v_uint64x4(g0); + c = v_uint64x4(r0); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c, v_uint8x32& d ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64)); + __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96)); + const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + + __m256i p0 = 
_mm256_shuffle_epi8(bgr0, sh); + __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); + __m256i p2 = _mm256_shuffle_epi8(bgr2, sh); + __m256i p3 = _mm256_shuffle_epi8(bgr3, sh); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint8x32(b0); + b = v_uint8x32(g0); + c = v_uint8x32(r0); + d = v_uint8x32(a0); +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c, v_uint16x16& d ) +{ + __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48)); + const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15); + __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); + __m256i p1 = _mm256_shuffle_epi8(bgr1, sh); + __m256i p2 = _mm256_shuffle_epi8(bgr2, sh); + __m256i p3 = _mm256_shuffle_epi8(bgr3, sh); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint16x16(b0); + b = v_uint16x16(g0); + c = v_uint16x16(r0); + d = v_uint16x16(a0); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c, v_uint32x8& d ) +{ + __m256i p0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i p1 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i p2 = _mm256_loadu_si256((const __m256i*)(ptr + 16)); + __m256i p3 = _mm256_loadu_si256((const __m256i*)(ptr + 24)); + + __m256i p01l = _mm256_unpacklo_epi32(p0, p1); + __m256i p01h = _mm256_unpackhi_epi32(p0, p1); + __m256i p23l = _mm256_unpacklo_epi32(p2, p3); + __m256i p23h = _mm256_unpackhi_epi32(p2, p3); + + __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16); + __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16); + __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16); + __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi32(pll, plh); + __m256i g0 = _mm256_unpackhi_epi32(pll, plh); + __m256i r0 = _mm256_unpacklo_epi32(phl, phh); + __m256i a0 = _mm256_unpackhi_epi32(phl, phh); + + a = v_uint32x8(b0); + b = v_uint32x8(g0); + c = v_uint32x8(r0); + d = v_uint32x8(a0); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, 
v_uint64x4& b, v_uint64x4& c, v_uint64x4& d ) +{ + __m256i bgra0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i bgra1 = _mm256_loadu_si256((const __m256i*)(ptr + 4)); + __m256i bgra2 = _mm256_loadu_si256((const __m256i*)(ptr + 8)); + __m256i bgra3 = _mm256_loadu_si256((const __m256i*)(ptr + 12)); + + __m256i l02 = _mm256_permute2x128_si256(bgra0, bgra2, 0 + 2*16); + __m256i h02 = _mm256_permute2x128_si256(bgra0, bgra2, 1 + 3*16); + __m256i l13 = _mm256_permute2x128_si256(bgra1, bgra3, 0 + 2*16); + __m256i h13 = _mm256_permute2x128_si256(bgra1, bgra3, 1 + 3*16); + + __m256i b0 = _mm256_unpacklo_epi64(l02, l13); + __m256i g0 = _mm256_unpackhi_epi64(l02, l13); + __m256i r0 = _mm256_unpacklo_epi64(h02, h13); + __m256i a0 = _mm256_unpackhi_epi64(h02, h13); + + a = v_uint64x4(b0); + b = v_uint64x4(g0); + c = v_uint64x4(r0); + d = v_uint64x4(a0); +} + +///////////////////////////// store interleave ///////////////////////////////////// + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 32), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 32), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 32), xy1); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 16), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 16), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 16), xy1); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val); + + __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 8), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 8), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 8), xy1); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val); + __m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val); + + __m256i xy0 = 
_mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16); + __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, xy0); + _mm256_stream_si256((__m256i*)(ptr + 4), xy1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, xy0); + _mm256_store_si256((__m256i*)(ptr + 4), xy1); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, xy0); + _mm256_storeu_si256((__m256i*)(ptr + 4), xy1); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, const v_uint8x32& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + const __m256i sh_b = _mm256_setr_epi8( + 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, + 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m256i sh_g = _mm256_setr_epi8( + 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, + 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m256i sh_r = _mm256_setr_epi8( + 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, + 10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + + __m256i b0 = _mm256_shuffle_epi8(a.val, sh_b); + __m256i g0 = _mm256_shuffle_epi8(b.val, sh_g); + __m256i r0 = _mm256_shuffle_epi8(c.val, sh_r); + + const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, + 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, + 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + + __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); + __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1); + __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16); + __m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 32), bgr1); + _mm256_store_si256((__m256i*)(ptr + 64), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgr2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, const v_uint16x16& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + const __m256i sh_b = _mm256_setr_epi8( + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, + 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m256i sh_g = _mm256_setr_epi8( + 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, + 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m256i sh_r = _mm256_setr_epi8( + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, + 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + + __m256i b0 = _mm256_shuffle_epi8(a.val, sh_b); + __m256i g0 = _mm256_shuffle_epi8(b.val, sh_g); + __m256i r0 = _mm256_shuffle_epi8(c.val, sh_r); + + const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, + 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0); + const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, + 
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0); + + __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1); + __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1); + __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p2, 0 + 2*16); + //__m256i bgr1 = p1; + __m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 16), p1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 16), p1); + _mm256_store_si256((__m256i*)(ptr + 32), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 16), p1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgr2); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, const v_uint32x8& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i b0 = _mm256_shuffle_epi32(a.val, 0x6c); + __m256i g0 = _mm256_shuffle_epi32(b.val, 0xb1); + __m256i r0 = _mm256_shuffle_epi32(c.val, 0xc6); + + __m256i p0 = _mm256_blend_epi32(_mm256_blend_epi32(b0, g0, 0x92), r0, 0x24); + __m256i p1 = _mm256_blend_epi32(_mm256_blend_epi32(g0, r0, 0x92), b0, 0x24); + __m256i p2 = _mm256_blend_epi32(_mm256_blend_epi32(r0, b0, 0x92), g0, 0x24); + + __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + //__m256i bgr1 = p2; + __m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 8), p2); + _mm256_stream_si256((__m256i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 8), p2); + _mm256_store_si256((__m256i*)(ptr + 16), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 8), p2); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i s01 = _mm256_unpacklo_epi64(a.val, b.val); + __m256i s12 = _mm256_unpackhi_epi64(b.val, c.val); + __m256i s20 = _mm256_blend_epi32(c.val, a.val, 0xcc); + + __m256i bgr0 = _mm256_permute2x128_si256(s01, s20, 0 + 2*16); + __m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f); + __m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgr0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgr1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgr0); + _mm256_store_si256((__m256i*)(ptr + 4), bgr1); + _mm256_store_si256((__m256i*)(ptr + 8), bgr2); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgr0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgr1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgr2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, + const v_uint8x32& c, const v_uint8x32& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi8(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi8(a.val, 
b.val); + __m256i ra0 = _mm256_unpacklo_epi8(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi8(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi16(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi16(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi16(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi16(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 64), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 96), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 32), bgra1); + _mm256_store_si256((__m256i*)(ptr + 64), bgra2); + _mm256_store_si256((__m256i*)(ptr + 96), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 64), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 96), bgra3); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, + const v_uint16x16& c, const v_uint16x16& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi16(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi16(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi16(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi16(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi32(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi32(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi32(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi32(bg1, ra1); + + __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 32), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 48), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 16), bgra1); + _mm256_store_si256((__m256i*)(ptr + 32), bgra2); + _mm256_store_si256((__m256i*)(ptr + 48), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 32), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 48), bgra3); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, + const v_uint32x8& c, const v_uint32x8& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi32(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi32(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi32(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi32(c.val, d.val); + + __m256i bgra0_ = _mm256_unpacklo_epi64(bg0, ra0); + __m256i bgra1_ = _mm256_unpackhi_epi64(bg0, ra0); + __m256i bgra2_ = _mm256_unpacklo_epi64(bg1, ra1); + __m256i bgra3_ = _mm256_unpackhi_epi64(bg1, ra1); + + 
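// After the 64-bit unpacks each bgraN_ register holds pixels N and N+4 interleaved across lanes; the cross-lane permutes below regroup them so the four stores write pixels 0..7 in sequential order. +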
__m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16); + __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16); + __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 16), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 24), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 8), bgra1); + _mm256_store_si256((__m256i*)(ptr + 16), bgra2); + _mm256_store_si256((__m256i*)(ptr + 24), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 16), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 24), bgra3); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, + const v_uint64x4& c, const v_uint64x4& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m256i bg0 = _mm256_unpacklo_epi64(a.val, b.val); + __m256i bg1 = _mm256_unpackhi_epi64(a.val, b.val); + __m256i ra0 = _mm256_unpacklo_epi64(c.val, d.val); + __m256i ra1 = _mm256_unpackhi_epi64(c.val, d.val); + + __m256i bgra0 = _mm256_permute2x128_si256(bg0, ra0, 0 + 2*16); + __m256i bgra1 = _mm256_permute2x128_si256(bg1, ra1, 0 + 2*16); + __m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16); + __m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm256_stream_si256((__m256i*)ptr, bgra0); + _mm256_stream_si256((__m256i*)(ptr + 4), bgra1); + _mm256_stream_si256((__m256i*)(ptr + 8), bgra2); + _mm256_stream_si256((__m256i*)(ptr + 12), bgra3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm256_store_si256((__m256i*)ptr, bgra0); + _mm256_store_si256((__m256i*)(ptr + 4), bgra1); + _mm256_store_si256((__m256i*)(ptr + 8), bgra2); + _mm256_store_si256((__m256i*)(ptr + 12), bgra3); + } + else + { + _mm256_storeu_si256((__m256i*)ptr, bgra0); + _mm256_storeu_si256((__m256i*)(ptr + 4), bgra1); + _mm256_storeu_si256((__m256i*)(ptr + 8), bgra2); + _mm256_storeu_si256((__m256i*)(ptr + 12), bgra3); + } +} + +#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = 
v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int16x16, short, s16, v_uint16x16, ushort, u16) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int32x8, int, s32, v_uint32x8, unsigned, u32) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, unsigned, u32) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64) +OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64) + +// +// FP16 +// + +inline v_float32x8 v256_load_expand(const float16_t* ptr) +{ +#if CV_FP16 + return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr))); +#else + float CV_DECL_ALIGNED(32) buf[8]; + for (int i = 0; i < 8; i++) + buf[i] = (float)ptr[i]; + return v256_load_aligned(buf); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x8& a) +{ +#if CV_FP16 + __m128i ah = _mm256_cvtps_ph(a.val, 0); + _mm_storeu_si128((__m128i*)ptr, ah); +#else + float CV_DECL_ALIGNED(32) buf[8]; + v_store_aligned(buf, a); + for (int i = 0; i < 8; i++) + ptr[i] = float16_t(buf[i]); +#endif +} + +// +// end of FP16 +// + +inline void v256_cleanup() { _mm256_zeroall(); } + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} // cv:: + +#endif // OPENCV_HAL_INTRIN_AVX_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_avx512.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_avx512.hpp new file mode 100644 index 0000000..d20d6dd --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_avx512.hpp @@ -0,0 +1,3090 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_AVX512_HPP +#define OPENCV_HAL_INTRIN_AVX512_HPP + +#if defined(_MSC_VER) && (_MSC_VER < 1920/*MSVS2019*/) +# pragma warning(disable:4146) // unary minus operator applied to unsigned type, result still unsigned +# pragma warning(disable:4309) // 'argument': truncation of constant value +# pragma warning(disable:4310) // cast truncates constant value +#endif + +#define CVT_ROUND_MODES_IMPLEMENTED 0 + +#define CV_SIMD512 1 +#define CV_SIMD512_64F 1 +#define CV_SIMD512_FP16 0 // no native operations with FP16 type. 
Only load/store from float32x8 are available (if CV_FP16 == 1) + +#define _v512_set_epu64(a7, a6, a5, a4, a3, a2, a1, a0) _mm512_set_epi64((int64)(a7),(int64)(a6),(int64)(a5),(int64)(a4),(int64)(a3),(int64)(a2),(int64)(a1),(int64)(a0)) +#define _v512_set_epu32(a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _mm512_set_epi64(((int64)(a15)<<32)|(int64)(a14), ((int64)(a13)<<32)|(int64)(a12), ((int64)(a11)<<32)|(int64)(a10), ((int64)( a9)<<32)|(int64)( a8), \ + ((int64)( a7)<<32)|(int64)( a6), ((int64)( a5)<<32)|(int64)( a4), ((int64)( a3)<<32)|(int64)( a2), ((int64)( a1)<<32)|(int64)( a0)) +#define _v512_set_epu16(a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu32(((unsigned)(a31)<<16)|(unsigned)(a30), ((unsigned)(a29)<<16)|(unsigned)(a28), ((unsigned)(a27)<<16)|(unsigned)(a26), ((unsigned)(a25)<<16)|(unsigned)(a24), \ + ((unsigned)(a23)<<16)|(unsigned)(a22), ((unsigned)(a21)<<16)|(unsigned)(a20), ((unsigned)(a19)<<16)|(unsigned)(a18), ((unsigned)(a17)<<16)|(unsigned)(a16), \ + ((unsigned)(a15)<<16)|(unsigned)(a14), ((unsigned)(a13)<<16)|(unsigned)(a12), ((unsigned)(a11)<<16)|(unsigned)(a10), ((unsigned)( a9)<<16)|(unsigned)( a8), \ + ((unsigned)( a7)<<16)|(unsigned)( a6), ((unsigned)( a5)<<16)|(unsigned)( a4), ((unsigned)( a3)<<16)|(unsigned)( a2), ((unsigned)( a1)<<16)|(unsigned)( a0)) +#define _v512_set_epu8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \ + a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \ + a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu32(((unsigned)(a63)<<24)|((unsigned)(a62)<<16)|((unsigned)(a61)<<8)|(unsigned)(a60),((unsigned)(a59)<<24)|((unsigned)(a58)<<16)|((unsigned)(a57)<<8)|(unsigned)(a56), \ + ((unsigned)(a55)<<24)|((unsigned)(a54)<<16)|((unsigned)(a53)<<8)|(unsigned)(a52),((unsigned)(a51)<<24)|((unsigned)(a50)<<16)|((unsigned)(a49)<<8)|(unsigned)(a48), \ + ((unsigned)(a47)<<24)|((unsigned)(a46)<<16)|((unsigned)(a45)<<8)|(unsigned)(a44),((unsigned)(a43)<<24)|((unsigned)(a42)<<16)|((unsigned)(a41)<<8)|(unsigned)(a40), \ + ((unsigned)(a39)<<24)|((unsigned)(a38)<<16)|((unsigned)(a37)<<8)|(unsigned)(a36),((unsigned)(a35)<<24)|((unsigned)(a34)<<16)|((unsigned)(a33)<<8)|(unsigned)(a32), \ + ((unsigned)(a31)<<24)|((unsigned)(a30)<<16)|((unsigned)(a29)<<8)|(unsigned)(a28),((unsigned)(a27)<<24)|((unsigned)(a26)<<16)|((unsigned)(a25)<<8)|(unsigned)(a24), \ + ((unsigned)(a23)<<24)|((unsigned)(a22)<<16)|((unsigned)(a21)<<8)|(unsigned)(a20),((unsigned)(a19)<<24)|((unsigned)(a18)<<16)|((unsigned)(a17)<<8)|(unsigned)(a16), \ + ((unsigned)(a15)<<24)|((unsigned)(a14)<<16)|((unsigned)(a13)<<8)|(unsigned)(a12),((unsigned)(a11)<<24)|((unsigned)(a10)<<16)|((unsigned)( a9)<<8)|(unsigned)( a8), \ + ((unsigned)( a7)<<24)|((unsigned)( a6)<<16)|((unsigned)( a5)<<8)|(unsigned)( a4),((unsigned)( a3)<<24)|((unsigned)( a2)<<16)|((unsigned)( a1)<<8)|(unsigned)( a0)) +#define _v512_set_epi8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \ + a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \ + a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \ + a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0) \ + _v512_set_epu8((uchar)(a63), (uchar)(a62), 
(uchar)(a61), (uchar)(a60), (uchar)(a59), (uchar)(a58), (uchar)(a57), (uchar)(a56), \ + (uchar)(a55), (uchar)(a54), (uchar)(a53), (uchar)(a52), (uchar)(a51), (uchar)(a50), (uchar)(a49), (uchar)(a48), \ + (uchar)(a47), (uchar)(a46), (uchar)(a45), (uchar)(a44), (uchar)(a43), (uchar)(a42), (uchar)(a41), (uchar)(a40), \ + (uchar)(a39), (uchar)(a38), (uchar)(a37), (uchar)(a36), (uchar)(a35), (uchar)(a34), (uchar)(a33), (uchar)(a32), \ + (uchar)(a31), (uchar)(a30), (uchar)(a29), (uchar)(a28), (uchar)(a27), (uchar)(a26), (uchar)(a25), (uchar)(a24), \ + (uchar)(a23), (uchar)(a22), (uchar)(a21), (uchar)(a20), (uchar)(a19), (uchar)(a18), (uchar)(a17), (uchar)(a16), \ + (uchar)(a15), (uchar)(a14), (uchar)(a13), (uchar)(a12), (uchar)(a11), (uchar)(a10), (uchar)( a9), (uchar)( a8), \ + (uchar)( a7), (uchar)( a6), (uchar)( a5), (uchar)( a4), (uchar)( a3), (uchar)( a2), (uchar)( a1), (uchar)( a0)) + +#ifndef _mm512_cvtpd_pslo +#ifdef _mm512_zextsi256_si512 +#define _mm512_cvtpd_pslo(a) _mm512_zextps256_ps512(_mm512_cvtpd_ps(a)) +#else +//if preferred way to extend with zeros is unavailable +#define _mm512_cvtpd_pslo(a) _mm512_castps256_ps512(_mm512_cvtpd_ps(a)) +#endif +#endif +///////// Utils //////////// + +namespace +{ + +inline __m512i _v512_combine(const __m256i& lo, const __m256i& hi) +{ return _mm512_inserti32x8(_mm512_castsi256_si512(lo), hi, 1); } + +inline __m512 _v512_combine(const __m256& lo, const __m256& hi) +{ return _mm512_insertf32x8(_mm512_castps256_ps512(lo), hi, 1); } + +inline __m512d _v512_combine(const __m256d& lo, const __m256d& hi) +{ return _mm512_insertf64x4(_mm512_castpd256_pd512(lo), hi, 1); } + +inline int _v_cvtsi512_si32(const __m512i& a) +{ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); } + +inline __m256i _v512_extract_high(const __m512i& v) +{ return _mm512_extracti32x8_epi32(v, 1); } + +inline __m256 _v512_extract_high(const __m512& v) +{ return _mm512_extractf32x8_ps(v, 1); } + +inline __m256d _v512_extract_high(const __m512d& v) +{ return _mm512_extractf64x4_pd(v, 1); } + +inline __m256i _v512_extract_low(const __m512i& v) +{ return _mm512_castsi512_si256(v); } + +inline __m256 _v512_extract_low(const __m512& v) +{ return _mm512_castps512_ps256(v); } + +inline __m256d _v512_extract_low(const __m512d& v) +{ return _mm512_castpd512_pd256(v); } + +inline __m512i _v512_insert(const __m512i& a, const __m256i& b) +{ return _mm512_inserti32x8(a, b, 0); } + +inline __m512 _v512_insert(const __m512& a, const __m256& b) +{ return _mm512_insertf32x8(a, b, 0); } + +inline __m512d _v512_insert(const __m512d& a, const __m256d& b) +{ return _mm512_insertf64x4(a, b, 0); } + +} + +namespace cv +{ + +//! 
@cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x64 +{ + typedef uchar lane_type; + enum { nlanes = 64 }; + __m512i val; + + explicit v_uint8x64(__m512i v) : val(v) {} + v_uint8x64(uchar v0, uchar v1, uchar v2, uchar v3, + uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, + uchar v12, uchar v13, uchar v14, uchar v15, + uchar v16, uchar v17, uchar v18, uchar v19, + uchar v20, uchar v21, uchar v22, uchar v23, + uchar v24, uchar v25, uchar v26, uchar v27, + uchar v28, uchar v29, uchar v30, uchar v31, + uchar v32, uchar v33, uchar v34, uchar v35, + uchar v36, uchar v37, uchar v38, uchar v39, + uchar v40, uchar v41, uchar v42, uchar v43, + uchar v44, uchar v45, uchar v46, uchar v47, + uchar v48, uchar v49, uchar v50, uchar v51, + uchar v52, uchar v53, uchar v54, uchar v55, + uchar v56, uchar v57, uchar v58, uchar v59, + uchar v60, uchar v61, uchar v62, uchar v63) + { + val = _v512_set_epu8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_uint8x64() {} + + static inline v_uint8x64 zero() { return v_uint8x64(_mm512_setzero_si512()); } + + uchar get0() const { return (uchar)_v_cvtsi512_si32(val); } +}; + +struct v_int8x64 +{ + typedef schar lane_type; + enum { nlanes = 64 }; + __m512i val; + + explicit v_int8x64(__m512i v) : val(v) {} + v_int8x64(schar v0, schar v1, schar v2, schar v3, + schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, + schar v12, schar v13, schar v14, schar v15, + schar v16, schar v17, schar v18, schar v19, + schar v20, schar v21, schar v22, schar v23, + schar v24, schar v25, schar v26, schar v27, + schar v28, schar v29, schar v30, schar v31, + schar v32, schar v33, schar v34, schar v35, + schar v36, schar v37, schar v38, schar v39, + schar v40, schar v41, schar v42, schar v43, + schar v44, schar v45, schar v46, schar v47, + schar v48, schar v49, schar v50, schar v51, + schar v52, schar v53, schar v54, schar v55, + schar v56, schar v57, schar v58, schar v59, + schar v60, schar v61, schar v62, schar v63) + { + val = _v512_set_epi8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0); + } + v_int8x64() {} + + static inline v_int8x64 zero() { return v_int8x64(_mm512_setzero_si512()); } + + schar get0() const { return (schar)_v_cvtsi512_si32(val); } +}; + +struct v_uint16x32 +{ + typedef ushort lane_type; + enum { nlanes = 32 }; + __m512i val; + + explicit v_uint16x32(__m512i v) : val(v) {} + v_uint16x32(ushort v0, ushort v1, ushort v2, ushort v3, + ushort v4, ushort v5, ushort v6, ushort v7, + ushort v8, ushort v9, ushort v10, ushort v11, + ushort v12, ushort v13, ushort v14, ushort v15, + ushort v16, ushort v17, ushort v18, ushort v19, + ushort v20, ushort v21, ushort v22, ushort v23, + ushort v24, ushort v25, ushort v26, ushort v27, + ushort v28, ushort v29, ushort v30, ushort v31) + { + val = _v512_set_epu16(v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, 
v7, v6, v5, v4, v3, v2, v1, v0); + } + v_uint16x32() {} + + static inline v_uint16x32 zero() { return v_uint16x32(_mm512_setzero_si512()); } + + ushort get0() const { return (ushort)_v_cvtsi512_si32(val); } +}; + +struct v_int16x32 +{ + typedef short lane_type; + enum { nlanes = 32 }; + __m512i val; + + explicit v_int16x32(__m512i v) : val(v) {} + v_int16x32(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7, + short v8, short v9, short v10, short v11, short v12, short v13, short v14, short v15, + short v16, short v17, short v18, short v19, short v20, short v21, short v22, short v23, + short v24, short v25, short v26, short v27, short v28, short v29, short v30, short v31) + { + val = _v512_set_epu16((ushort)v31, (ushort)v30, (ushort)v29, (ushort)v28, (ushort)v27, (ushort)v26, (ushort)v25, (ushort)v24, + (ushort)v23, (ushort)v22, (ushort)v21, (ushort)v20, (ushort)v19, (ushort)v18, (ushort)v17, (ushort)v16, + (ushort)v15, (ushort)v14, (ushort)v13, (ushort)v12, (ushort)v11, (ushort)v10, (ushort)v9 , (ushort)v8, + (ushort)v7 , (ushort)v6 , (ushort)v5 , (ushort)v4 , (ushort)v3 , (ushort)v2 , (ushort)v1 , (ushort)v0); + } + v_int16x32() {} + + static inline v_int16x32 zero() { return v_int16x32(_mm512_setzero_si512()); } + + short get0() const { return (short)_v_cvtsi512_si32(val); } +}; + +struct v_uint32x16 +{ + typedef unsigned lane_type; + enum { nlanes = 16 }; + __m512i val; + + explicit v_uint32x16(__m512i v) : val(v) {} + v_uint32x16(unsigned v0, unsigned v1, unsigned v2, unsigned v3, + unsigned v4, unsigned v5, unsigned v6, unsigned v7, + unsigned v8, unsigned v9, unsigned v10, unsigned v11, + unsigned v12, unsigned v13, unsigned v14, unsigned v15) + { + val = _mm512_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3, (int)v4, (int)v5, (int)v6, (int)v7, + (int)v8, (int)v9, (int)v10, (int)v11, (int)v12, (int)v13, (int)v14, (int)v15); + } + v_uint32x16() {} + + static inline v_uint32x16 zero() { return v_uint32x16(_mm512_setzero_si512()); } + + unsigned get0() const { return (unsigned)_v_cvtsi512_si32(val); } +}; + +struct v_int32x16 +{ + typedef int lane_type; + enum { nlanes = 16 }; + __m512i val; + + explicit v_int32x16(__m512i v) : val(v) {} + v_int32x16(int v0, int v1, int v2, int v3, int v4, int v5, int v6, int v7, + int v8, int v9, int v10, int v11, int v12, int v13, int v14, int v15) + { + val = _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); + } + v_int32x16() {} + + static inline v_int32x16 zero() { return v_int32x16(_mm512_setzero_si512()); } + + int get0() const { return _v_cvtsi512_si32(val); } +}; + +struct v_float32x16 +{ + typedef float lane_type; + enum { nlanes = 16 }; + __m512 val; + + explicit v_float32x16(__m512 v) : val(v) {} + v_float32x16(float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, + float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15) + { + val = _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); + } + v_float32x16() {} + + static inline v_float32x16 zero() { return v_float32x16(_mm512_setzero_ps()); } + + float get0() const { return _mm_cvtss_f32(_mm512_castps512_ps128(val)); } +}; + +struct v_uint64x8 +{ + typedef uint64 lane_type; + enum { nlanes = 8 }; + __m512i val; + + explicit v_uint64x8(__m512i v) : val(v) {} + v_uint64x8(uint64 v0, uint64 v1, uint64 v2, uint64 v3, uint64 v4, uint64 v5, uint64 v6, uint64 v7) + { val = _mm512_setr_epi64((int64)v0, (int64)v1, (int64)v2, (int64)v3, (int64)v4, 
(int64)v5, (int64)v6, (int64)v7); } + v_uint64x8() {} + + static inline v_uint64x8 zero() { return v_uint64x8(_mm512_setzero_si512()); } + + uint64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm512_castsi512_si128(val)); + int b = _mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } +}; + +struct v_int64x8 +{ + typedef int64 lane_type; + enum { nlanes = 8 }; + __m512i val; + + explicit v_int64x8(__m512i v) : val(v) {} + v_int64x8(int64 v0, int64 v1, int64 v2, int64 v3, int64 v4, int64 v5, int64 v6, int64 v7) + { val = _mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7); } + v_int64x8() {} + + static inline v_int64x8 zero() { return v_int64x8(_mm512_setzero_si512()); } + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm512_castsi512_si128(val)); + int b = _mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } +}; + +struct v_float64x8 +{ + typedef double lane_type; + enum { nlanes = 8 }; + __m512d val; + + explicit v_float64x8(__m512d v) : val(v) {} + v_float64x8(double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7) + { val = _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7); } + v_float64x8() {} + + static inline v_float64x8 zero() { return v_float64x8(_mm512_setzero_pd()); } + + double get0() const { return _mm_cvtsd_f64(_mm512_castpd512_pd128(val)); } +}; + +//////////////// Load and store operations /////////////// + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE(_Tpvec, _Tp) \ + inline _Tpvec v512_load(const _Tp* ptr) \ + { return _Tpvec(_mm512_loadu_si512((const __m512i*)ptr)); } \ + inline _Tpvec v512_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm512_load_si512((const __m512i*)ptr)); } \ + inline _Tpvec v512_load_low(const _Tp* ptr) \ + { \ + __m256i v256 = _mm256_loadu_si256((const __m256i*)ptr); \ + return _Tpvec(_mm512_castsi256_si512(v256)); \ + } \ + inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + __m256i vlo = _mm256_loadu_si256((const __m256i*)ptr0); \ + __m256i vhi = _mm256_loadu_si256((const __m256i*)ptr1); \ + return _Tpvec(_v512_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm512_storeu_si512((__m512i*)ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm512_store_si512((__m512i*)ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm512_stream_si512((__m512i*)ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm512_storeu_si512((__m512i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm512_stream_si512((__m512i*)ptr, a.val); \ + else \ + _mm512_store_si512((__m512i*)ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, _v512_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_si256((__m256i*)ptr, _v512_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint8x64, uchar) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int8x64, schar) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint16x32, ushort) 
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int16x32, short) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint32x16, unsigned) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int32x16, int) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint64x8, uint64) +OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int64x8, int64) + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \ + inline _Tpvec v512_load(const _Tp* ptr) \ + { return _Tpvec(_mm512_loadu_##suffix(ptr)); } \ + inline _Tpvec v512_load_aligned(const _Tp* ptr) \ + { return _Tpvec(_mm512_load_##suffix(ptr)); } \ + inline _Tpvec v512_load_low(const _Tp* ptr) \ + { \ + return _Tpvec(_mm512_cast##suffix##256_##suffix##512 \ + (_mm256_loadu_##suffix(ptr))); \ + } \ + inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ + { \ + halfreg vlo = _mm256_loadu_##suffix(ptr0); \ + halfreg vhi = _mm256_loadu_##suffix(ptr1); \ + return _Tpvec(_v512_combine(vlo, vhi)); \ + } \ + inline void v_store(_Tp* ptr, const _Tpvec& a) \ + { _mm512_storeu_##suffix(ptr, a.val); } \ + inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ + { _mm512_store_##suffix(ptr, a.val); } \ + inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ + { _mm512_stream_##suffix(ptr, a.val); } \ + inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ + { \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm512_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm512_stream_##suffix(ptr, a.val); \ + else \ + _mm512_store_##suffix(ptr, a.val); \ + } \ + inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, _v512_extract_low(a.val)); } \ + inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ + { _mm256_storeu_##suffix(ptr, _v512_extract_high(a.val)); } + +OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float32x16, float, ps, __m256) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float64x8, double, pd, __m256d) + +#define OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, _Tpvecf, suffix, cast) \ + inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a) \ + { return _Tpvec(cast(a.val)); } + +#define OPENCV_HAL_IMPL_AVX512_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s) \ + inline _Tpvec v512_setzero_##suffix() \ + { return _Tpvec(_mm512_setzero_si512()); } \ + inline _Tpvec v512_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm512_set1_##ssuffix((ctype_s)v)); } \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8, suffix, OPENCV_HAL_NOP) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float32x16, suffix, _mm512_castps_si512) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float64x8, suffix, _mm512_castpd_si512) + +OPENCV_HAL_IMPL_AVX512_INIT(v_uint8x64, uchar, u8, epi8, char) +OPENCV_HAL_IMPL_AVX512_INIT(v_int8x64, schar, s8, epi8, char) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint16x32, ushort, u16, epi16, short) +OPENCV_HAL_IMPL_AVX512_INIT(v_int16x32, short, s16, epi16, short) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint32x16, unsigned, u32, epi32, int) +OPENCV_HAL_IMPL_AVX512_INIT(v_int32x16, int, s32, epi32, int) +OPENCV_HAL_IMPL_AVX512_INIT(v_uint64x8, 
uint64, u64, epi64, int64) +OPENCV_HAL_IMPL_AVX512_INIT(v_int64x8, int64, s64, epi64, int64) + +#define OPENCV_HAL_IMPL_AVX512_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \ + inline _Tpvec v512_setzero_##suffix() \ + { return _Tpvec(_mm512_setzero_##zsuffix()); } \ + inline _Tpvec v512_setall_##suffix(_Tp v) \ + { return _Tpvec(_mm512_set1_##zsuffix(v)); } \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8, suffix, cast) \ + OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8, suffix, cast) + +OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float32x16, float, f32, ps, _mm512_castsi512_ps) +OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float64x8, double, f64, pd, _mm512_castsi512_pd) + +inline v_float32x16 v_reinterpret_as_f32(const v_float32x16& a) +{ return a; } +inline v_float32x16 v_reinterpret_as_f32(const v_float64x8& a) +{ return v_float32x16(_mm512_castpd_ps(a.val)); } + +inline v_float64x8 v_reinterpret_as_f64(const v_float64x8& a) +{ return a; } +inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a) +{ return v_float64x8(_mm512_castps_pd(a.val)); } + +// FP16 +inline v_float32x16 v512_load_expand(const float16_t* ptr) +{ + return v_float32x16(_mm512_cvtph_ps(_mm256_loadu_si256((const __m256i*)ptr))); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x16& a) +{ + __m256i ah = _mm512_cvtps_ph(a.val, 0); + _mm256_storeu_si256((__m256i*)ptr, ah); +} + +/* Recombine & ZIP */ +inline void v_zip(const v_int8x64& a, const v_int8x64& b, v_int8x64& ab0, v_int8x64& ab1) +{ +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8( 95, 31, 94, 30, 93, 29, 92, 28, 91, 27, 90, 26, 89, 25, 88, 24, + 87, 23, 86, 22, 85, 21, 84, 20, 83, 19, 82, 18, 81, 17, 80, 16, + 79, 15, 78, 14, 77, 13, 76, 12, 75, 11, 74, 10, 73, 9, 72, 8, + 71, 7, 70, 6, 69, 5, 68, 4, 67, 3, 66, 2, 65, 1, 64, 0); + ab0 = v_int8x64(_mm512_permutex2var_epi8(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu8(127, 63, 126, 62, 125, 61, 124, 60, 123, 59, 122, 58, 121, 57, 120, 56, + 119, 55, 118, 54, 117, 53, 116, 52, 115, 51, 114, 50, 113, 49, 112, 48, + 111, 47, 110, 46, 109, 45, 108, 44, 107, 43, 106, 42, 105, 41, 104, 40, + 103, 39, 102, 38, 101, 37, 100, 36, 99, 35, 98, 34, 97, 33, 96, 32); + ab1 = v_int8x64(_mm512_permutex2var_epi8(a.val, mask1, b.val)); +#else + __m512i low = _mm512_unpacklo_epi8(a.val, b.val); + __m512i high = _mm512_unpackhi_epi8(a.val, b.val); + ab0 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(11, 10, 3, 2, 9, 8, 1, 0), high)); + ab1 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(15, 14, 7, 6, 13, 12, 5, 4), high)); +#endif +} +inline void v_zip(const v_int16x32& a, const v_int16x32& b, v_int16x32& ab0, v_int16x32& ab1) +{ + __m512i mask0 = _v512_set_epu16(47, 15, 46, 14, 45, 13, 44, 12, 43, 11, 42, 10, 41, 9, 40, 8, + 39, 7, 38, 6, 37, 5, 36, 4, 35, 3, 34, 2, 33, 1, 32, 0); + ab0 = v_int16x32(_mm512_permutex2var_epi16(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu16(63, 31, 62, 30, 61, 29, 60, 28, 59, 27, 58, 26, 57, 25, 56, 24, + 55, 23, 54, 22, 53, 21, 52, 20, 51, 19, 50, 18, 49, 17, 48, 16); + ab1 = v_int16x32(_mm512_permutex2var_epi16(a.val, mask1, b.val)); +} +inline void v_zip(const 
v_int32x16& a, const v_int32x16& b, v_int32x16& ab0, v_int32x16& ab1) +{ + __m512i mask0 = _v512_set_epu32(23, 7, 22, 6, 21, 5, 20, 4, 19, 3, 18, 2, 17, 1, 16, 0); + ab0 = v_int32x16(_mm512_permutex2var_epi32(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu32(31, 15, 30, 14, 29, 13, 28, 12, 27, 11, 26, 10, 25, 9, 24, 8); + ab1 = v_int32x16(_mm512_permutex2var_epi32(a.val, mask1, b.val)); +} +inline void v_zip(const v_int64x8& a, const v_int64x8& b, v_int64x8& ab0, v_int64x8& ab1) +{ + __m512i mask0 = _v512_set_epu64(11, 3, 10, 2, 9, 1, 8, 0); + ab0 = v_int64x8(_mm512_permutex2var_epi64(a.val, mask0, b.val)); + __m512i mask1 = _v512_set_epu64(15, 7, 14, 6, 13, 5, 12, 4); + ab1 = v_int64x8(_mm512_permutex2var_epi64(a.val, mask1, b.val)); +} + +inline void v_zip(const v_uint8x64& a, const v_uint8x64& b, v_uint8x64& ab0, v_uint8x64& ab1) +{ + v_int8x64 i0, i1; + v_zip(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b), i0, i1); + ab0 = v_reinterpret_as_u8(i0); + ab1 = v_reinterpret_as_u8(i1); +} +inline void v_zip(const v_uint16x32& a, const v_uint16x32& b, v_uint16x32& ab0, v_uint16x32& ab1) +{ + v_int16x32 i0, i1; + v_zip(v_reinterpret_as_s16(a), v_reinterpret_as_s16(b), i0, i1); + ab0 = v_reinterpret_as_u16(i0); + ab1 = v_reinterpret_as_u16(i1); +} +inline void v_zip(const v_uint32x16& a, const v_uint32x16& b, v_uint32x16& ab0, v_uint32x16& ab1) +{ + v_int32x16 i0, i1; + v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), i0, i1); + ab0 = v_reinterpret_as_u32(i0); + ab1 = v_reinterpret_as_u32(i1); +} +inline void v_zip(const v_uint64x8& a, const v_uint64x8& b, v_uint64x8& ab0, v_uint64x8& ab1) +{ + v_int64x8 i0, i1; + v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), i0, i1); + ab0 = v_reinterpret_as_u64(i0); + ab1 = v_reinterpret_as_u64(i1); +} +inline void v_zip(const v_float32x16& a, const v_float32x16& b, v_float32x16& ab0, v_float32x16& ab1) +{ + v_int32x16 i0, i1; + v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), i0, i1); + ab0 = v_reinterpret_as_f32(i0); + ab1 = v_reinterpret_as_f32(i1); +} +inline void v_zip(const v_float64x8& a, const v_float64x8& b, v_float64x8& ab0, v_float64x8& ab1) +{ + v_int64x8 i0, i1; + v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), i0, i1); + ab0 = v_reinterpret_as_f64(i0); + ab1 = v_reinterpret_as_f64(i1); +} + +#define OPENCV_HAL_IMPL_AVX512_COMBINE(_Tpvec, suffix) \ + inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_v512_combine(_v512_extract_low(a.val), _v512_extract_low(b.val))); } \ + inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_v512_insert(b.val, _v512_extract_high(a.val))); } \ + inline void v_recombine(const _Tpvec& a, const _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ + { \ + c.val = _v512_combine(_v512_extract_low(a.val),_v512_extract_low(b.val)); \ + d.val = _v512_insert(b.val,_v512_extract_high(a.val)); \ + } + + +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint8x64, epi8) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int8x64, epi8) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint16x32, epi16) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int16x32, epi16) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int32x16, epi32) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint64x8, epi64) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_int64x8, epi64) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_COMBINE(v_float64x8, pd) + +////////// Arithmetic, bitwise and comparison operations ///////// + +/* Element-wise binary and unary operations */ + +/** Non-saturating 
arithmetics **/ +#define OPENCV_HAL_IMPL_AVX512_BIN_FUNC(func, _Tpvec, intrin) \ + inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } + +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint8x64, _mm512_add_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int8x64, _mm512_add_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint16x32, _mm512_add_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int16x32, _mm512_add_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint8x64, _mm512_sub_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int8x64, _mm512_sub_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint16x32, _mm512_sub_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int16x32, _mm512_sub_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_uint16x32, _mm512_mullo_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_int16x32, _mm512_mullo_epi16) + +inline v_uint8x64 v_mul_wrap(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i ad = _mm512_srai_epi16(a.val, 8); + __m512i bd = _mm512_srai_epi16(b.val, 8); + __m512i p0 = _mm512_mullo_epi16(a.val, b.val); // even + __m512i p1 = _mm512_slli_epi16(_mm512_mullo_epi16(ad, bd), 8); // odd + return v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, p0, p1)); +} +inline v_int8x64 v_mul_wrap(const v_int8x64& a, const v_int8x64& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +#define OPENCV_HAL_IMPL_AVX512_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(intrin(a.val, b.val)); } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { a.val = intrin(a.val, b.val); return a; } + +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint32x16, _mm512_add_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint32x16, _mm512_sub_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int32x16, _mm512_add_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int32x16, _mm512_sub_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint64x8, _mm512_add_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint64x8, _mm512_sub_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int64x8, _mm512_add_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int64x8, _mm512_sub_epi64) + +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint32x16, _mm512_mullo_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int32x16, _mm512_mullo_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint64x8, _mm512_mullo_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int64x8, _mm512_mullo_epi64) + +/** Saturating arithmetics **/ +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint8x64, _mm512_adds_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint8x64, _mm512_subs_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int8x64, _mm512_adds_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int8x64, _mm512_subs_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint16x32, _mm512_adds_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint16x32, _mm512_subs_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int16x32, _mm512_adds_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int16x32, _mm512_subs_epi16) + +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float32x16, _mm512_add_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float32x16, _mm512_sub_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float32x16, _mm512_mul_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float32x16, _mm512_div_ps) +OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float64x8, _mm512_add_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float64x8, _mm512_sub_pd) +OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float64x8, _mm512_mul_pd) 
+OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float64x8, _mm512_div_pd) + +// saturating multiply +inline v_uint8x64 operator * (const v_uint8x64& a, const v_uint8x64& b) +{ + v_uint16x32 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_int8x64 operator * (const v_int8x64& a, const v_int8x64& b) +{ + v_int16x32 c, d; + v_mul_expand(a, b, c, d); + return v_pack(c, d); +} +inline v_uint16x32 operator * (const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i pl = _mm512_mullo_epi16(a.val, b.val); + __m512i ph = _mm512_mulhi_epu16(a.val, b.val); + __m512i p0 = _mm512_unpacklo_epi16(pl, ph); + __m512i p1 = _mm512_unpackhi_epi16(pl, ph); + + const __m512i m = _mm512_set1_epi32(65535); + return v_uint16x32(_mm512_packus_epi32(_mm512_min_epu32(p0, m), _mm512_min_epu32(p1, m))); +} +inline v_int16x32 operator * (const v_int16x32& a, const v_int16x32& b) +{ + __m512i pl = _mm512_mullo_epi16(a.val, b.val); + __m512i ph = _mm512_mulhi_epi16(a.val, b.val); + __m512i p0 = _mm512_unpacklo_epi16(pl, ph); + __m512i p1 = _mm512_unpackhi_epi16(pl, ph); + return v_int16x32(_mm512_packs_epi32(p0, p1)); +} + +inline v_uint8x64& operator *= (v_uint8x64& a, const v_uint8x64& b) +{ a = a * b; return a; } +inline v_int8x64& operator *= (v_int8x64& a, const v_int8x64& b) +{ a = a * b; return a; } +inline v_uint16x32& operator *= (v_uint16x32& a, const v_uint16x32& b) +{ a = a * b; return a; } +inline v_int16x32& operator *= (v_int16x32& a, const v_int16x32& b) +{ a = a * b; return a; } + +inline v_int16x32 v_mul_hi(const v_int16x32& a, const v_int16x32& b) { return v_int16x32(_mm512_mulhi_epi16(a.val, b.val)); } +inline v_uint16x32 v_mul_hi(const v_uint16x32& a, const v_uint16x32& b) { return v_uint16x32(_mm512_mulhi_epu16(a.val, b.val)); } + +// Multiply and expand +inline void v_mul_expand(const v_uint8x64& a, const v_uint8x64& b, + v_uint16x32& c, v_uint16x32& d) +{ + v_uint16x32 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x64& a, const v_int8x64& b, + v_int16x32& c, v_int16x32& d) +{ + v_int16x32 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x32& a, const v_int16x32& b, + v_int32x16& c, v_int32x16& d) +{ + v_int16x32 v0, v1; + v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), v0, v1); + + c = v_reinterpret_as_s32(v0); + d = v_reinterpret_as_s32(v1); +} + +inline void v_mul_expand(const v_uint16x32& a, const v_uint16x32& b, + v_uint32x16& c, v_uint32x16& d) +{ + v_uint16x32 v0, v1; + v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), v0, v1); + + c = v_reinterpret_as_u32(v0); + d = v_reinterpret_as_u32(v1); +} + +inline void v_mul_expand(const v_uint32x16& a, const v_uint32x16& b, + v_uint64x8& c, v_uint64x8& d) +{ + v_zip(v_uint64x8(_mm512_mul_epu32(a.val, b.val)), + v_uint64x8(_mm512_mul_epu32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))), c, d); +} + +inline void v_mul_expand(const v_int32x16& a, const v_int32x16& b, + v_int64x8& c, v_int64x8& d) +{ + v_zip(v_int64x8(_mm512_mul_epi32(a.val, b.val)), + v_int64x8(_mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))), c, d); +} + +/** Bitwise shifts **/ +#define OPENCV_HAL_IMPL_AVX512_SHIFT_OP(_Tpuvec, _Tpsvec, suffix) \ + inline _Tpuvec operator << (const _Tpuvec& a, int imm) \ + { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); } \ + inline _Tpsvec operator << (const _Tpsvec& a, int imm) \ + { return 
_Tpsvec(_mm512_slli_##suffix(a.val, imm)); } \
+    inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
+    { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); } \
+    inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
+    { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); } \
+    template<int imm> \
+    inline _Tpuvec v_shl(const _Tpuvec& a) \
+    { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); } \
+    template<int imm> \
+    inline _Tpsvec v_shl(const _Tpsvec& a) \
+    { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); } \
+    template<int imm> \
+    inline _Tpuvec v_shr(const _Tpuvec& a) \
+    { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); } \
+    template<int imm> \
+    inline _Tpsvec v_shr(const _Tpsvec& a) \
+    { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); }
+
+OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint16x32, v_int16x32, epi16)
+OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint32x16, v_int32x16, epi32)
+OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint64x8, v_int64x8, epi64)
+
+
+/** Bitwise logic **/
+#define OPENCV_HAL_IMPL_AVX512_LOGIC_OP(_Tpvec, suffix, not_const) \
+    OPENCV_HAL_IMPL_AVX512_BIN_OP(&, _Tpvec, _mm512_and_##suffix) \
+    OPENCV_HAL_IMPL_AVX512_BIN_OP(|, _Tpvec, _mm512_or_##suffix) \
+    OPENCV_HAL_IMPL_AVX512_BIN_OP(^, _Tpvec, _mm512_xor_##suffix) \
+    inline _Tpvec operator ~ (const _Tpvec& a) \
+    { return _Tpvec(_mm512_xor_##suffix(a.val, not_const)); }
+
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint8x64, si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int8x64, si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint16x32, si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int16x32, si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint32x16, si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int32x16, si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint64x8, si512, _mm512_set1_epi64(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int64x8, si512, _mm512_set1_epi64(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float32x16, ps, _mm512_castsi512_ps(_mm512_set1_epi32(-1)))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float64x8, pd, _mm512_castsi512_pd(_mm512_set1_epi32(-1)))
+
+/** Select **/
+#define OPENCV_HAL_IMPL_AVX512_SELECT(_Tpvec, suffix, zsuf) \
+    inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+    { return _Tpvec(_mm512_mask_blend_##suffix(_mm512_cmp_##suffix##_mask(mask.val, _mm512_setzero_##zsuf(), _MM_CMPINT_EQ), a.val, b.val)); }
+
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint8x64, epi8, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int8x64, epi8, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint16x32, epi16, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int16x32, epi16, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint32x16, epi32, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int32x16, epi32, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint64x8, epi64, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int64x8, epi64, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_float32x16, ps, ps)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_float64x8, pd, pd)
+
+/** Comparison **/
+#define OPENCV_HAL_IMPL_AVX512_CMP_INT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+    { return _Tpvec(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval)); }
+
+#define OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(_Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(==, _MM_CMPINT_EQ, _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(!=, _MM_CMPINT_NE, _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(<,
_MM_CMPINT_LT, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(>, _MM_CMPINT_NLE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(<=, _MM_CMPINT_LE, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_INT(>=, _MM_CMPINT_NLT, _Tpvec, sufcmp, sufset, tval) + +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint8x64, epu8, epi8, (char)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int8x64, epi8, epi8, (char)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint16x32, epu16, epi16, (short)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int16x32, epi16, epi16, (short)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint32x16, epu32, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int32x16, epi32, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint64x8, epu64, epi64, (int64)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int64x8, epi64, epi64, (int64)-1) + +#define OPENCV_HAL_IMPL_AVX512_CMP_FLT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { return _Tpvec(_mm512_castsi512_##sufcmp(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval))); } + +#define OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(_Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(==, _CMP_EQ_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(<, _CMP_LT_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(>, _CMP_GT_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(<=, _CMP_LE_OQ, _Tpvec, sufcmp, sufset, tval) \ + OPENCV_HAL_IMPL_AVX512_CMP_FLT(>=, _CMP_GE_OQ, _Tpvec, sufcmp, sufset, tval) + +OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float32x16, ps, epi32, (int)-1) +OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float64x8, pd, epi64, (int64)-1) + +inline v_float32x16 v_not_nan(const v_float32x16& a) +{ return v_float32x16(_mm512_castsi512_ps(_mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a.val, a.val, _CMP_ORD_Q), (int)-1))); } +inline v_float64x8 v_not_nan(const v_float64x8& a) +{ return v_float64x8(_mm512_castsi512_pd(_mm512_maskz_set1_epi64(_mm512_cmp_pd_mask(a.val, a.val, _CMP_ORD_Q), (int64)-1))); } + +/** min/max **/ +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint8x64, _mm512_min_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint8x64, _mm512_max_epu8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int8x64, _mm512_min_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int8x64, _mm512_max_epi8) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint16x32, _mm512_min_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint16x32, _mm512_max_epu16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int16x32, _mm512_min_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int16x32, _mm512_max_epi16) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint32x16, _mm512_min_epu32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint32x16, _mm512_max_epu32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int32x16, _mm512_min_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int32x16, _mm512_max_epi32) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint64x8, _mm512_min_epu64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint64x8, _mm512_max_epu64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int64x8, _mm512_min_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int64x8, _mm512_max_epi64) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float32x16, _mm512_min_ps) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float32x16, _mm512_max_ps) +OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float64x8, 
_mm512_min_pd)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float64x8, _mm512_max_pd)
+
+/** Rotate **/
+namespace {
+    template<bool prec, int imm4, bool is4, int imm32>
+    struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }};
+    template<int imm4, int imm32>
+    struct _v_rotate_right<true, imm4, false, imm32> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b)
+    {
+        return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32    ),  imm4   *8),
+                                         _mm512_slli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32 + 1), (4-imm4)*8)));
+    }};
+    template<int imm4>
+    struct _v_rotate_right<true, imm4, false, 15> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b)
+    {
+        return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, 15),  imm4   *8),
+                                         _mm512_slli_epi32(                    b.val,             (4-imm4)*8)));
+    }};
+    template<int imm4, int imm32>
+    struct _v_rotate_right<true, imm4, true, imm32> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b)
+    {
+        return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16),  imm4   *8),
+                                         _mm512_slli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 15), (4-imm4)*8)));
+    }};
+    template<int imm4>
+    struct _v_rotate_right<true, imm4, true, 31> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b)
+    { return v_int8x64(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, 15), imm4*8)); }};
+    template<int imm32>
+    struct _v_rotate_right<false, 0, false, imm32> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b)
+    { return v_int8x64(_mm512_alignr_epi32(b.val, a.val, imm32)); }};
+    template<>
+    struct _v_rotate_right<false, 0, false, 0> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64&) { return a; }};
+    template<int imm32>
+    struct _v_rotate_right<false, 0, true, imm32> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b)
+    { return v_int8x64(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16)); }};
+    template<>
+    struct _v_rotate_right<false, 0, true, 16> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) { return b; }};
+    template<>
+    struct _v_rotate_right<false, 0, true, 32> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }};
+}
+template<int imm> inline v_int8x64 v_rotate_right(const v_int8x64& a, const v_int8x64& b)
+{
+    return imm >= 128 ? v_int8x64() :
+#if CV_AVX_512VBMI
+    v_int8x64(_mm512_permutex2var_epi8(a.val,
+              _v512_set_epu8(0x3f + imm, 0x3e + imm, 0x3d + imm, 0x3c + imm, 0x3b + imm, 0x3a + imm, 0x39 + imm, 0x38 + imm,
+                             0x37 + imm, 0x36 + imm, 0x35 + imm, 0x34 + imm, 0x33 + imm, 0x32 + imm, 0x31 + imm, 0x30 + imm,
+                             0x2f + imm, 0x2e + imm, 0x2d + imm, 0x2c + imm, 0x2b + imm, 0x2a + imm, 0x29 + imm, 0x28 + imm,
+                             0x27 + imm, 0x26 + imm, 0x25 + imm, 0x24 + imm, 0x23 + imm, 0x22 + imm, 0x21 + imm, 0x20 + imm,
+                             0x1f + imm, 0x1e + imm, 0x1d + imm, 0x1c + imm, 0x1b + imm, 0x1a + imm, 0x19 + imm, 0x18 + imm,
+                             0x17 + imm, 0x16 + imm, 0x15 + imm, 0x14 + imm, 0x13 + imm, 0x12 + imm, 0x11 + imm, 0x10 + imm,
+                             0x0f + imm, 0x0e + imm, 0x0d + imm, 0x0c + imm, 0x0b + imm, 0x0a + imm, 0x09 + imm, 0x08 + imm,
+                             0x07 + imm, 0x06 + imm, 0x05 + imm, 0x04 + imm, 0x03 + imm, 0x02 + imm, 0x01 + imm, 0x00 + imm), b.val));
+#else
+    _v_rotate_right<imm%4 ? true : false, imm%4, (imm/4 > 15), imm/4>::eval(a, b);
+#endif
+}
+template<int imm>
+inline v_int8x64 v_rotate_left(const v_int8x64& a, const v_int8x64& b)
+{
+    if (imm == 0) return a;
+    if (imm == 64) return b;
+    if (imm >= 128) return v_int8x64();
+#if CV_AVX_512VBMI
+    return v_int8x64(_mm512_permutex2var_epi8(b.val,
+                     _v512_set_epi8(0x7f - imm,0x7e - imm,0x7d - imm,0x7c - imm,0x7b - imm,0x7a - imm,0x79 - imm,0x78 - imm,
+                                    0x77 - imm,0x76 - imm,0x75 - imm,0x74 - imm,0x73 - imm,0x72 - imm,0x71 - imm,0x70 - imm,
+                                    0x6f - imm,0x6e - imm,0x6d - imm,0x6c - imm,0x6b - imm,0x6a - imm,0x69 - imm,0x68 - imm,
+                                    0x67 - imm,0x66 - imm,0x65 - imm,0x64 - imm,0x63 - imm,0x62 - imm,0x61 - imm,0x60 - imm,
+                                    0x5f - imm,0x5e - imm,0x5d - imm,0x5c - imm,0x5b - imm,0x5a - imm,0x59 - imm,0x58 - imm,
+                                    0x57 - imm,0x56 - imm,0x55 - imm,0x54 - imm,0x53 - imm,0x52 - imm,0x51 - imm,0x50 - imm,
+                                    0x4f - imm,0x4e - imm,0x4d - imm,0x4c - imm,0x4b - imm,0x4a - imm,0x49 - imm,0x48 - imm,
+                                    0x47 - imm,0x46 - imm,0x45 - imm,0x44 - imm,0x43 - imm,0x42 - imm,0x41 - imm,0x40 - imm), a.val));
+#else
+    return imm < 64 ? v_rotate_right<64 - imm>(b, a) : v_rotate_right<128 - imm>(v512_setzero_s8(), b);
+#endif
+}
+template<int imm>
+inline v_int8x64 v_rotate_right(const v_int8x64& a)
+{
+    if (imm == 0) return a;
+    if (imm >= 64) return v_int8x64();
+#if CV_AVX_512VBMI
+    return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF >> imm,
+                     _v512_set_epu8(0x3f + imm,0x3e + imm,0x3d + imm,0x3c + imm,0x3b + imm,0x3a + imm,0x39 + imm,0x38 + imm,
+                                    0x37 + imm,0x36 + imm,0x35 + imm,0x34 + imm,0x33 + imm,0x32 + imm,0x31 + imm,0x30 + imm,
+                                    0x2f + imm,0x2e + imm,0x2d + imm,0x2c + imm,0x2b + imm,0x2a + imm,0x29 + imm,0x28 + imm,
+                                    0x27 + imm,0x26 + imm,0x25 + imm,0x24 + imm,0x23 + imm,0x22 + imm,0x21 + imm,0x20 + imm,
+                                    0x1f + imm,0x1e + imm,0x1d + imm,0x1c + imm,0x1b + imm,0x1a + imm,0x19 + imm,0x18 + imm,
+                                    0x17 + imm,0x16 + imm,0x15 + imm,0x14 + imm,0x13 + imm,0x12 + imm,0x11 + imm,0x10 + imm,
+                                    0x0f + imm,0x0e + imm,0x0d + imm,0x0c + imm,0x0b + imm,0x0a + imm,0x09 + imm,0x08 + imm,
+                                    0x07 + imm,0x06 + imm,0x05 + imm,0x04 + imm,0x03 + imm,0x02 + imm,0x01 + imm,0x00 + imm), a.val));
+#else
+    return v_rotate_right<imm>(a, v512_setzero_s8());
+#endif
+}
+template<int imm>
+inline v_int8x64 v_rotate_left(const v_int8x64& a)
+{
+    if (imm == 0) return a;
+    if (imm >= 64) return v_int8x64();
+#if CV_AVX_512VBMI
+    return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF << imm,
+                     _v512_set_epi8(0x3f - imm,0x3e - imm,0x3d - imm,0x3c - imm,0x3b - imm,0x3a - imm,0x39 - imm,0x38 - imm,
+                                    0x37 - imm,0x36 - imm,0x35 - imm,0x34 - imm,0x33 - imm,0x32 - imm,0x31 - imm,0x30 - imm,
+                                    0x2f - imm,0x2e - imm,0x2d - imm,0x2c - imm,0x2b - imm,0x2a - imm,0x29 - imm,0x28 - imm,
+                                    0x27 - imm,0x26 - imm,0x25 - imm,0x24 - imm,0x23 - imm,0x22 - imm,0x21 - imm,0x20 - imm,
+                                    0x1f - imm,0x1e - imm,0x1d - imm,0x1c - imm,0x1b - imm,0x1a - imm,0x19 - imm,0x18 - imm,
+                                    0x17 - imm,0x16 - imm,0x15 - imm,0x14 - imm,0x13 - imm,0x12 - imm,0x11 - imm,0x10 - imm,
+                                    0x0f - imm,0x0e - imm,0x0d - imm,0x0c - imm,0x0b - imm,0x0a - imm,0x09 - imm,0x08 - imm,
+                                    0x07 - imm,0x06 - imm,0x05 - imm,0x04 - imm,0x03 - imm,0x02 - imm,0x01 - imm,0x00 - imm), a.val));
+#else
+    return v_rotate_right<64 - imm>(v512_setzero_s8(), a);
+#endif
+}
+
+#define OPENCV_HAL_IMPL_AVX512_ROTATE_PM(_Tpvec, suffix) \
+template<int imm> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ return v_reinterpret_as_##suffix(v_rotate_left<imm * sizeof(typename _Tpvec::lane_type)>(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); } \
+template<int imm> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ return v_reinterpret_as_##suffix(v_rotate_right<imm * sizeof(typename _Tpvec::lane_type)>(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); } \
+template<int imm> inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{ return v_reinterpret_as_##suffix(v_rotate_left<imm * sizeof(typename _Tpvec::lane_type)>(v_reinterpret_as_s8(a))); } \
+template<int imm> inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{ return v_reinterpret_as_##suffix(v_rotate_right<imm * sizeof(typename _Tpvec::lane_type)>(v_reinterpret_as_s8(a))); }
+
+#define OPENCV_HAL_IMPL_AVX512_ROTATE_EC(_Tpvec, suffix) \
+template<int imm> \
+inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    enum { SHIFT2 = (_Tpvec::nlanes - imm) }; \
+    enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; \
+    if (imm == 0) return a; \
+    if (imm == _Tpvec::nlanes) return b; \
+    if (imm >= 2*_Tpvec::nlanes) return _Tpvec::zero(); \
+    return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << SHIFT2)&MASK, b.val), (MASK << (imm))&MASK, a.val)); \
+} \
+template<int imm> \
+inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    enum { SHIFT2 = (_Tpvec::nlanes - imm) }; \
+    enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; \
+    if (imm == 0) return a; \
+    if (imm == _Tpvec::nlanes) return b; \
+    if (imm >= 2*_Tpvec::nlanes) return _Tpvec::zero(); \
+    return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << (imm))&MASK, a.val), (MASK << SHIFT2)&MASK, b.val)); \
+} \
+template<int imm> \
+inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{ \
+    if (imm == 0) return a; \
+    if (imm >= _Tpvec::nlanes) return _Tpvec::zero(); \
+    return _Tpvec(_mm512_maskz_expand_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); \
+} \
+template<int imm> \
+inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{ \
+    if (imm == 0) return a; \
+    if (imm >= _Tpvec::nlanes) return _Tpvec::zero(); \
+    return _Tpvec(_mm512_maskz_compress_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); \
+}
+
+OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint8x64, u8)
+OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint16x32, u16)
+OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_int16x32, s16)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint32x16, epi32)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int32x16, epi32)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint64x8, epi64)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int64x8, epi64)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8, pd)
+
+/** Reverse **/
+inline v_uint8x64 v_reverse(const v_uint8x64 &a)
+{
+#if CV_AVX_512VBMI
+    static const __m512i perm = _mm512_set_epi32(
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f,
+            0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f,
+            0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
+    return v_uint8x64(_mm512_permutexvar_epi8(perm, a.val));
+#else
+    static const __m512i shuf = _mm512_set_epi32(
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+    static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6);
+    __m512i vec = _mm512_shuffle_epi8(a.val, shuf);
+    return v_uint8x64(_mm512_permutexvar_epi64(perm, vec));
+#endif
+}
+
+inline v_int8x64 v_reverse(const v_int8x64 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x32 v_reverse(const v_uint16x32 &a)
+{
+#if CV_AVX_512VBMI
+    static const __m512i perm = _mm512_set_epi32(
+            0x00000001, 0x00020003, 0x00040005, 0x00060007,
+            0x00080009, 0x000a000b, 0x000c000d, 0x000e000f,
+            0x00100011, 0x00120013, 0x00140015, 0x00160017,
+            0x00180019, 0x001a001b, 0x001c001d, 0x001e001f);
+    return v_uint16x32(_mm512_permutexvar_epi16(perm, a.val));
+#else
+    static const __m512i shuf = _mm512_set_epi32(
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e);
+    static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6);
+    __m512i vec = _mm512_shuffle_epi8(a.val, shuf);
+    return v_uint16x32(_mm512_permutexvar_epi64(perm, vec));
+#endif
+}
+
+inline v_int16x32 v_reverse(const v_int16x32 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x16 v_reverse(const v_uint32x16 &a)
+{
+    static const __m512i perm = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+    return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val));
+}
+
+inline v_int32x16 v_reverse(const v_int32x16 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x16
v_reverse(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x8 v_reverse(const v_uint64x8 &a) +{ + static const __m512i perm = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + return v_uint64x8(_mm512_permutexvar_epi64(perm, a.val)); +} + +inline v_int64x8 v_reverse(const v_int64x8 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x8 v_reverse(const v_float64x8 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +////////// Reduce ///////// + +/** Reduce **/ +#define OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64(a, b) a + b +#define OPENCV_HAL_IMPL_AVX512_REDUCE_8(sctype, func, _Tpvec, ifunc, scop) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + sctype CV_DECL_ALIGNED(64) idx[2]; \ + _mm_store_si128((__m128i*)idx, _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1))); \ + return scop(idx[0], idx[1]); } +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, min, v_uint64x8, min_epu64, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, max, v_uint64x8, max_epu64, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, sum, v_uint64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, min, v_int64x8, min_epi64, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, max, v_int64x8, max_epi64, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64, sum, v_int64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_8F(func, ifunc, scop) \ + inline double v_reduce_##func(const v_float64x8& a) \ + { __m256d half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + double CV_DECL_ALIGNED(64) idx[2]; \ + _mm_store_pd(idx, _mm_##ifunc(_mm256_castpd256_pd128(half), _mm256_extractf128_pd(half, 1))); \ + return scop(idx[0], idx[1]); } +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(min, min_pd, min) +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(max, max_pd, max) +OPENCV_HAL_IMPL_AVX512_REDUCE_8F(sum, add_pd, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_16(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, min, v_uint32x16, min_epu32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, max, v_uint32x16, max_epu32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(int, min, v_int32x16, min_epi32) +OPENCV_HAL_IMPL_AVX512_REDUCE_16(int, max, v_int32x16, max_epi32) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_16F(func, ifunc) \ + inline float v_reduce_##func(const v_float32x16& a) \ + { __m256 half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128 quarter = _mm_##ifunc(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 3, 2))); \ + quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 0, 1))); \ + return _mm_cvtss_f32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_16F(min, min_ps) +OPENCV_HAL_IMPL_AVX512_REDUCE_16F(max, max_ps) + +inline float v_reduce_sum(const v_float32x16& a) +{ + __m256 half 
= _mm256_add_ps(_v512_extract_low(a.val), _v512_extract_high(a.val)); + __m128 quarter = _mm_add_ps(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); + quarter = _mm_hadd_ps(quarter, quarter); + return _mm_cvtss_f32(_mm_hadd_ps(quarter, quarter)); +} +inline int v_reduce_sum(const v_int32x16& a) +{ + __m256i half = _mm256_add_epi32(_v512_extract_low(a.val), _v512_extract_high(a.val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + quarter = _mm_hadd_epi32(quarter, quarter); + return _mm_cvtsi128_si32(_mm_hadd_epi32(quarter, quarter)); +} +inline uint v_reduce_sum(const v_uint32x16& a) +{ return (uint)v_reduce_sum(v_reinterpret_as_s32(a)); } + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_32(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, min, v_uint16x32, min_epu16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, max, v_uint16x32, max_epu16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(short, min, v_int16x32, min_epi16) +OPENCV_HAL_IMPL_AVX512_REDUCE_32(short, max, v_int16x32, max_epi16) + +inline int v_reduce_sum(const v_int16x32& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline uint v_reduce_sum(const v_uint16x32& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_64(sctype, func, _Tpvec, ifunc) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); \ + __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); \ + quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 1)); \ + return (sctype)_mm_cvtsi128_si32(quarter); } +OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, min, v_uint8x64, min_epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, max, v_uint8x64, max_epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, min, v_int8x64, min_epi8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, max, v_int8x64, max_epi8) + +#define OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(sctype, _Tpvec, suffix) \ + inline sctype v_reduce_sum(const _Tpvec& a) \ + { __m512i a16 = _mm512_add_epi16(_mm512_cvt##suffix##_epi16(_v512_extract_low(a.val)), \ + _mm512_cvt##suffix##_epi16(_v512_extract_high(a.val))); \ + a16 = _mm512_cvtepi16_epi32(_mm256_add_epi16(_v512_extract_low(a16), _v512_extract_high(a16))); \ + __m256i a8 = _mm256_add_epi32(_v512_extract_low(a16), _v512_extract_high(a16)); \ + __m128i a4 = _mm_add_epi32(_mm256_castsi256_si128(a8), _mm256_extracti128_si256(a8, 1)); \ + a4 = _mm_hadd_epi32(a4, a4); \ + return (sctype)_mm_cvtsi128_si32(_mm_hadd_epi32(a4, a4)); } +OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(uint, v_uint8x64, epu8) +OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(int, v_int8x64, epi8) + +inline v_float32x16 v_reduce_sum4(const v_float32x16& a, const v_float32x16& b, + const v_float32x16& c, const 
v_float32x16& d) +{ + __m256 abl = _mm256_hadd_ps(_v512_extract_low(a.val), _v512_extract_low(b.val)); + __m256 abh = _mm256_hadd_ps(_v512_extract_high(a.val), _v512_extract_high(b.val)); + __m256 cdl = _mm256_hadd_ps(_v512_extract_low(c.val), _v512_extract_low(d.val)); + __m256 cdh = _mm256_hadd_ps(_v512_extract_high(c.val), _v512_extract_high(d.val)); + return v_float32x16(_v512_combine(_mm256_hadd_ps(abl, cdl), _mm256_hadd_ps(abh, cdh))); +} + +inline unsigned v_reduce_sad(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i val = _mm512_sad_epu8(a.val, b.val); + __m256i half = _mm256_add_epi32(_v512_extract_low(val), _v512_extract_high(val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_int8x64& a, const v_int8x64& b) +{ + __m512i val = _mm512_set1_epi8(-128); + val = _mm512_sad_epu8(_mm512_add_epi8(a.val, val), _mm512_add_epi8(b.val, val)); + __m256i half = _mm256_add_epi32(_v512_extract_low(val), _v512_extract_high(val)); + __m128i quarter = _mm_add_epi32(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline unsigned v_reduce_sad(const v_uint16x32& a, const v_uint16x32& b) +{ return v_reduce_sum(v_add_wrap(a - b, b - a)); } +inline unsigned v_reduce_sad(const v_int16x32& a, const v_int16x32& b) +{ return v_reduce_sum(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)))); } +inline unsigned v_reduce_sad(const v_uint32x16& a, const v_uint32x16& b) +{ return v_reduce_sum(v_max(a, b) - v_min(a, b)); } +inline unsigned v_reduce_sad(const v_int32x16& a, const v_int32x16& b) +{ return v_reduce_sum(v_reinterpret_as_u32(v_max(a, b) - v_min(a, b))); } +inline float v_reduce_sad(const v_float32x16& a, const v_float32x16& b) +{ return v_reduce_sum((a - b) & v_float32x16(_mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffff)))); } +inline double v_reduce_sad(const v_float64x8& a, const v_float64x8& b) +{ return v_reduce_sum((a - b) & v_float64x8(_mm512_castsi512_pd(_mm512_set1_epi64(0x7fffffffffffffff)))); } + +/** Popcount **/ +inline v_uint8x64 v_popcount(const v_int8x64& a) +{ +#if CV_AVX_512BITALG + return v_uint8x64(_mm512_popcnt_epi8(a.val)); +#elif CV_AVX_512VBMI + __m512i _popcnt_table0 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + __m512i _popcnt_table1 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, + 5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1); + return v_uint8x64(_mm512_sub_epi8(_mm512_permutex2var_epi8(_popcnt_table0, a.val, _popcnt_table1), _mm512_movm_epi8(_mm512_movepi8_mask(a.val)))); +#else + __m512i _popcnt_table = _mm512_set4_epi32(0x04030302, 0x03020201, 0x03020201, 0x02010100); + __m512i _popcnt_mask = _mm512_set1_epi8(0x0F); + + return v_uint8x64(_mm512_add_epi8(_mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512( a.val, _popcnt_mask)), + _mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512(_mm512_srli_epi16(a.val, 4), _popcnt_mask)))); +#endif +} +inline v_uint16x32 v_popcount(const v_int16x32& a) +{ +#if CV_AVX_512BITALG + return v_uint16x32(_mm512_popcnt_epi16(a.val)); 
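+    // Without BITALG the fallbacks below emulate the 16-bit popcount: the
+    // VPOPCNTDQ path widens each half to 32-bit lanes, counts bits there and
+    // packs back down, while the generic path sums adjacent byte popcounts
+    // and keeps only the low byte of every 16-bit lane.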
+#elif CV_AVX_512VPOPCNTDQ + __m512i zero = _mm512_setzero_si512(); + return v_uint16x32(_mm512_packs_epi32(_mm512_popcnt_epi32(_mm512_unpacklo_epi16(a.val, zero)), + _mm512_popcnt_epi32(_mm512_unpackhi_epi16(a.val, zero)))); +#else + v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v512_setall_u16(0x00ff); +#endif +} +inline v_uint32x16 v_popcount(const v_int32x16& a) +{ +#if CV_AVX_512VPOPCNTDQ + return v_uint32x16(_mm512_popcnt_epi32(a.val)); +#else + v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v512_setall_u32(0x000000ff); +#endif +} +inline v_uint64x8 v_popcount(const v_int64x8& a) +{ +#if CV_AVX_512VPOPCNTDQ + return v_uint64x8(_mm512_popcnt_epi64(a.val)); +#else + return v_uint64x8(_mm512_sad_epu8(v_popcount(v_reinterpret_as_s8(a)).val, _mm512_setzero_si512())); +#endif +} + + +inline v_uint8x64 v_popcount(const v_uint8x64& a) { return v_popcount(v_reinterpret_as_s8 (a)); } +inline v_uint16x32 v_popcount(const v_uint16x32& a) { return v_popcount(v_reinterpret_as_s16(a)); } +inline v_uint32x16 v_popcount(const v_uint32x16& a) { return v_popcount(v_reinterpret_as_s32(a)); } +inline v_uint64x8 v_popcount(const v_uint64x8& a) { return v_popcount(v_reinterpret_as_s64(a)); } + + +////////// Other math ///////// + +/** Some frequent operations **/ +#if CV_FMA3 +#define OPENCV_HAL_IMPL_AVX512_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } +#else +#define OPENCV_HAL_IMPL_AVX512_MULADD(_Tpvec, suffix) \ + inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_add_##suffix(_mm512_mul_##suffix(a.val, b.val), c.val)); } \ + inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { return _Tpvec(_mm512_add_##suffix(_mm512_mul_##suffix(a.val, b.val), c.val)); } +#endif + +#define OPENCV_HAL_IMPL_AVX512_MISC(_Tpvec, suffix) \ + inline _Tpvec v_sqrt(const _Tpvec& x) \ + { return _Tpvec(_mm512_sqrt_##suffix(x.val)); } \ + inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_fma(a, a, b * b); } \ + inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \ + { return v_sqrt(v_fma(a, a, b * b)); } + +OPENCV_HAL_IMPL_AVX512_MULADD(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_MULADD(v_float64x8, pd) +OPENCV_HAL_IMPL_AVX512_MISC(v_float32x16, ps) +OPENCV_HAL_IMPL_AVX512_MISC(v_float64x8, pd) + +inline v_int32x16 v_fma(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) +{ return a * b + c; } +inline v_int32x16 v_muladd(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) +{ return v_fma(a, b, c); } + +inline v_float32x16 v_invsqrt(const v_float32x16& x) +{ +#if CV_AVX_512ER + return v_float32x16(_mm512_rsqrt28_ps(x.val)); +#else + v_float32x16 half = x * v512_setall_f32(0.5); + v_float32x16 t = v_float32x16(_mm512_rsqrt14_ps(x.val)); + t *= v512_setall_f32(1.5) - ((t * t) * half); + return t; +#endif +} + +inline v_float64x8 v_invsqrt(const v_float64x8& x) +{ +#if CV_AVX_512ER + return v_float64x8(_mm512_rsqrt28_pd(x.val)); +#else + return v512_setall_f64(1.) 
/ v_sqrt(x); +// v_float64x8 half = x * v512_setall_f64(0.5); +// v_float64x8 t = v_float64x8(_mm512_rsqrt14_pd(x.val)); +// t *= v512_setall_f64(1.5) - ((t * t) * half); +// t *= v512_setall_f64(1.5) - ((t * t) * half); +// return t; +#endif +} + +/** Absolute values **/ +#define OPENCV_HAL_IMPL_AVX512_ABS(_Tpvec, _Tpuvec, suffix) \ + inline _Tpuvec v_abs(const _Tpvec& x) \ + { return _Tpuvec(_mm512_abs_##suffix(x.val)); } + +OPENCV_HAL_IMPL_AVX512_ABS(v_int8x64, v_uint8x64, epi8) +OPENCV_HAL_IMPL_AVX512_ABS(v_int16x32, v_uint16x32, epi16) +OPENCV_HAL_IMPL_AVX512_ABS(v_int32x16, v_uint32x16, epi32) +OPENCV_HAL_IMPL_AVX512_ABS(v_int64x8, v_uint64x8, epi64) + +inline v_float32x16 v_abs(const v_float32x16& x) +{ +#ifdef _mm512_abs_pd + return v_float32x16(_mm512_abs_ps(x.val)); +#else + return v_float32x16(_mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(x.val), + _v512_set_epu64(0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, + 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)))); +#endif +} + +inline v_float64x8 v_abs(const v_float64x8& x) +{ +#ifdef _mm512_abs_pd + #if defined __GNUC__ && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ <= 3) || (__GNUC__ == 8 && __GNUC_MINOR__ <= 2)) + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87476 + return v_float64x8(_mm512_abs_pd(_mm512_castpd_ps(x.val))); + #else + return v_float64x8(_mm512_abs_pd(x.val)); + #endif +#else + return v_float64x8(_mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(x.val), + _v512_set_epu64(0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, + 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF)))); +#endif +} + +/** Absolute difference **/ +inline v_uint8x64 v_absdiff(const v_uint8x64& a, const v_uint8x64& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x32 v_absdiff(const v_uint16x32& a, const v_uint16x32& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x16 v_absdiff(const v_uint32x16& a, const v_uint32x16& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x64 v_absdiff(const v_int8x64& a, const v_int8x64& b) +{ + v_int8x64 d = v_sub_wrap(a, b); + v_int8x64 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} + +inline v_uint16x32 v_absdiff(const v_int16x32& a, const v_int16x32& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } + +inline v_uint32x16 v_absdiff(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 d = a - b; + v_int32x16 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +inline v_float32x16 v_absdiff(const v_float32x16& a, const v_float32x16& b) +{ return v_abs(a - b); } + +inline v_float64x8 v_absdiff(const v_float64x8& a, const v_float64x8& b) +{ return v_abs(a - b); } + +/** Saturating absolute difference **/ +inline v_int8x64 v_absdiffs(const v_int8x64& a, const v_int8x64& b) +{ + v_int8x64 d = a - b; + v_int8x64 m = a < b; + return (d ^ m) - m; +} +inline v_int16x32 v_absdiffs(const v_int16x32& a, const v_int16x32& b) +{ return v_max(a, b) - v_min(a, b); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x16 v_round(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(a.val)); } + +inline v_int32x16 v_round(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(a.val))); } + +inline v_int32x16 v_round(const v_float64x8& a, const v_float64x8& b) +{ return v_int32x16(_v512_combine(_mm512_cvtpd_epi32(a.val), 
_mm512_cvtpd_epi32(b.val))); } + +inline v_int32x16 v_trunc(const v_float32x16& a) +{ return v_int32x16(_mm512_cvttps_epi32(a.val)); } + +inline v_int32x16 v_trunc(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvttpd_epi32(a.val))); } + +#if CVT_ROUND_MODES_IMPLEMENTED +inline v_int32x16 v_floor(const v_float32x16& a) +{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); } + +inline v_int32x16 v_floor(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC))); } + +inline v_int32x16 v_ceil(const v_float32x16& a) +{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); } + +inline v_int32x16 v_ceil(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC))); } +#else +inline v_int32x16 v_floor(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 1))); } + +inline v_int32x16 v_floor(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 1)))); } + +inline v_int32x16 v_ceil(const v_float32x16& a) +{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 2))); } + +inline v_int32x16 v_ceil(const v_float64x8& a) +{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 2)))); } +#endif + +/** To float **/ +inline v_float32x16 v_cvt_f32(const v_int32x16& a) +{ return v_float32x16(_mm512_cvtepi32_ps(a.val)); } + +inline v_float32x16 v_cvt_f32(const v_float64x8& a) +{ return v_float32x16(_mm512_cvtpd_pslo(a.val)); } + +inline v_float32x16 v_cvt_f32(const v_float64x8& a, const v_float64x8& b) +{ return v_float32x16(_v512_combine(_mm512_cvtpd_ps(a.val), _mm512_cvtpd_ps(b.val))); } + +inline v_float64x8 v_cvt_f64(const v_int32x16& a) +{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_low(a.val))); } + +inline v_float64x8 v_cvt_f64_high(const v_int32x16& a) +{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_high(a.val))); } + +inline v_float64x8 v_cvt_f64(const v_float32x16& a) +{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_low(a.val))); } + +inline v_float64x8 v_cvt_f64_high(const v_float32x16& a) +{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_high(a.val))); } + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x8 v_cvt_f64(const v_int64x8& v) +{ +#if CV_AVX_512DQ + return v_float64x8(_mm512_cvtepi64_pd(v.val)); +#else + // constants encoded as floating-point + __m512i magic_i_lo = _mm512_set1_epi64(0x4330000000000000); // 2^52 + __m512i magic_i_hi32 = _mm512_set1_epi64(0x4530000080000000); // 2^84 + 2^63 + __m512i magic_i_all = _mm512_set1_epi64(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m512d magic_d_all = _mm512_castsi512_pd(magic_i_all); + + // Blend the 32 lowest significant bits of v with magic_int_lo + __m512i v_lo = _mm512_mask_blend_epi32(0x5555, magic_i_lo, v.val); + // Extract the 32 most significant bits of v + __m512i v_hi = _mm512_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm512_xor_si512(v_hi, magic_i_hi32); + // Compute in double precision + __m512d v_hi_dbl = _mm512_sub_pd(_mm512_castsi512_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m512d result = _mm512_add_pd(v_hi_dbl, _mm512_castsi512_pd(v_lo)); + return 
v_float64x8(result); +#endif +} + +////////////// Lookup table access //////////////////// + +inline v_int8x64 v512_lut(const schar* tab, const int* idx) +{ + __m128i p0 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 1)); + __m128i p1 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1)); + __m128i p2 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 2), (const int *)tab, 1)); + __m128i p3 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 3), (const int *)tab, 1)); + return v_int8x64(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(p0), p1, 1), p2, 2), p3, 3)); +} +inline v_int8x64 v512_lut_pairs(const schar* tab, const int* idx) +{ + __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 1)); + __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1)); + return v_int8x64(_v512_combine(p0, p1)); +} +inline v_int8x64 v512_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x64(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 1)); +} +inline v_uint8x64 v512_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut((const schar *)tab, idx)); } +inline v_uint8x64 v512_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x64 v512_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_quads((const schar *)tab, idx)); } + +inline v_int16x32 v512_lut(const short* tab, const int* idx) +{ + __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx ), (const int *)tab, 2)); + __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 2)); + return v_int16x32(_v512_combine(p0, p1)); +} +inline v_int16x32 v512_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x32(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 2)); +} +inline v_int16x32 v512_lut_quads(const short* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 2)); +#else + return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 2)); +#endif +} +inline v_uint16x32 v512_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut((const short *)tab, idx)); } +inline v_uint16x32 v512_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_pairs((const short *)tab, idx)); } +inline v_uint16x32 v512_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_quads((const short *)tab, idx)); } + +inline v_int32x16 v512_lut(const int* tab, const int* idx) +{ + return v_int32x16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), tab, 4)); +} +inline v_int32x16 v512_lut_pairs(const int* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 4)); +#else + return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 4)); +#endif +} 
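+// Usage sketch (illustrative only; tab and idx are hypothetical names, not part
+// of this patch): v512_lut gathers one element per index, v512_lut_pairs two
+// consecutive elements per index, and v512_lut_quads four, so fewer indices are
+// consumed as the granularity grows. For a const int* table tab:
+//   CV_DECL_ALIGNED(64) int idx[16] = { 3, 7, 0, 1, 15, 14, 2, 2,
+//                                       5, 4, 9, 8, 11, 10, 13, 12 };
+//   v_int32x16 v1 = v512_lut(tab, idx);        // tab[idx[i]] for i = 0..15
+//   v_int32x16 v2 = v512_lut_pairs(tab, idx);  // tab[idx[i]], tab[idx[i]+1] for i = 0..7
+//   v_int32x16 v3 = v512_lut_quads(tab, idx);  // tab[idx[i]]..tab[idx[i]+3] for i = 0..3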
+inline v_int32x16 v512_lut_quads(const int* tab, const int* idx) +{ + return v_int32x16(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512( + _mm_loadu_si128((const __m128i*)(tab + idx[0]))), + _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1), + _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2), + _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3)); +} +inline v_uint32x16 v512_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut((const int *)tab, idx)); } +inline v_uint32x16 v512_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_pairs((const int *)tab, idx)); } +inline v_uint32x16 v512_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_quads((const int *)tab, idx)); } + +inline v_int64x8 v512_lut(const int64* tab, const int* idx) +{ +#if defined(__GNUC__) + return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 8)); +#else + return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), tab , 8)); +#endif +} +inline v_int64x8 v512_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x8(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512( + _mm_loadu_si128((const __m128i*)(tab + idx[0]))), + _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1), + _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2), + _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3)); +} +inline v_uint64x8 v512_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut((const int64 *)tab, idx)); } +inline v_uint64x8 v512_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut_pairs((const int64 *)tab, idx)); } + +inline v_float32x16 v512_lut(const float* tab, const int* idx) +{ + return v_float32x16(_mm512_i32gather_ps(_mm512_loadu_si512((const __m512i*)idx), tab, 4)); +} +inline v_float32x16 v512_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_pairs((const int *)tab, idx)); } +inline v_float32x16 v512_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_quads((const int *)tab, idx)); } + +inline v_float64x8 v512_lut(const double* tab, const int* idx) +{ + return v_float64x8(_mm512_i32gather_pd(_mm256_loadu_si256((const __m256i*)idx), tab, 8)); +} +inline v_float64x8 v512_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x8(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_castpd128_pd512( + _mm_loadu_pd(tab + idx[0])), + _mm_loadu_pd(tab + idx[1]), 1), + _mm_loadu_pd(tab + idx[2]), 2), + _mm_loadu_pd(tab + idx[3]), 3)); +} + +inline v_int32x16 v_lut(const int* tab, const v_int32x16& idxvec) +{ + return v_int32x16(_mm512_i32gather_epi32(idxvec.val, tab, 4)); +} + +inline v_uint32x16 v_lut(const unsigned* tab, const v_int32x16& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x16 v_lut(const float* tab, const v_int32x16& idxvec) +{ + return v_float32x16(_mm512_i32gather_ps(idxvec.val, tab, 4)); +} + +inline v_float64x8 v_lut(const double* tab, const v_int32x16& idxvec) +{ + return v_float64x8(_mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x16& idxvec, v_float32x16& x, v_float32x16& y) +{ + x.val = _mm512_i32gather_ps(idxvec.val, tab, 4); + y.val = _mm512_i32gather_ps(idxvec.val, &tab[1], 4); +} + +inline void 
v_lut_deinterleave(const double* tab, const v_int32x16& idxvec, v_float64x8& x, v_float64x8& y) +{ + x.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8); + y.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), &tab[1], 8); +} + +inline v_int8x64 v_interleave_pairs(const v_int8x64& vec) +{ + return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0d0e0c, 0x0b090a08, 0x07050604, 0x03010200))); +} +inline v_uint8x64 v_interleave_pairs(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x64 v_interleave_quads(const v_int8x64& vec) +{ + return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0b0e0a, 0x0d090c08, 0x07030602, 0x05010400))); +} +inline v_uint8x64 v_interleave_quads(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x32 v_interleave_pairs(const v_int16x32& vec) +{ + return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0b0a, 0x0d0c0908, 0x07060302, 0x05040100))); +} +inline v_uint16x32 v_interleave_pairs(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x32 v_interleave_quads(const v_int16x32& vec) +{ + return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0706, 0x0d0c0504, 0x0b0a0302, 0x09080100))); +} +inline v_uint16x32 v_interleave_quads(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x16 v_interleave_pairs(const v_int32x16& vec) +{ + return v_int32x16(_mm512_shuffle_epi32(vec.val, _MM_PERM_ACBD)); +} +inline v_uint32x16 v_interleave_pairs(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x16 v_interleave_pairs(const v_float32x16& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x64 v_pack_triplets(const v_int8x64& vec) +{ + return v_int8x64(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), + _mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0xffffff0f, 0x0e0d0c0a, 0x09080605, 0x04020100)))); +} +inline v_uint8x64 v_pack_triplets(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x32 v_pack_triplets(const v_int16x32& vec) +{ + return v_int16x32(_mm512_permutexvar_epi16(_v512_set_epu64(0x001f001f001f001f, 0x001f001f001f001f, 0x001e001d001c001a, 0x0019001800160015, + 0x0014001200110010, 0x000e000d000c000a, 0x0009000800060005, 0x0004000200010000), vec.val)); +} +inline v_uint16x32 v_pack_triplets(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x16 v_pack_triplets(const v_int32x16& vec) +{ + return v_int32x16(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val)); +} +inline v_uint32x16 v_pack_triplets(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } +inline v_float32x16 v_pack_triplets(const v_float32x16& vec) +{ + return v_float32x16(_mm512_permutexvar_ps(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 
0x0000000c0000000a, + 0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val)); +} + +////////// Matrix operations ///////// + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b) +{ return v_int32x16(_mm512_madd_epi16(a.val, b.val)); } +inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b) +{ + __m512i even = _mm512_mul_epi32(a.val, b.val); + __m512i odd = _mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32)); + return v_int64x8(_mm512_add_epi64(even, odd)); +} +inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b) +{ + __m512i even_a = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, _mm512_setzero_si512()); + __m512i odd_a = _mm512_srli_epi16(a.val, 8); + + __m512i even_b = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b.val, _mm512_setzero_si512()); + __m512i odd_b = _mm512_srli_epi16(b.val, 8); + + __m512i prod0 = _mm512_madd_epi16(even_a, even_b); + __m512i prod1 = _mm512_madd_epi16(odd_a, odd_b); + return v_uint32x16(_mm512_add_epi32(prod0, prod1)); +} +inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b) +{ + __m512i even_a = _mm512_srai_epi16(_mm512_bslli_epi128(a.val, 1), 8); + __m512i odd_a = _mm512_srai_epi16(a.val, 8); + + __m512i even_b = _mm512_srai_epi16(_mm512_bslli_epi128(b.val, 1), 8); + __m512i odd_b = _mm512_srai_epi16(b.val, 8); + + __m512i prod0 = _mm512_madd_epi16(even_a, even_b); + __m512i prod1 = _mm512_madd_epi16(odd_a, odd_b); + return v_int32x16(_mm512_add_epi32(prod0, prod1)); +} +inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i mullo = _mm512_mullo_epi16(a.val, b.val); + __m512i mulhi = _mm512_mulhi_epu16(a.val, b.val); + __m512i mul0 = _mm512_unpacklo_epi16(mullo, mulhi); + __m512i mul1 = _mm512_unpackhi_epi16(mullo, mulhi); + + __m512i p02 = _mm512_mask_blend_epi32(0xAAAA, mul0, _mm512_setzero_si512()); + __m512i p13 = _mm512_srli_epi64(mul0, 32); + __m512i p46 = _mm512_mask_blend_epi32(0xAAAA, mul1, _mm512_setzero_si512()); + __m512i p57 = _mm512_srli_epi64(mul1, 32); + + __m512i p15_ = _mm512_add_epi64(p02, p13); + __m512i p9d_ = _mm512_add_epi64(p46, p57); + + return v_uint64x8(_mm512_add_epi64( + _mm512_unpacklo_epi64(p15_, p9d_), + _mm512_unpackhi_epi64(p15_, p9d_) + )); +} +inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b) +{ + __m512i prod = _mm512_madd_epi16(a.val, b.val); + __m512i even = _mm512_srai_epi64(_mm512_bslli_epi128(prod, 4), 32); + __m512i odd = _mm512_srai_epi64(prod, 32); + return v_int64x8(_mm512_add_epi64(even, odd)); +} +inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline 
v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b) +{ return v_dotprod(a, b); } +inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b) +{ return v_dotprod(a, b); } +inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b) +{ + __m512i mullo = _mm512_mullo_epi16(a.val, b.val); + __m512i mulhi = _mm512_mulhi_epu16(a.val, b.val); + __m512i mul0 = _mm512_unpacklo_epi16(mullo, mulhi); + __m512i mul1 = _mm512_unpackhi_epi16(mullo, mulhi); + + __m512i p02 = _mm512_mask_blend_epi32(0xAAAA, mul0, _mm512_setzero_si512()); + __m512i p13 = _mm512_srli_epi64(mul0, 32); + __m512i p46 = _mm512_mask_blend_epi32(0xAAAA, mul1, _mm512_setzero_si512()); + __m512i p57 = _mm512_srli_epi64(mul1, 32); + + __m512i p15_ = _mm512_add_epi64(p02, p13); + __m512i p9d_ = _mm512_add_epi64(p46, p57); + return v_uint64x8(_mm512_add_epi64(p15_, p9d_)); +} +inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) +{ return v_dotprod_expand(a, b) + c; } + + +#define OPENCV_HAL_AVX512_SPLAT2_PS(a, im) \ + v_float32x16(_mm512_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) + +inline v_float32x16 v_matmul(const v_float32x16& v, + const v_float32x16& m0, const v_float32x16& m1, + const v_float32x16& m2, const v_float32x16& m3) +{ + v_float32x16 v04 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 0); + v_float32x16 v15 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 1); + v_float32x16 v26 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 2); + v_float32x16 v37 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 3); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, v37 * m3))); +} + +inline v_float32x16 v_matmuladd(const v_float32x16& v, + const v_float32x16& m0, const v_float32x16& m1, + const v_float32x16& m2, const v_float32x16& a) +{ + v_float32x16 v04 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 0); + v_float32x16 v15 = 
OPENCV_HAL_AVX512_SPLAT2_PS(v, 1); + v_float32x16 v26 = OPENCV_HAL_AVX512_SPLAT2_PS(v, 2); + return v_fma(v04, m0, v_fma(v15, m1, v_fma(v26, m2, a))); +} + +#define OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \ + inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \ + { \ + __m512i t0 = cast_from(_mm512_unpacklo_##suffix(a0.val, a1.val)); \ + __m512i t1 = cast_from(_mm512_unpacklo_##suffix(a2.val, a3.val)); \ + __m512i t2 = cast_from(_mm512_unpackhi_##suffix(a0.val, a1.val)); \ + __m512i t3 = cast_from(_mm512_unpackhi_##suffix(a2.val, a3.val)); \ + b0.val = cast_to(_mm512_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm512_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm512_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm512_unpackhi_epi64(t2, t3)); \ + } + +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_uint32x16, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_int32x16, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_float32x16, ps, _mm512_castps_si512, _mm512_castsi512_ps) + +//////////////// Value reordering /////////////// + +/* Expand */ +#define OPENCV_HAL_IMPL_AVX512_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(_v512_extract_low(a.val)); \ + b1.val = intrin(_v512_extract_high(a.val)); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v512_extract_low(a.val))); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(intrin(_v512_extract_high(a.val))); } \ + inline _Tpwvec v512_load_expand(const _Tp* ptr) \ + { \ + __m256i a = _mm256_loadu_si256((const __m256i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint8x64, v_uint16x32, uchar, _mm512_cvtepu8_epi16) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int8x64, v_int16x32, schar, _mm512_cvtepi8_epi16) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint16x32, v_uint32x16, ushort, _mm512_cvtepu16_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int16x32, v_int32x16, short, _mm512_cvtepi16_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint32x16, v_uint64x8, unsigned, _mm512_cvtepu32_epi64) +OPENCV_HAL_IMPL_AVX512_EXPAND(v_int32x16, v_int64x8, int, _mm512_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_AVX512_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v512_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadu_si128((const __m128i*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_uint32x16, uchar, _mm512_cvtepu8_epi32) +OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_int32x16, schar, _mm512_cvtepi8_epi32) + +/* pack */ +// 16 +inline v_int8x64 v_pack(const v_int16x32& a, const v_int16x32& b) +{ return v_int8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } + +inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) +{ + const __m512i t = _mm512_set1_epi16(255); + return v_uint8x64(_v512_combine(_mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, t)), _mm512_cvtepi16_epi8(_mm512_min_epu16(b.val, t)))); +} + +inline v_uint8x64 v_pack_u(const v_int16x32& a, const v_int16x32& b) +{ + return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); +} + +inline void v_pack_store(schar* ptr, const v_int16x32& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(uchar* ptr, const 
v_uint16x32& a) +{ + const __m512i m = _mm512_set1_epi16(255); + _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, m))); +} + +inline void v_pack_u_store(uchar* ptr, const v_int16x32& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + +template<int n> inline +v_uint8x64 v_rshr_pack(const v_uint16x32& a, const v_uint16x32& b) +{ + // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers. + v_uint16x32 delta = v512_setall_u16((short)(1 << (n-1))); + return v_pack_u(v_reinterpret_as_s16((a + delta) >> n), + v_reinterpret_as_s16((b + delta) >> n)); +} + +template<int n> inline +void v_rshr_pack_store(uchar* ptr, const v_uint16x32& a) +{ + v_uint16x32 delta = v512_setall_u16((short)(1 << (n-1))); + v_pack_u_store(ptr, v_reinterpret_as_s16((a + delta) >> n)); +} + +template<int n> inline +v_uint8x64 v_rshr_pack_u(const v_int16x32& a, const v_int16x32& b) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_u_store(uchar* ptr, const v_int16x32& a) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template<int n> inline +v_int8x64 v_rshr_pack(const v_int16x32& a, const v_int16x32& b) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(schar* ptr, const v_int16x32& a) +{ + v_int16x32 delta = v512_setall_s16((short)(1 << (n-1))); + v_pack_store(ptr, (a + delta) >> n); +} + +// 32 +inline v_int16x32 v_pack(const v_int32x16& a, const v_int32x16& b) +{ return v_int16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi32(a.val, b.val))); } + +inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) +{ + const __m512i m = _mm512_set1_epi32(65535); + return v_uint16x32(_v512_combine(_mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m)), _mm512_cvtepi32_epi16(_mm512_min_epu32(b.val, m)))); +} + +inline v_uint16x32 v_pack_u(const v_int32x16& a, const v_int32x16& b) +{ return v_uint16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi32(a.val, b.val))); } + +inline void v_pack_store(short* ptr, const v_int32x16& a) +{ v_store_low(ptr, v_pack(a, a)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x16& a) +{ + const __m512i m = _mm512_set1_epi32(65535); + _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m))); +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x16& a) +{ v_store_low(ptr, v_pack_u(a, a)); } + + +template<int n> inline +v_uint16x32 v_rshr_pack(const v_uint32x16& a, const v_uint32x16& b) +{ + v_uint32x16 delta = v512_setall_u32(1 << (n-1)); + return v_pack_u(v_reinterpret_as_s32((a + delta) >> n), + v_reinterpret_as_s32((b + delta) >> n)); +} + +template<int n> inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x16& a) +{ + v_uint32x16 delta = v512_setall_u32(1 << (n-1)); + v_pack_u_store(ptr, v_reinterpret_as_s32((a + delta) >> n)); +} + +template<int n> inline +v_uint16x32 v_rshr_pack_u(const v_int32x16& a, const v_int32x16& b) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + return v_pack_u((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x16& a) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + v_pack_u_store(ptr, (a + delta) >> n); +} + +template<int n> inline +v_int16x32 v_rshr_pack(const v_int32x16& a, const 
v_int32x16& b) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(short* ptr, const v_int32x16& a) +{ + v_int32x16 delta = v512_setall_s32(1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// 64 +// Non-saturating pack +inline v_uint32x16 v_pack(const v_uint64x8& a, const v_uint64x8& b) +{ return v_uint32x16(_v512_combine(_mm512_cvtepi64_epi32(a.val), _mm512_cvtepi64_epi32(b.val))); } + +inline v_int32x16 v_pack(const v_int64x8& a, const v_int64x8& b) +{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } + +inline void v_pack_store(unsigned* ptr, const v_uint64x8& a) +{ _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi64_epi32(a.val)); } + +inline void v_pack_store(int* ptr, const v_int64x8& b) +{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } + +template<int n> inline +v_uint32x16 v_rshr_pack(const v_uint64x8& a, const v_uint64x8& b) +{ + v_uint64x8 delta = v512_setall_u64((uint64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(unsigned* ptr, const v_uint64x8& a) +{ + v_uint64x8 delta = v512_setall_u64((uint64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +template<int n> inline +v_int32x16 v_rshr_pack(const v_int64x8& a, const v_int64x8& b) +{ + v_int64x8 delta = v512_setall_s64((int64)1 << (n-1)); + return v_pack((a + delta) >> n, (b + delta) >> n); +} + +template<int n> inline +void v_rshr_pack_store(int* ptr, const v_int64x8& a) +{ + v_int64x8 delta = v512_setall_s64((int64)1 << (n-1)); + v_pack_store(ptr, (a + delta) >> n); +} + +// pack boolean +inline v_uint8x64 v_pack_b(const v_uint16x32& a, const v_uint16x32& b) +{ return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } + +inline v_uint8x64 v_pack_b(const v_uint32x16& a, const v_uint32x16& b, + const v_uint32x16& c, const v_uint32x16& d) +{ + __m512i ab = _mm512_packs_epi32(a.val, b.val); + __m512i cd = _mm512_packs_epi32(c.val, d.val); + + return v_uint8x64(_mm512_permutexvar_epi32(_v512_set_epu32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0), _mm512_packs_epi16(ab, cd))); +} + +inline v_uint8x64 v_pack_b(const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c, + const v_uint64x8& d, const v_uint64x8& e, const v_uint64x8& f, + const v_uint64x8& g, const v_uint64x8& h) +{ + __m512i ab = _mm512_packs_epi32(a.val, b.val); + __m512i cd = _mm512_packs_epi32(c.val, d.val); + __m512i ef = _mm512_packs_epi32(e.val, f.val); + __m512i gh = _mm512_packs_epi32(g.val, h.val); + + __m512i abcd = _mm512_packs_epi32(ab, cd); + __m512i efgh = _mm512_packs_epi32(ef, gh); + + return v_uint8x64(_mm512_permutexvar_epi16(_v512_set_epu16(31, 23, 15, 7, 30, 22, 14, 6, 29, 21, 13, 5, 28, 20, 12, 4, + 27, 19, 11, 3, 26, 18, 10, 2, 25, 17, 9, 1, 24, 16, 8, 0), _mm512_packs_epi16(abcd, efgh))); +} + +/* Recombine */ +// it's up there with load and store operations + +/* Extract */ +#define OPENCV_HAL_IMPL_AVX512_EXTRACT(_Tpvec) \ + template<int s> \ + inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ + { return v_rotate_right<s>(a, b); } + +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint8x64) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int8x64) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint16x32) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int16x32) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint64x8) 
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int64x8) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float32x16) +OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float64x8) + +#define OPENCV_HAL_IMPL_AVX512_EXTRACT_N(_Tpvec, _Tp) \ +template<int i> inline _Tp v_extract_n(_Tpvec v) { return v_rotate_right<i>(v).get0(); } + +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint8x64, uchar) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int8x64, schar) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint16x32, ushort) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int16x32, short) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint32x16, uint) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int32x16, int) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint64x8, uint64) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int64x8, int64) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float32x16, float) +OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float64x8, double) + +template<int i> +inline v_uint32x16 v_broadcast_element(v_uint32x16 a) +{ + static const __m512i perm = _mm512_set1_epi32((char)i); + return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); +} + +template<int i> +inline v_int32x16 v_broadcast_element(const v_int32x16 &a) +{ return v_reinterpret_as_s32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); } + +template<int i> +inline v_float32x16 v_broadcast_element(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); } + + +///////////////////// load deinterleave ///////////////////////////// + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, + 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, + 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, + 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask0, ab1)); + b = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask1, ab1)); +#else + __m512i mask0 = _mm512_set4_epi32(0x0f0d0b09, 0x07050301, 0x0e0c0a08, 0x06040200); + __m512i a0b0 = _mm512_shuffle_epi8(ab0, mask0); + __m512i a1b1 = _mm512_shuffle_epi8(ab1, mask0); + __m512i mask1 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask2 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask1, a1b1)); + b = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask2, a1b1)); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i mask0 = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint16x32(_mm512_permutex2var_epi16(ab0, mask0, ab1)); + b = v_uint16x32(_mm512_permutex2var_epi16(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b ) +{ + 
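+    // mask0 gathers the even 32-bit lanes (channel a) and mask1 the odd lanes
+    // (channel b) from the two concatenated loads, one two-source permute each.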
__m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint32x16(_mm512_permutex2var_epi32(ab0, mask0, ab1)); + b = v_uint32x16(_mm512_permutex2var_epi32(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b ) +{ + __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i mask0 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + a = v_uint64x8(_mm512_permutex2var_epi64(ab0, mask0, ab1)); + b = v_uint64x8(_mm512_permutex2var_epi64(ab0, mask1, ab1)); +} + +inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); + +#if CV_AVX_512VBMI2 + __m512i mask0 = _v512_set_epu8(126, 123, 120, 117, 114, 111, 108, 105, 102, 99, 96, 93, 90, 87, 84, 81, + 78, 75, 72, 69, 66, 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, + 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0, 62, 59, 56, 53, 50, + 47, 44, 41, 38, 35, 32, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2); + __m512i r0b01 = _mm512_permutex2var_epi8(bgr0, mask0, bgr1); + __m512i b1g12 = _mm512_permutex2var_epi8(bgr1, mask0, bgr2); + __m512i r12b2 = _mm512_permutex2var_epi8(bgr1, + _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 127, 124, 121, 118, 115, 112, 109, 106, 103, 100, 97, + 94, 91, 88, 85, 82, 79, 76, 73, 70, 67, 64, 61, 58, 55, 52, 49, + 46, 43, 40, 37, 34, 31, 28, 25, 22, 19, 16, 13, 10, 7, 4, 1), bgr2); + a = v_uint8x64(_mm512_mask_compress_epi8(r12b2, 0xffffffffffe00000, r0b01)); + b = v_uint8x64(_mm512_mask_compress_epi8(b1g12, 0x2492492492492492, bgr0)); + c = v_uint8x64(_mm512_mask_expand_epi8(r0b01, 0xffffffffffe00000, r12b2)); +#elif CV_AVX_512VBMI + __m512i b0g0b1 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr1, bgr0); + __m512i g1r1g2 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr2, bgr1); + __m512i r2b2r0 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr0, bgr2); + a = v_uint8x64(_mm512_permutex2var_epi8(b0g0b1, _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 63, 61, 60, 58, 57, 55, 54, 52, 51, 49, 48, + 46, 45, 43, 42, 40, 39, 37, 36, 34, 33, 31, 30, 28, 27, 25, 24, + 23, 21, 20, 18, 17, 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 0), bgr2)); + b = v_uint8x64(_mm512_permutex2var_epi8(g1r1g2, _v512_set_epu8( 63, 61, 60, 58, 57, 55, 54, 52, 51, 49, 48, 46, 45, 43, 42, 40, + 39, 37, 36, 34, 33, 31, 30, 28, 27, 25, 24, 23, 21, 20, 18, 17, + 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 0, 126, 123, 120, 117, 114, + 111, 108, 105, 102, 99, 96, 93, 90, 87, 84, 81, 78, 75, 72, 69, 66), bgr0)); + c = v_uint8x64(_mm512_permutex2var_epi8(r2b2r0, _v512_set_epu8( 63, 60, 57, 54, 51, 48, 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, + 15, 12, 9, 6, 3, 0, 125, 122, 119, 116, 113, 110, 107, 104, 101, 98, + 95, 92, 89, 86, 83, 80, 77, 74, 71, 68, 65, 62, 59, 56, 53, 50, + 47, 44, 41, 38, 35, 32, 29, 26, 23, 20, 17, 14, 11, 8, 5, 2), bgr1)); +#else + __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 
40, 37, 34, 63, 60, 57, 54, 51, 48, + 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0); + + __m512i b0g0 = _mm512_mask_blend_epi32(0xf800, b01g1, r12b2); + __m512i r0b1 = _mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17, + 14, 11, 8, 5, 2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0); + __m512i g1r1 = _mm512_alignr_epi32(r12b2, g20r0, 11); + a = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b0g0, r0b1)); + c = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, r0b1, g1r1)); + b = v_uint8x64(_mm512_shuffle_epi8(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1r1, b0g0), _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001))); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + + __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48, + 45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0); + + a = v_uint16x32(_mm512_mask_blend_epi32(0xf800, b01g1, r12b2)); + b = v_uint16x32(_mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17, + 14, 11, 8, 5, 2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0)); + c = v_uint16x32(_mm512_alignr_epi32(r12b2, g20r0, 11)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + + __m512i mask0 = _v512_set_epu32(29, 26, 23, 20, 17, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0); + __m512i b01r1 = _mm512_permutex2var_epi32(bgr0, mask0, bgr1); + __m512i g12b2 = _mm512_permutex2var_epi32(bgr1, mask0, bgr2); + __m512i r20g0 = _mm512_permutex2var_epi32(bgr2, mask0, bgr0); + + a = v_uint32x16(_mm512_mask_blend_epi32(0xf800, b01r1, g12b2)); + b = v_uint32x16(_mm512_alignr_epi32(g12b2, r20g0, 11)); + c = v_uint32x16(_mm512_permutex2var_epi32(bgr1, _v512_set_epu32(21, 20, 19, 18, 17, 16, 13, 10, 7, 4, 1, 26, 25, 24, 23, 22), r20g0)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c ) +{ + __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + + __m512i mask0 = _v512_set_epu64(13, 10, 15, 12, 9, 6, 3, 0); + __m512i b01g1 = _mm512_permutex2var_epi64(bgr0, mask0, bgr1); + __m512i r12b2 = _mm512_permutex2var_epi64(bgr1, mask0, bgr2); + __m512i g20r0 = _mm512_permutex2var_epi64(bgr2, mask0, bgr0); + + a = v_uint64x8(_mm512_mask_blend_epi64(0xc0, b01g1, r12b2)); + c = v_uint64x8(_mm512_alignr_epi64(r12b2, g20r0, 6)); + b = v_uint64x8(_mm512_permutex2var_epi64(bgr1, _v512_set_epu64(10, 9, 8, 5, 2, 13, 12, 11), g20r0)); +} + +inline 
void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c, v_uint8x64& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 192)); + +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, + 94, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, + 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, + 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, + 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi8(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi8(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi8(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi8(bgra2, mask1, bgra3); + + a = v_uint8x64(_mm512_permutex2var_epi8(br01, mask0, br23)); + c = v_uint8x64(_mm512_permutex2var_epi8(br01, mask1, br23)); + b = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask0, ga23)); + d = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask1, ga23)); +#else + __m512i mask = _mm512_set4_epi32(0x0f0b0703, 0x0e0a0602, 0x0d090501, 0x0c080400); + __m512i b0g0r0a0 = _mm512_shuffle_epi8(bgra0, mask); + __m512i b1g1r1a1 = _mm512_shuffle_epi8(bgra1, mask); + __m512i b2g2r2a2 = _mm512_shuffle_epi8(bgra2, mask); + __m512i b3g3r3a3 = _mm512_shuffle_epi8(bgra3, mask); + + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi32(b0g0r0a0, mask0, b1g1r1a1); + __m512i ga01 = _mm512_permutex2var_epi32(b0g0r0a0, mask1, b1g1r1a1); + __m512i br23 = _mm512_permutex2var_epi32(b2g2r2a2, mask0, b3g3r3a3); + __m512i ga23 = _mm512_permutex2var_epi32(b2g2r2a2, mask1, b3g3r3a3); + + a = v_uint8x64(_mm512_permutex2var_epi32(br01, mask0, br23)); + c = v_uint8x64(_mm512_permutex2var_epi32(br01, mask1, br23)); + b = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask0, ga23)); + d = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask1, ga23)); +#endif +} + +inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c, v_uint16x32& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 96)); + + __m512i mask0 = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, + 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, + 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi16(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi16(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi16(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi16(bgra2, mask1, bgra3); + + a = 
v_uint16x32(_mm512_permutex2var_epi16(br01, mask0, br23)); + c = v_uint16x32(_mm512_permutex2var_epi16(br01, mask1, br23)); + b = v_uint16x32(_mm512_permutex2var_epi16(ga01, mask0, ga23)); + d = v_uint16x32(_mm512_permutex2var_epi16(ga01, mask1, ga23)); +} + +inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c, v_uint32x16& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 32)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 48)); + + __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi32(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi32(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi32(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi32(bgra2, mask1, bgra3); + + a = v_uint32x16(_mm512_permutex2var_epi32(br01, mask0, br23)); + c = v_uint32x16(_mm512_permutex2var_epi32(br01, mask1, br23)); + b = v_uint32x16(_mm512_permutex2var_epi32(ga01, mask0, ga23)); + d = v_uint32x16(_mm512_permutex2var_epi32(ga01, mask1, ga23)); +} + +inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c, v_uint64x8& d ) +{ + __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); + __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 8)); + __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 16)); + __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 24)); + + __m512i mask0 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i mask1 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); + + __m512i br01 = _mm512_permutex2var_epi64(bgra0, mask0, bgra1); + __m512i ga01 = _mm512_permutex2var_epi64(bgra0, mask1, bgra1); + __m512i br23 = _mm512_permutex2var_epi64(bgra2, mask0, bgra3); + __m512i ga23 = _mm512_permutex2var_epi64(bgra2, mask1, bgra3); + + a = v_uint64x8(_mm512_permutex2var_epi64(br01, mask0, br23)); + c = v_uint64x8(_mm512_permutex2var_epi64(br01, mask1, br23)); + b = v_uint64x8(_mm512_permutex2var_epi64(ga01, mask0, ga23)); + d = v_uint64x8(_mm512_permutex2var_epi64(ga01, mask1, ga23)); +} + +///////////////////////////// store interleave ///////////////////////////////////// + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& x, const v_uint8x64& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint8x64 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 64), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 64), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), high.val); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& x, const v_uint16x32& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint16x32 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 32), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 32), 
high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), high.val); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& x, const v_uint32x16& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint32x16 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 16), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 16), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), high.val); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& x, const v_uint64x8& y, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint64x8 low, high; + v_zip(x, y, low, high); + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, low.val); + _mm512_stream_si512((__m512i*)(ptr + 8), high.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, low.val); + _mm512_store_si512((__m512i*)(ptr + 8), high.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, low.val); + _mm512_storeu_si512((__m512i*)(ptr + 8), high.val); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, const v_uint8x64& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ +#if CV_AVX_512VBMI + __m512i mask0 = _v512_set_epu8(127, 84, 20, 126, 83, 19, 125, 82, 18, 124, 81, 17, 123, 80, 16, 122, + 79, 15, 121, 78, 14, 120, 77, 13, 119, 76, 12, 118, 75, 11, 117, 74, + 10, 116, 73, 9, 115, 72, 8, 114, 71, 7, 113, 70, 6, 112, 69, 5, + 111, 68, 4, 110, 67, 3, 109, 66, 2, 108, 65, 1, 107, 64, 0, 106); + __m512i mask1 = _v512_set_epu8( 21, 42, 105, 20, 41, 104, 19, 40, 103, 18, 39, 102, 17, 38, 101, 16, + 37, 100, 15, 36, 99, 14, 35, 98, 13, 34, 97, 12, 33, 96, 11, 32, + 95, 10, 31, 94, 9, 30, 93, 8, 29, 92, 7, 28, 91, 6, 27, 90, + 5, 26, 89, 4, 25, 88, 3, 24, 87, 2, 23, 86, 1, 22, 85, 0); + __m512i mask2 = _v512_set_epu8(106, 127, 63, 105, 126, 62, 104, 125, 61, 103, 124, 60, 102, 123, 59, 101, + 122, 58, 100, 121, 57, 99, 120, 56, 98, 119, 55, 97, 118, 54, 96, 117, + 53, 95, 116, 52, 94, 115, 51, 93, 114, 50, 92, 113, 49, 91, 112, 48, + 90, 111, 47, 89, 110, 46, 88, 109, 45, 87, 108, 44, 86, 107, 43, 85); + __m512i r2g0r0 = _mm512_permutex2var_epi8(b.val, mask0, c.val); + __m512i b0r1b1 = _mm512_permutex2var_epi8(a.val, mask1, c.val); + __m512i g1b2g2 = _mm512_permutex2var_epi8(a.val, mask2, b.val); + + __m512i bgr0 = _mm512_mask_blend_epi8(0x9249249249249249, r2g0r0, b0r1b1); + __m512i bgr1 = _mm512_mask_blend_epi8(0x9249249249249249, b0r1b1, g1b2g2); + __m512i bgr2 = _mm512_mask_blend_epi8(0x9249249249249249, g1b2g2, r2g0r0); +#else + __m512i g1g0 = _mm512_shuffle_epi8(b.val, _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001)); + __m512i b0g0 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, g1g0); + __m512i r0b1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, c.val, a.val); + __m512i g1r1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1g0, c.val); + + __m512i mask0 = _v512_set_epu16(42, 10, 31, 41, 9, 30, 40, 8, 29, 39, 7, 28, 38, 6, 27, 37, + 5, 26, 36, 4, 25, 35, 3, 24, 34, 2, 23, 33, 1, 22, 32, 0); + __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16, + 47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42); + 
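+    // mask0..mask2 scatter the byte-blended pair registers (b0g0, r0b1, g1r1) across
+    // the three outputs; the epi16 blends below then keep every third word of each,
+    // yielding the packed B,G,R byte order.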
__m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58, + 26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21); + __m512i b0g0b2 = _mm512_permutex2var_epi16(b0g0, mask0, r0b1); + __m512i r1b1r0 = _mm512_permutex2var_epi16(b0g0, mask1, g1r1); + __m512i g2r2g1 = _mm512_permutex2var_epi16(r0b1, mask2, g1r1); + + __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); + __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1); + __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2); +#endif + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 64), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 128), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 64), bgr1); + _mm512_store_si512((__m512i*)(ptr + 128), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 128), bgr2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, const v_uint16x32& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu16(42, 10, 31, 41, 9, 30, 40, 8, 29, 39, 7, 28, 38, 6, 27, 37, + 5, 26, 36, 4, 25, 35, 3, 24, 34, 2, 23, 33, 1, 22, 32, 0); + __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16, + 47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42); + __m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58, + 26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21); + __m512i b0g0b2 = _mm512_permutex2var_epi16(a.val, mask0, b.val); + __m512i r1b1r0 = _mm512_permutex2var_epi16(a.val, mask1, c.val); + __m512i g2r2g1 = _mm512_permutex2var_epi16(b.val, mask2, c.val); + + __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); + __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1); + __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 32), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 64), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 32), bgr1); + _mm512_store_si512((__m512i*)(ptr + 64), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgr2); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, const v_uint32x16& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu32(26, 31, 15, 25, 30, 14, 24, 29, 13, 23, 28, 12, 22, 27, 11, 21); + __m512i mask1 = _v512_set_epu32(31, 10, 25, 30, 9, 24, 29, 8, 23, 28, 7, 22, 27, 6, 21, 26); + __m512i g1b2g2 = _mm512_permutex2var_epi32(a.val, mask0, b.val); + __m512i r2r1b1 = _mm512_permutex2var_epi32(a.val, mask1, c.val); + + __m512i bgr0 = _mm512_mask_expand_epi32(_mm512_mask_expand_epi32(_mm512_maskz_expand_epi32(0x9249, a.val), 0x2492, b.val), 0x4924, c.val); + __m512i bgr1 = _mm512_mask_blend_epi32(0x9249, r2r1b1, g1b2g2); + __m512i bgr2 = _mm512_mask_blend_epi32(0x9249, g1b2g2, r2r1b1); + + if( mode == 
hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 16), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 32), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 16), bgr1); + _mm512_store_si512((__m512i*)(ptr + 32), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgr2); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + __m512i mask0 = _v512_set_epu64( 5, 12, 7, 4, 11, 6, 3, 10); + __m512i mask1 = _v512_set_epu64(15, 7, 4, 14, 6, 3, 13, 5); + __m512i r1b1b2 = _mm512_permutex2var_epi64(a.val, mask0, c.val); + __m512i g2r2g1 = _mm512_permutex2var_epi64(b.val, mask1, c.val); + + __m512i bgr0 = _mm512_mask_expand_epi64(_mm512_mask_expand_epi64(_mm512_maskz_expand_epi64(0x49, a.val), 0x92, b.val), 0x24, c.val); + __m512i bgr1 = _mm512_mask_blend_epi64(0xdb, g2r2g1, r1b1b2); + __m512i bgr2 = _mm512_mask_blend_epi64(0xdb, r1b1b2, g2r2g1); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgr0); + _mm512_stream_si512((__m512i*)(ptr + 8), bgr1); + _mm512_stream_si512((__m512i*)(ptr + 16), bgr2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgr0); + _mm512_store_si512((__m512i*)(ptr + 8), bgr1); + _mm512_store_si512((__m512i*)(ptr + 16), bgr2); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgr0); + _mm512_storeu_si512((__m512i*)(ptr + 8), bgr1); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgr2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, + const v_uint8x64& c, const v_uint8x64& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint8x64 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint8x64 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 192), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 192), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 128), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 192), bgra3.val); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, + const v_uint16x32& c, const v_uint16x32& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint16x32 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint16x32 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 64), bgra2.val); + 
_mm512_stream_si512((__m512i*)(ptr + 96), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 96), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 64), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 96), bgra3.val); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, + const v_uint32x16& c, const v_uint32x16& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint32x16 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint32x16 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 48), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 48), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 32), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 48), bgra3.val); + } +} + +inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, + const v_uint64x8& c, const v_uint64x8& d, + hal::StoreMode mode=hal::STORE_UNALIGNED ) +{ + v_uint64x8 br01, br23, ga01, ga23; + v_zip(a, c, br01, br23); + v_zip(b, d, ga01, ga23); + v_uint64x8 bgra0, bgra1, bgra2, bgra3; + v_zip(br01, ga01, bgra0, bgra1); + v_zip(br23, ga23, bgra2, bgra3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm512_stream_si512((__m512i*)ptr, bgra0.val); + _mm512_stream_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_stream_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_stream_si512((__m512i*)(ptr + 24), bgra3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm512_store_si512((__m512i*)ptr, bgra0.val); + _mm512_store_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_store_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_store_si512((__m512i*)(ptr + 24), bgra3.val); + } + else + { + _mm512_storeu_si512((__m512i*)ptr, bgra0.val); + _mm512_storeu_si512((__m512i*)(ptr + 8), bgra1.val); + _mm512_storeu_si512((__m512i*)(ptr + 16), bgra2.val); + _mm512_storeu_si512((__m512i*)(ptr + 24), bgra3.val); + } +} + +#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const 
_Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int8x64, schar, s8, v_uint8x64, uchar, u8) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int16x32, short, s16, v_uint16x32, ushort, u16) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int32x16, int, s32, v_uint32x16, unsigned, u32) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float32x16, float, f32, v_uint32x16, unsigned, u32) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int64x8, int64, s64, v_uint64x8, uint64, u64) +OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float64x8, double, f64, v_uint64x8, uint64, u64) + +////////// Mask and checks ///////// + +/** Mask **/ +inline int64 v_signmask(const v_int8x64& a) { return (int64)_mm512_movepi8_mask(a.val); } +inline int v_signmask(const v_int16x32& a) { return (int)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline int v_signmask(const v_int32x16& a) { return (int)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline int v_signmask(const v_int64x8& a) { return (int)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } + +inline int64 v_signmask(const v_uint8x64& a) { return v_signmask(v_reinterpret_as_s8(a)); } +inline int v_signmask(const v_uint16x32& a) { return v_signmask(v_reinterpret_as_s16(a)); } +inline int v_signmask(const v_uint32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_uint64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); } +inline int v_signmask(const v_float32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); } + +/** Checks **/ +inline bool v_check_all(const v_int8x64& a) { return !(bool)_mm512_cmp_epi8_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int8x64& a) { return (bool)_mm512_movepi8_mask(a.val); } +inline bool v_check_all(const v_int16x32& a) { return !(bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int16x32& a) { return (bool)_mm512_cmp_epi16_mask(a.val, 
_mm512_setzero_si512(), _MM_CMPINT_LT); } +inline bool v_check_all(const v_int32x16& a) { return !(bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int32x16& a) { return (bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } +inline bool v_check_all(const v_int64x8& a) { return !(bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); } +inline bool v_check_any(const v_int64x8& a) { return (bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); } + +inline bool v_check_all(const v_float32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_float32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_float64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_float64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); } +inline bool v_check_all(const v_uint8x64& a) { return v_check_all(v_reinterpret_as_s8(a)); } +inline bool v_check_all(const v_uint16x32& a) { return v_check_all(v_reinterpret_as_s16(a)); } +inline bool v_check_all(const v_uint32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_all(const v_uint64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_uint8x64& a) { return v_check_any(v_reinterpret_as_s8(a)); } +inline bool v_check_any(const v_uint16x32& a) { return v_check_any(v_reinterpret_as_s16(a)); } +inline bool v_check_any(const v_uint32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_uint64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); } + +inline int v_scan_forward(const v_int8x64& a) +{ + int64 mask = _mm512_movepi8_mask(a.val); + int mask32 = (int)mask; + return mask != 0 ? mask32 != 0 ? trailingZeros32(mask32) : 32 + trailingZeros32((int)(mask >> 32)) : 0; +} +inline int v_scan_forward(const v_uint8x64& a) { return v_scan_forward(v_reinterpret_as_s8(a)); } +inline int v_scan_forward(const v_int16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } +inline int v_scan_forward(const v_uint16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } +inline int v_scan_forward(const v_int32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_uint32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_float32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } +inline int v_scan_forward(const v_int64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } +inline int v_scan_forward(const v_uint64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } +inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } + +inline void v512_cleanup() { _mm256_zeroall(); } + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! 
@endcond
+
+} // cv::
+
+#endif // OPENCV_HAL_INTRIN_AVX_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_cpp.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_cpp.hpp
new file mode 100644
index 0000000..4622214
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_cpp.hpp
@@ -0,0 +1,3320 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_CPP_HPP
+#define OPENCV_HAL_INTRIN_CPP_HPP
+
+#include <limits>
+#include <cstring>
+#include <algorithm>
+#include "opencv2/core/saturate.hpp"
+
+//! @cond IGNORED
+#define CV_SIMD128_CPP 1
+#if defined(CV_FORCE_SIMD128_CPP)
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+#endif
+#if defined(CV_DOXYGEN)
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+#define CV_SIMD256 1
+#define CV_SIMD256_64F 1
+#define CV_SIMD512 1
+#define CV_SIMD512_64F 1
+#else
+#define CV_SIMD256 0 // Explicitly disable SIMD256 and SIMD512 support for scalar intrinsic implementation
+#define CV_SIMD512 0 // to avoid warnings during compilation
+#endif
+//! @endcond
+
+namespace cv
+{
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
+
+/** @addtogroup core_hal_intrin
+
+"Universal intrinsics" is a set of types and functions intended to simplify vectorization of code
+across different platforms. Several SIMD extensions on different architectures are currently supported.
+Support for 128-bit registers of various types is implemented for a wide range of architectures,
+including x86(__SSE/SSE2/SSE4.2__), ARM(__NEON__), PowerPC(__VSX__) and MIPS(__MSA__).
+256-bit registers are supported on x86(__AVX2__), and 512-bit registers are supported on x86(__AVX512__).
+If no SIMD extension is available during compilation, a fallback C++ implementation of the intrinsics
+is chosen, and the code still works as expected, although it may be slower.
+
+### Types
+
+There are several types representing vector registers of packed values; each type is
+implemented as a structure wrapping a single SIMD register.
+
+- cv::v_uint8 and cv::v_int8: 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16 and cv::v_int16: 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32 and cv::v_int32: 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64 and cv::v_int64: 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32: 32-bit floating point values (signed) - float
+- cv::v_float64: 64-bit floating point values (signed) - double
+
+The exact bit length (and lane count) of the listed types is deduced at compile time and depends on
+the SIMD capabilities selected as available when the library is compiled. All the types contain an
+__nlanes__ enumeration that gives the exact lane count of the type.
+
+If the exact bit length of a type is important, specific fixed-length register types can be used instead.
+
+There are several types representing 128-bit registers.
+
+- cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x4: four 32-bit floating point values (signed) - float
+- cv::v_float64x2: two 64-bit floating point values (signed) - double
+
+There are several types representing 256-bit registers.
+
+- cv::v_uint8x32 and cv::v_int8x32: thirty two 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x16 and cv::v_int16x16: sixteen 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x8 and cv::v_int32x8: eight 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64x4 and cv::v_int64x4: four 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x8: eight 32-bit floating point values (signed) - float
+- cv::v_float64x4: four 64-bit floating point values (signed) - double
+
+@note
+256-bit registers are implemented at the moment only for the AVX2 SIMD extension; if you want to use
+these types directly, don't forget to check the CV_SIMD256 preprocessor definition:
+@code
+#if CV_SIMD256
+//...
+#endif
+@endcode
+
+There are several types representing 512-bit registers.
+
+- cv::v_uint8x64 and cv::v_int8x64: sixty four 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x32 and cv::v_int16x32: thirty two 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x16 and cv::v_int32x16: sixteen 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64x8 and cv::v_int64x8: eight 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x16: sixteen 32-bit floating point values (signed) - float
+- cv::v_float64x8: eight 64-bit floating point values (signed) - double
+@note
+512-bit registers are implemented at the moment only for the AVX512 SIMD extension; if you want to use
+these types directly, don't forget to check the CV_SIMD512 preprocessor definition.
+
+@note
+cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget to
+check the CV_SIMD128_64F preprocessor definition.
+
+### Load and store operations
+
+These operations allow you to set the contents of a register explicitly or load them from a memory
+block, and to save the contents of a register back to a memory block.
+
+There are variable-size register load operations whose result has the maximum size available for the
+platform capabilities chosen at compile time.
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+- Other create methods:
+vx_setall_s8, vx_setall_u8, ...,
+vx_setzero_u8, vx_setzero_s8, ...
+- Memory load operations:
+vx_load, vx_load_aligned, vx_load_low, vx_load_halves,
+- Memory operations with expansion of values:
+vx_load_expand, vx_load_expand_q
+
+There are also fixed-size register load/store operations.
+
+For 128-bit registers
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ...
+- Other create methods:
+@ref v_setall_s8, @ref v_setall_u8, ...,
+@ref v_setzero_u8, @ref v_setzero_s8, ...
+- Memory load operations:
+@ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves,
+- Memory operations with expansion of values:
+@ref v_load_expand, @ref v_load_expand_q
+
+For 256-bit registers (check the CV_SIMD256 preprocessor definition)
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) "from four values", ...
+- Other create methods:
+@ref v256_setall_s8, @ref v256_setall_u8, ...,
+@ref v256_setzero_u8, @ref v256_setzero_s8, ...
+- Memory load operations:
+@ref v256_load, @ref v256_load_aligned, @ref v256_load_low, @ref v256_load_halves,
+- Memory operations with expansion of values:
+@ref v256_load_expand, @ref v256_load_expand_q
+
+For 512-bit registers (check the CV_SIMD512 preprocessor definition)
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7) "from eight values", ...
+- Other create methods:
+@ref v512_setall_s8, @ref v512_setall_u8, ...,
+@ref v512_setzero_u8, @ref v512_setzero_s8, ...
+- Memory load operations:
+@ref v512_load, @ref v512_load_aligned, @ref v512_load_low, @ref v512_load_halves,
+- Memory operations with expansion of values:
+@ref v512_load_expand, @ref v512_load_expand_q
+
+Store to memory operations are similar across the different platform capabilities:
+@ref v_store, @ref v_store_aligned,
+@ref v_store_high, @ref v_store_low
+
+### Value reordering
+
+These operations allow you to reorder or recombine elements in one or multiple vectors.
+
+- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
+- Expand: @ref v_expand, @ref v_expand_low, @ref v_expand_high
+- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
+@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
+- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
+- Reverse: @ref v_reverse
+- Extract: @ref v_extract
+
+
+### Arithmetic, bitwise and comparison operations
+
+Element-wise binary and unary operations.
+
+- Arithmetic:
+@ref operator +(const v_reg &a, const v_reg &b) "+",
+@ref operator -(const v_reg &a, const v_reg &b) "-",
+@ref operator *(const v_reg &a, const v_reg &b) "*",
+@ref operator /(const v_reg &a, const v_reg &b) "/",
+@ref v_mul_expand
+
+- Non-saturating arithmetic: @ref v_add_wrap, @ref v_sub_wrap
+
+- Bitwise shifts:
+@ref operator <<(const v_reg &a, int s) "<<",
+@ref operator >>(const v_reg &a, int s) ">>",
+@ref v_shl, @ref v_shr
+
+- Bitwise logic:
+@ref operator &(const v_reg &a, const v_reg &b) "&",
+@ref operator |(const v_reg &a, const v_reg &b) "|",
+@ref operator ^(const v_reg &a, const v_reg &b) "^",
+@ref operator ~(const v_reg &a) "~"
+
+- Comparison:
+@ref operator >(const v_reg &a, const v_reg &b) ">",
+@ref operator >=(const v_reg &a, const v_reg &b) ">=",
+@ref operator <(const v_reg &a, const v_reg &b) "<",
+@ref operator <=(const v_reg &a, const v_reg &b) "<=",
+@ref operator ==(const v_reg &a, const v_reg &b) "==",
+@ref operator !=(const v_reg &a, const v_reg &b) "!="
+
+- min/max: @ref v_min, @ref v_max
+
+### Reduce and mask
+
+Most of these operations return only one value.
+
+- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum, @ref v_popcount
+- Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select
+
+### Other math
+
+- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
+- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
+
+### Conversions
+
+Different type conversions and casts:
+
+- Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc,
+- To float: @ref v_cvt_f32, @ref v_cvt_f64
+- Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ...
+
+### Matrix operations
+
+In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_dotprod_fast,
+@ref v_dotprod_expand, @ref v_dotprod_expand_fast, @ref v_matmul, @ref v_transpose4x4
+
+### Usability
+
+Most operations are implemented only for some subset of the available types; a short usage sketch
+is given below, and the matrices that follow show the applicability of the different operations
+to the types.
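+A minimal usage sketch (loop vectorization with a scalar tail; the array arguments and n are
+hypothetical caller-provided data, and CV_SIMD128 is assumed to be available):
+@code{.cpp}
+void add_arrays(const float* src1, const float* src2, float* dst, int n)
+{
+    int i = 0;
+#if CV_SIMD128
+    // process four floats per iteration using 128-bit registers
+    for( ; i <= n - v_float32x4::nlanes; i += v_float32x4::nlanes )
+    {
+        v_float32x4 a = v_load(src1 + i);
+        v_float32x4 b = v_load(src2 + i);
+        v_store(dst + i, a + b);
+    }
+#endif
+    for( ; i < n; i++ ) // scalar tail for the remaining elements
+        dst[i] = src1[i] + src2[i];
+}
+@endcode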
+
+Regular integers:
+
+| Operations\\Types | uint 8 | int 8 | uint 16 | int 16 | uint 32 | int 32 |
+|-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
+|load, store | x | x | x | x | x | x |
+|interleave | x | x | x | x | x | x |
+|expand | x | x | x | x | x | x |
+|expand_low | x | x | x | x | x | x |
+|expand_high | x | x | x | x | x | x |
+|expand_q | x | x | | | | |
+|add, sub | x | x | x | x | x | x |
+|add_wrap, sub_wrap | x | x | x | x | | |
+|mul_wrap | x | x | x | x | | |
+|mul | x | x | x | x | x | x |
+|mul_expand | x | x | x | x | x | |
+|compare | x | x | x | x | x | x |
+|shift | | | x | x | x | x |
+|dotprod | | | | x | | x |
+|dotprod_fast | | | | x | | x |
+|dotprod_expand | x | x | x | x | | x |
+|dotprod_expand_fast| x | x | x | x | | x |
+|logical | x | x | x | x | x | x |
+|min, max | x | x | x | x | x | x |
+|absdiff | x | x | x | x | x | x |
+|absdiffs | | x | | x | | |
+|reduce | x | x | x | x | x | x |
+|mask | x | x | x | x | x | x |
+|pack | x | x | x | x | x | x |
+|pack_u | x | | x | | | |
+|pack_b | x | | | | | |
+|unpack | x | x | x | x | x | x |
+|extract | x | x | x | x | x | x |
+|rotate (lanes) | x | x | x | x | x | x |
+|cvt_flt32 | | | | | | x |
+|cvt_flt64 | | | | | | x |
+|transpose4x4 | | | | | x | x |
+|reverse | x | x | x | x | x | x |
+|extract_n | x | x | x | x | x | x |
+|broadcast_element | | | | | x | x |
+
+Big integers:
+
+| Operations\\Types | uint 64 | int 64 |
+|-------------------|:-:|:-:|
+|load, store | x | x |
+|add, sub | x | x |
+|shift | x | x |
+|logical | x | x |
+|reverse | x | x |
+|extract | x | x |
+|rotate (lanes) | x | x |
+|cvt_flt64 | | x |
+|extract_n | x | x |
+
+Floating point:
+
+| Operations\\Types | float 32 | float 64 |
+|-------------------|:-:|:-:|
+|load, store | x | x |
+|interleave | x | |
+|add, sub | x | x |
+|mul | x | x |
+|div | x | x |
+|compare | x | x |
+|min, max | x | x |
+|absdiff | x | x |
+|reduce | x | |
+|mask | x | x |
+|unpack | x | x |
+|cvt_flt32 | | x |
+|cvt_flt64 | x | |
+|sqrt, abs | x | x |
+|float math | x | x |
+|transpose4x4 | x | |
+|extract | x | x |
+|rotate (lanes) | x | x |
+|reverse | x | x |
+|extract_n | x | x |
+|broadcast_element | x | |
+
+ @{ */
+
+template<typename _Tp, int n> struct v_reg
+{
+//! @cond IGNORED
+    typedef _Tp lane_type;
+    enum { nlanes = n };
+//! @endcond
+
+    /** @brief Constructor
+
+    Initializes register with data from memory
+    @param ptr pointer to memory block with data for register */
+    explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; }
+
+    /** @brief Constructor
+
+    Initializes register with two 64-bit values */
+    v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
+
+    /** @brief Constructor
+
+    Initializes register with four 32-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
+
+    /** @brief Constructor
+
+    Initializes register with eight 16-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+          _Tp s4, _Tp s5, _Tp s6, _Tp s7)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+    }
+
+    /** @brief Constructor
+
+    Initializes register with sixteen 8-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+          _Tp s4, _Tp s5, _Tp s6, _Tp s7,
+          _Tp s8, _Tp s9, _Tp s10, _Tp s11,
+          _Tp s12, _Tp s13, _Tp s14, _Tp s15)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+        s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
+        s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
+    }
+
+    /** @brief Default constructor
+
+    Does not initialize anything*/
+    v_reg() {}
+
+    /** @brief Copy constructor */
+    v_reg(const v_reg<_Tp, n> & r)
+    {
+        for( int i = 0; i < n; i++ )
+            s[i] = r.s[i];
+    }
+    /** @brief Access first value
+
+    Returns value of the first lane according to register type, for example:
+    @code{.cpp}
+    v_int32x4 r(1, 2, 3, 4);
+    int v = r.get0(); // returns 1
+    v_uint64x2 r(1, 2);
+    uint64_t v = r.get0(); // returns 1
+    @endcode
+    */
+    _Tp get0() const { return s[0]; }
+
+//! @cond IGNORED
+    _Tp get(const int i) const { return s[i]; }
+    v_reg<_Tp, n> high() const
+    {
+        v_reg<_Tp, n> c;
+        int i;
+        for( i = 0; i < n/2; i++ )
+        {
+            c.s[i] = s[i+(n/2)];
+            c.s[i+(n/2)] = 0;
+        }
+        return c;
+    }
+
+    static v_reg<_Tp, n> zero()
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = (_Tp)0;
+        return c;
+    }
+
+    static v_reg<_Tp, n> all(_Tp s)
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = s;
+        return c;
+    }
+
+    template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const
+    {
+        size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n);
+        v_reg<_Tp2, n2> c;
+        std::memcpy(&c.s[0], &s[0], bytes);
+        return c;
+    }
+
+    v_reg& operator=(const v_reg<_Tp, n> & r)
+    {
+        for( int i = 0; i < n; i++ )
+            s[i] = r.s[i];
+        return *this;
+    }
+
+    _Tp s[n];
+//! @endcond
+};
+
+/** @brief Sixteen 8-bit unsigned integer values */
+typedef v_reg<uchar, 16> v_uint8x16;
+/** @brief Sixteen 8-bit signed integer values */
+typedef v_reg<schar, 16> v_int8x16;
+/** @brief Eight 16-bit unsigned integer values */
+typedef v_reg<ushort, 8> v_uint16x8;
+/** @brief Eight 16-bit signed integer values */
+typedef v_reg<short, 8> v_int16x8;
+/** @brief Four 32-bit unsigned integer values */
+typedef v_reg<unsigned, 4> v_uint32x4;
+/** @brief Four 32-bit signed integer values */
+typedef v_reg<int, 4> v_int32x4;
+/** @brief Four 32-bit floating point values (single precision) */
+typedef v_reg<float, 4> v_float32x4;
+/** @brief Two 64-bit floating point values (double precision) */
+typedef v_reg<double, 2> v_float64x2;
+/** @brief Two 64-bit unsigned integer values */
+typedef v_reg<uint64, 2> v_uint64x2;
+/** @brief Two 64-bit signed integer values */
+typedef v_reg<int64, 2> v_int64x2;
+
+#if CV_SIMD256
+/** @brief Thirty two 8-bit unsigned integer values */
+typedef v_reg<uchar, 32> v_uint8x32;
+/** @brief Thirty two 8-bit signed integer values */
+typedef v_reg<schar, 32> v_int8x32;
+/** @brief Sixteen 16-bit unsigned integer values */
+typedef v_reg<ushort, 16> v_uint16x16;
+/** @brief Sixteen 16-bit signed integer values */
+typedef v_reg<short, 16> v_int16x16;
+/** @brief Eight 32-bit unsigned integer values */
+typedef v_reg<unsigned, 8> v_uint32x8;
+/** @brief Eight 32-bit signed integer values */
+typedef v_reg<int, 8> v_int32x8;
+/** @brief Eight 32-bit floating point values (single precision) */
+typedef v_reg<float, 8> v_float32x8;
+/** @brief Four 64-bit floating point values (double precision) */
+typedef v_reg<double, 4> v_float64x4;
+/** @brief Four 64-bit unsigned integer values */
+typedef v_reg<uint64, 4> v_uint64x4;
+/** @brief Four 64-bit signed integer values */
+typedef v_reg<int64, 4> v_int64x4;
+#endif
+
+#if CV_SIMD512
+/** @brief Sixty four 8-bit unsigned integer values */
+typedef v_reg<uchar, 64> v_uint8x64;
+/** @brief Sixty four 8-bit signed integer values */
+typedef v_reg<schar, 64> v_int8x64;
+/** @brief Thirty two 16-bit unsigned integer values */
+typedef v_reg<ushort, 32> v_uint16x32;
+/** @brief Thirty two 16-bit signed integer values */
+typedef v_reg<short, 32> v_int16x32;
+/** @brief Sixteen 32-bit unsigned integer values */
+typedef v_reg<unsigned, 16> v_uint32x16;
+/** @brief Sixteen 32-bit signed integer values */
+typedef v_reg<int, 16> v_int32x16;
+/** @brief Sixteen 32-bit floating point values (single precision) */
+typedef v_reg<float, 16> v_float32x16;
+/** @brief Eight 64-bit floating point values (double precision) */
+typedef v_reg<double, 8> v_float64x8;
+/** @brief Eight 64-bit unsigned integer values */
+typedef v_reg<uint64, 8> v_uint64x8;
+/** @brief Eight 64-bit signed integer values */
+typedef v_reg<int64, 8> v_int64x8;
+#endif
+
+enum {
+    simd128_width = 16,
+#if CV_SIMD256
+    simd256_width = 32,
+#endif
+#if CV_SIMD512
+    simd512_width = 64,
+    simdmax_width = simd512_width
+#elif CV_SIMD256
+    simdmax_width = simd256_width
+#else
+    simdmax_width = simd128_width
+#endif
+};
+
+/** @brief Add values
+
+For all types. */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Subtract values
+
+For all types. */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Multiply values
+
+For 16- and 32-bit integer types and floating types.
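+Example (a minimal sketch):
+@code{.cpp}
+v_int32x4 a(1, 2, 3, 4), b(10, 20, 30, 40);
+v_int32x4 c = a * b; // c = {10, 40, 90, 160}
+@endcode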
*/
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Divide values
+
+For floating types only. */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+
+/** @brief Bitwise AND
+
+Only for integer types. */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Bitwise OR
+
+Only for integer types. */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Bitwise XOR
+
+Only for integer types.*/
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Bitwise NOT
+
+Only for integer types.*/
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a);
+
+
+#ifndef CV_DOXYGEN
+
+#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
+__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \
+
+#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
+__CV_EXPAND(macro_name(float, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(double, __VA_ARGS__)) \
+
+#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
+CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
+CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \
+
+#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
+template<int n> inline \
+v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return c; \
+} \
+template<int n> inline \
+v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return a; \
+}
+
+#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
+
+CV__HAL_INTRIN_IMPL_BIN_OP(+)
+CV__HAL_INTRIN_IMPL_BIN_OP(-)
+CV__HAL_INTRIN_IMPL_BIN_OP(*)
+CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /)
+
+#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
+template<int n> CV_INLINE \
+v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
+    return c; \
+} \
+template<int n> CV_INLINE \
+v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
+    return a; \
+}
+
+#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
+CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
+CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */
+
+
+CV__HAL_INTRIN_IMPL_BIT_OP(&)
+CV__HAL_INTRIN_IMPL_BIT_OP(|)
+CV__HAL_INTRIN_IMPL_BIT_OP(^)
+
+#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
+template<int n> CV_INLINE \
+v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
+    return c; \
+} \
+
+CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~)
+
+#endif // !CV_DOXYGEN
+
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
+template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp2, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cfunc(a.s[i]); \
+    return c; \
+}
+
+/** @brief Square root of elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
+
+//! @cond IGNORED
+OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
+//! @endcond
+
+/** @brief Absolute value of elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
+                          typename V_TypeTraits<_Tp>::abs_type)
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cfunc(a.s[i], b.s[i]); \
+    return c; \
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
+template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
+{ \
+    _Tp c = a.s[0]; \
+    for( int i = 1; i < n; i++ ) \
+        c = cfunc(c, a.s[i]); \
+    return c; \
+}
+
+/** @brief Choose min values for each pair
+
+Scheme:
+@code
+{A1 A2 ...}
+{B1 B2 ...}
+--------------
+{min(A1,B1) min(A2,B2) ...}
+@endcode
+For all types except 64-bit integer. */
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
+
+/** @brief Choose max values for each pair
+
+Scheme:
+@code
+{A1 A2 ...}
+{B1 B2 ...}
+--------------
+{max(A1,B1) max(A2,B2) ...}
+@endcode
+For all types except 64-bit integer. */
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
+
+/** @brief Find one min value
+
+Scheme:
+@code
+{A1 A2 A3 ...} => min(A1,A2,A3,...)
+@endcode
+For all types except 64-bit integer and 64-bit floating point types. */
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
+
+/** @brief Find one max value
+
+Scheme:
+@code
+{A1 A2 A3 ...} => max(A1,A2,A3,...)
+@endcode
+For all types except 64-bit integer and 64-bit floating point types. */
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
+
+static const unsigned char popCountTable[] =
+{
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+/** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type
+
+Scheme:
+@code
+{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
+@endcode
+For all integer types. */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
+    for (int i = 0; i < n*(int)sizeof(_Tp); i++)
+        b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
+    return b;
+}
+
+
+//! @cond IGNORED
+template<typename _Tp, int n>
+inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                      v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
+{
+    for( int i = 0; i < n; i++ )
+    {
+        minval.s[i] = std::min(a.s[i], b.s[i]);
+        maxval.s[i] = std::max(a.s[i], b.s[i]);
+    }
+}
+//! @endcond
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
+template<typename _Tp, int n> \
+inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
+    return c; \
+}
+
+/** @brief Less-than comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(<)
+
+/** @brief Greater-than comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(>)
+
+/** @brief Less-than or equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(<=)
+
+/** @brief Greater-than or equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(>=)
+
+/** @brief Equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(==)
+
+/** @brief Not equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(!=)
+
+template<int n>
+inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
+{
+    typedef typename V_TypeTraits<float>::int_type itype;
+    v_reg<float, n> c;
+    for (int i = 0; i < n; i++)
+        c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
+    return c;
+}
+template<int n>
+inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
+{
+    typedef typename V_TypeTraits<double>::int_type itype;
+    v_reg<double, n> c;
+    for (int i = 0; i < n; i++)
+        c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
+    return c;
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
+template<typename _Tp, int n> \
+inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef _Tp2 rtype; \
+    v_reg<rtype, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
+    return c; \
+}
+
+/** @brief Add values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
+
+/** @brief Subtract values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
+
+/** @brief Multiply values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
+
+//! @cond IGNORED
+template<typename T> inline T _absdiff(T a, T b)
+{
+    return a > b ? a - b : b - a;
+}
+//! @endcond
+
+/** @brief Absolute difference
+
+Returns \f$ |a - b| \f$ converted to corresponding unsigned type.
+Example:
+@code{.cpp}
+v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
+v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
+@endcode
+For 8-, 16-, 32-bit integer source types. */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
+{
+    typedef typename V_TypeTraits<_Tp>::abs_type rtype;
+    v_reg<rtype, n> c;
+    const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
+    for( int i = 0; i < n; i++ )
+    {
+        rtype ua = a.s[i] ^ mask;
+        rtype ub = b.s[i] ^ mask;
+        c.s[i] = _absdiff(ua, ub);
+    }
+    return c;
+}
+
+/** @overload
+
+For 32-bit floating point values */
+template<int n> inline v_reg<float, n> v_absdiff(const v_reg<float, n>& a, const v_reg<float, n>& b)
+{
+    v_reg<float, n> c;
+    for( int i = 0; i < c.nlanes; i++ )
+        c.s[i] = _absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+/** @overload
+
+For 64-bit floating point values */
+template<int n> inline v_reg<double, n> v_absdiff(const v_reg<double, n>& a, const v_reg<double, n>& b)
+{
+    v_reg<double, n> c;
+    for( int i = 0; i < c.nlanes; i++ )
+        c.s[i] = _absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+/** @brief Saturating absolute difference
+
+Returns \f$ saturate(|a - b|) \f$ .
+For 8-, 16-bit signed integer source types. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++)
+        c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
+    return c;
+}
+
+/** @brief Inverse square root
+
+Returns \f$ 1/sqrt(a) \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = 1.f/std::sqrt(a.s[i]);
+    return c;
+}
+
+/** @brief Magnitude
+
+Returns \f$ sqrt(a^2 + b^2) \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
+    return c;
+}
+
+/** @brief Square of the magnitude
+
+Returns \f$ a^2 + b^2 \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
+    return c;
+}
+
+/** @brief Multiply and add
+
+ Returns \f$ a*b + c \f$
+ For floating point types and signed 32bit int only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                           const v_reg<_Tp, n>& c)
+{
+    v_reg<_Tp, n> d;
+    for( int i = 0; i < n; i++ )
+        d.s[i] = a.s[i]*b.s[i] + c.s[i];
+    return d;
+}
+
+/** @brief A synonym for v_fma */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                              const v_reg<_Tp, n>& c)
+{
+    return v_fma(a, b, c);
+}
+
+/** @brief Dot product of elements
+
+Multiply values in two registers and sum adjacent result pairs.
+
+Scheme:
+@code
+  {A1 A2 ...} // 16-bit
+x {B1 B2 ...} // 16-bit
+-------------
+{A1B1+A2B2 ...} // 32-bit
+
+@endcode
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, n/2> c;
+    for( int i = 0; i < (n/2); i++ )
+        c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
+    return c;
+}
+
+/** @brief Dot product of elements
+
+Same as cv::v_dotprod, but adds a third element to the sum of adjacent pairs.
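+A typical accumulation pattern (a minimal sketch):
+@code{.cpp}
+v_int16x8 a = v_setall_s16(2), b = v_setall_s16(3);
+v_int32x4 acc = v_setzero_s32();
+acc = v_dotprod(a, b, acc); // each lane: 2*3 + 2*3 + 0 = 12
+@endcode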
+Scheme: +@code + {A1 A2 ...} // 16-bit +x {B1 B2 ...} // 16-bit +------------- + {A1B1+A2B2+C1 ...} // 32-bit +@endcode +*/ +template inline v_reg::w_type, n/2> +v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::w_type, n / 2>& c) +{ + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg s; + for( int i = 0; i < (n/2); i++ ) + s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; + return s; +} + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod, but it may perform unorder sum between result pairs in some platforms, +this intrinsic can be used if the sum among all lanes is only matters +and also it should be yielding better performance on the affected platforms. + +*/ +template inline v_reg::w_type, n/2> +v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ return v_dotprod(a, b); } + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod_fast, but add a third element to the sum of adjacent pairs. +*/ +template inline v_reg::w_type, n/2> +v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::w_type, n / 2>& c) +{ return v_dotprod(a, b, c); } + +/** @brief Dot product of elements and expand + +Multiply values in two registers and expand the sum of adjacent result pairs. + +Scheme: +@code + {A1 A2 A3 A4 ...} // 8-bit +x {B1 B2 B3 B4 ...} // 8-bit +------------- + {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit + +@endcode +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; + return s; +} + +/** @brief Dot product of elements + +Same as cv::v_dotprod_expand, but add a third element to the sum of adjacent pairs. +Scheme: +@code + {A1 A2 A3 A4 ...} // 8-bit +x {B1 B2 B3 B4 ...} // 8-bit +------------- + {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit +@endcode +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg s; + for( int i = 0; i < (n/4); i++ ) + s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + + (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; + return s; +} + +/** @brief Fast Dot product of elements and expand + +Multiply values in two registers and expand the sum of adjacent result pairs. + +Same as cv::v_dotprod_expand, but it may perform unorder sum between result pairs in some platforms, +this intrinsic can be used if the sum among all lanes is only matters +and also it should be yielding better performance on the affected platforms. + +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ return v_dotprod_expand(a, b); } + +/** @brief Fast Dot product of elements + +Same as cv::v_dotprod_expand_fast, but add a third element to the sum of adjacent pairs. +*/ +template inline v_reg::q_type, n/4> +v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + const v_reg::q_type, n / 4>& c) +{ return v_dotprod_expand(a, b, c); } + +/** @brief Multiply and expand + +Multiply values two registers and store results in two registers with wider pack type. 
+Scheme:
+@code
+  {A B C D} // 32-bit
+x {E F G H} // 32-bit
+---------------
+{AE BF}         // 64-bit
+        {CG DH} // 64-bit
+@endcode
+Example:
+@code{.cpp}
+v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
+v_uint64x2 c, d; // results
+v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
+@endcode
+Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).
+*/
+template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = (w_type)a.s[i]*b.s[i];
+        d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
+    }
+}
+
+/** @brief Multiply and extract high part
+
+Multiply values of two registers and store the high part of the results.
+Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$
+*/
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<_Tp, n> c;
+    for (int i = 0; i < n; i++)
+        c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
+    return c;
+}
+
+//! @cond IGNORED
+template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
+                                                 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
+    }
+}
+//! @endcond
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = (_Tp)(a.s[i] shift_op imm); \
+    return c; \
+}
+
+/** @brief Bitwise shift left
+
+For 16-, 32- and 64-bit integer values. */
+OPENCV_HAL_IMPL_SHIFT_OP(<< )
+
+/** @brief Bitwise shift right
+
+For 16-, 32- and 64-bit integer values. */
+OPENCV_HAL_IMPL_SHIFT_OP(>> )
+
+//! @brief Helper macro
+//! 
@ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> b; \ + for (int i = 0; i < n; i++) \ + { \ + int sIndex = i opA imm; \ + if (0 <= sIndex && sIndex < n) \ + { \ + b.s[i] = a.s[sIndex]; \ + } \ + else \ + { \ + b.s[i] = 0; \ + } \ + } \ + return b; \ +} \ +template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tp, n> c; \ + for (int i = 0; i < n; i++) \ + { \ + int aIndex = i opA imm; \ + int bIndex = i opA imm opB n; \ + if (0 <= bIndex && bIndex < n) \ + { \ + c.s[i] = b.s[bIndex]; \ + } \ + else if (0 <= aIndex && aIndex < n) \ + { \ + c.s[i] = a.s[aIndex]; \ + } \ + else \ + { \ + c.s[i] = 0; \ + } \ + } \ + return c; \ +} + +/** @brief Element shift left among vector + +For all type */ +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) + +/** @brief Element shift right among vector + +For all type */ +OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) + +/** @brief Sum packed values + +Scheme: +@code +{A1 A2 A3 ...} => sum{A1,A2,A3,...} +@endcode +*/ +template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) +{ + typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; + for( int i = 1; i < n; i++ ) + c += a.s[i]; + return c; +} + +/** @brief Sums all elements of each input vector, returns the vector of sums + + Scheme: + @code + result[0] = a[0] + a[1] + a[2] + a[3] + result[1] = b[0] + b[1] + b[2] + b[3] + result[2] = c[0] + c[1] + c[2] + c[3] + result[3] = d[0] + d[1] + d[2] + d[3] + @endcode +*/ +template inline v_reg v_reduce_sum4(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) +{ + v_reg r; + for(int i = 0; i < (n/4); i++) + { + r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3]; + r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3]; + r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3]; + r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3]; + } + return r; +} + +/** @brief Sum absolute differences of values + +Scheme: +@code +{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{ABS(A1-B1),abs(A2-B2),abs(A3-B3),...} +@endcode +For all types except 64-bit types.*/ +template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); + for (int i = 1; i < n; i++) + c += _absdiff(a.s[i], b.s[i]); + return c; +} + +/** @brief Get negative values mask +@deprecated v_signmask depends on a lane count heavily and therefore isn't universal enough + +Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes. 
+Example:
+@code{.cpp}
+v_int32x4 r; // set to {-1, -1, 1, 1}
+int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
+@endcode
+*/
+template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
+{
+    int mask = 0;
+    for( int i = 0; i < n; i++ )
+        mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
+    return mask;
+}
+
+/** @brief Get first negative lane index
+
+Returned value is the index of the first negative lane (undefined for an input of all positive values)
+Example:
+@code{.cpp}
+v_int32x4 r; // set to {0, 0, -1, -1}
+int idx = v_scan_forward(r); // idx = 2
+@endcode
+*/
+template<typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
+{
+    for (int i = 0; i < n; i++)
+        if(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0)
+            return i;
+    return 0;
+}
+
+/** @brief Check if all packed values are less than zero
+
+Unsigned values will be cast to signed: `uchar 254 => char -2`.
+*/
+template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
+            return false;
+    return true;
+}
+
+/** @brief Check if any of packed values is less than zero
+
+Unsigned values will be cast to signed: `uchar 254 => char -2`.
+*/
+template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
+            return true;
+    return false;
+}
+
+/** @brief Per-element select (blend operation)
+
+Return value will be built by combining values _a_ and _b_ using the following scheme:
+    result[i] = mask[i] ? a[i] : b[i];
+
+@note: _mask_ element values are restricted to these values:
+- 0: select element from _b_
+- 0xff/0xffff/etc: select element from _a_
+(fully compatible with bitwise-based operator)
+*/
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
+                                                            const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef V_TypeTraits<_Tp> Traits;
+    typedef typename Traits::int_type int_type;
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        int_type m = Traits::reinterpret_int(mask.s[i]);
+        CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
+        c.s[i] = m ? a.s[i] : b.s[i];
+    }
+    return c;
+}
+
+/** @brief Expand values to the wider pack type
+
+Copy contents of register to two registers with 2x wider pack type.
+Scheme:
+@code
+ int32x4     int64x2 int64x2
+{A B C D} ==> {A B} , {C D}
+@endcode */
+template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
+{
+    for( int i = 0; i < (n/2); i++ )
+    {
+        b0.s[i] = a.s[i];
+        b1.s[i] = a.s[i+(n/2)];
+    }
+}
+
+/** @brief Expand lower values to the wider pack type
+
+Same as cv::v_expand, but return lower half of the vector.
+
+Scheme:
+@code
+ int32x4     int64x2
+{A B C D} ==> {A B}
+@endcode */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_expand_low(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
+    for( int i = 0; i < (n/2); i++ )
+        b.s[i] = a.s[i];
+    return b;
+}
+
+/** @brief Expand higher values to the wider pack type
+
+Same as cv::v_expand_low, but expand higher half of the vector instead.
+
+Scheme:
+@code
+ int32x4     int64x2
+{A B C D} ==> {C D}
+@endcode */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_expand_high(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
+    for( int i = 0; i < (n/2); i++ )
+        b.s[i] = a.s[i+(n/2)];
+    return b;
+}
+
+//! 
@cond IGNORED +template inline v_reg::int_type, n> + v_reinterpret_as_int(const v_reg<_Tp, n>& a) +{ + v_reg::int_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); + return c; +} + +template inline v_reg::uint_type, n> + v_reinterpret_as_uint(const v_reg<_Tp, n>& a) +{ + v_reg::uint_type, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); + return c; +} +//! @endcond + +/** @brief Interleave two vectors + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +--------------- + {A1 B1 A2 B2} and {A3 B3 A4 B4} +@endcode +For all types except 64-bit. +*/ +template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) +{ + int i; + for( i = 0; i < n/2; i++ ) + { + b0.s[i*2] = a0.s[i]; + b0.s[i*2+1] = a1.s[i]; + } + for( ; i < n; i++ ) + { + b1.s[i*2-n] = a0.s[i]; + b1.s[i*2-n+1] = a1.s[i]; + } +} + +/** @brief Load register contents from memory + +@param ptr pointer to memory block with data +@return register object + +@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc. + +@note Use vx_load version to get maximum available register length result + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). + */ +template +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr); +} + +#if CV_SIMD256 +/** @brief Load 256-bit length register contents from memory + +@param ptr pointer to memory block with data +@return register object + +@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x32, int ==> cv::v_int32x8, etc. + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load version to get maximum available register length result + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). + */ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr); +} +#endif + +#if CV_SIMD512 +/** @brief Load 512-bit length register contents from memory + +@param ptr pointer to memory block with data +@return register object + +@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x64, int ==> cv::v_int32x16, etc. + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load version to get maximum available register length result + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). 
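+
+A minimal usage sketch (assumes a CV_SIMD512-capable build; the values are illustrative):
+@code{.cpp}
+float buf[16] = {0.f}; // 16 x 32-bit lanes = 512 bits
+v_float32x16 r = v512_load(buf);
+@endcode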
+ */ +template +inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr); +} +#endif + +/** @brief Load register contents from memory (aligned) + +similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc) + +@note Use vx_load_aligned version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned)>(ptr)); + return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr); +} + +#if CV_SIMD256 +/** @brief Load register contents from memory (aligned) + +similar to cv::v256_load, but source memory block should be aligned (to 32-byte boundary in case of SIMD256, 64-byte - SIMD512, etc) + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load_aligned version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned)>(ptr)); + return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr); +} +#endif + +#if CV_SIMD512 +/** @brief Load register contents from memory (aligned) + +similar to cv::v512_load, but source memory block should be aligned (to 64-byte boundary in case of SIMD512, etc) + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load_aligned version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned)>(ptr)); + return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr); +} +#endif + +/** @brief Load 64-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[2] = { 1, 2 }; +v_int32x4 r = v_load_low(lo); +@endcode + +@note Use vx_load_low version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_low(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +#if CV_SIMD256 +/** @brief Load 128-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[4] = { 1, 2, 3, 4 }; +v_int32x8 r = v256_load_low(lo); +@endcode + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load_low version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_low(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_reg<_Tp, simd256_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes / 2; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +#if CV_SIMD512 +/** @brief Load 256-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; +v_int32x16 r = v512_load_low(lo); +@endcode + +@note Check CV_SIMD512 preprocessor definition prior to use. 
+Use vx_load_low version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_low(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_reg<_Tp, simd512_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes / 2; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +/** @brief Load register contents from two memory blocks + +@param loptr memory block containing data for first half (0..n/2) +@param hiptr memory block containing data for second half (n/2..n) + +@code{.cpp} +int lo[2] = { 1, 2 }, hi[2] = { 3, 4 }; +v_int32x4 r = v_load_halves(lo, hi); +@endcode + +@note Use vx_load_halves version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_halves(const _Tp* loptr, const _Tp* hiptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(loptr)); + CV_Assert(isAligned(hiptr)); +#endif + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for( int i = 0; i < c.nlanes/2; i++ ) + { + c.s[i] = loptr[i]; + c.s[i+c.nlanes/2] = hiptr[i]; + } + return c; +} + +#if CV_SIMD256 +/** @brief Load register contents from two memory blocks + +@param loptr memory block containing data for first half (0..n/2) +@param hiptr memory block containing data for second half (n/2..n) + +@code{.cpp} +int lo[4] = { 1, 2, 3, 4 }, hi[4] = { 5, 6, 7, 8 }; +v_int32x8 r = v256_load_halves(lo, hi); +@endcode + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load_halves version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_halves(const _Tp* loptr, const _Tp* hiptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(loptr)); + CV_Assert(isAligned(hiptr)); +#endif + v_reg<_Tp, simd256_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes / 2; i++) + { + c.s[i] = loptr[i]; + c.s[i + c.nlanes / 2] = hiptr[i]; + } + return c; +} +#endif + +#if CV_SIMD512 +/** @brief Load register contents from two memory blocks + +@param loptr memory block containing data for first half (0..n/2) +@param hiptr memory block containing data for second half (n/2..n) + +@code{.cpp} +int lo[4] = { 1, 2, 3, 4, 5, 6, 7, 8 }, hi[4] = { 9, 10, 11, 12, 13, 14, 15, 16 }; +v_int32x16 r = v512_load_halves(lo, hi); +@endcode + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load_halves version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_halves(const _Tp* loptr, const _Tp* hiptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(loptr)); + CV_Assert(isAligned(hiptr)); +#endif + v_reg<_Tp, simd512_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes / 2; i++) + { + c.s[i] = loptr[i]; + c.s[i + c.nlanes / 2] = hiptr[i]; + } + return c; +} +#endif + +/** @brief Load register contents from memory with double expand + +Same as cv::v_load, but result pack type will be 2x wider than memory type. + +@code{.cpp} +short buf[4] = {1, 2, 3, 4}; // type is int16 +v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32 +@endcode +For 8-, 16-, 32-bit integer source types. 
+ +@note Use vx_load_expand version to get maximum available register length result +*/ +template +inline v_reg::w_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::w_type)> +v_load_expand(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +#if CV_SIMD256 +/** @brief Load register contents from memory with double expand + +Same as cv::v256_load, but result pack type will be 2x wider than memory type. + +@code{.cpp} +short buf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // type is int16 +v_int32x8 r = v256_load_expand(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8} - type is int32 +@endcode +For 8-, 16-, 32-bit integer source types. + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load_expand version to get maximum available register length result +*/ +template +inline v_reg::w_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::w_type)> +v256_load_expand(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg c; + for (int i = 0; i < c.nlanes; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +#if CV_SIMD512 +/** @brief Load register contents from memory with double expand + +Same as cv::v512_load, but result pack type will be 2x wider than memory type. + +@code{.cpp} +short buf[8] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // type is int16 +v_int32x16 r = v512_load_expand(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - type is int32 +@endcode +For 8-, 16-, 32-bit integer source types. + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load_expand version to get maximum available register length result +*/ +template +inline v_reg::w_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::w_type)> +v512_load_expand(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::w_type w_type; + v_reg c; + for (int i = 0; i < c.nlanes; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +/** @brief Load register contents from memory with quad expand + +Same as cv::v_load_expand, but result type is 4 times wider than source. +@code{.cpp} +char buf[4] = {1, 2, 3, 4}; // type is int8 +v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32 +@endcode +For 8-bit integer source types. + +@note Use vx_load_expand_q version to get maximum available register length result +*/ +template +inline v_reg::q_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::q_type)> +v_load_expand_q(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg c; + for( int i = 0; i < c.nlanes; i++ ) + { + c.s[i] = ptr[i]; + } + return c; +} + +#if CV_SIMD256 +/** @brief Load register contents from memory with quad expand + +Same as cv::v256_load_expand, but result type is 4 times wider than source. +@code{.cpp} +char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // type is int8 +v_int32x8 r = v256_load_expand_q(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8} - type is int32 +@endcode +For 8-bit integer source types. + +@note Check CV_SIMD256 preprocessor definition prior to use. 
+Use vx_load_expand_q version to get maximum available register length result +*/ +template +inline v_reg::q_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::q_type)> +v256_load_expand_q(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg c; + for (int i = 0; i < c.nlanes; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +#if CV_SIMD512 +/** @brief Load register contents from memory with quad expand + +Same as cv::v512_load_expand, but result type is 4 times wider than source. +@code{.cpp} +char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // type is int8 +v_int32x16 r = v512_load_expand_q(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - type is int32 +@endcode +For 8-bit integer source types. + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load_expand_q version to get maximum available register length result +*/ +template +inline v_reg::q_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::q_type)> +v512_load_expand_q(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg c; + for (int i = 0; i < c.nlanes; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +/** @brief Load and deinterleave (2 channels) + +Load data from memory deinterleave and store to 2 registers. +Scheme: +@code +{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + a.s[i] = ptr[i2]; + b.s[i] = ptr[i2+1]; + } +} + +/** @brief Load and deinterleave (3 channels) + +Load data from memory deinterleave and store to 3 registers. +Scheme: +@code +{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + a.s[i] = ptr[i3]; + b.s[i] = ptr[i3+1]; + c.s[i] = ptr[i3+2]; + } +} + +/** @brief Load and deinterleave (4 channels) + +Load data from memory deinterleave and store to 4 registers. +Scheme: +@code +{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} +@endcode +For all types except 64-bit. */ +template +inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, + v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, + v_reg<_Tp, n>& d) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + a.s[i] = ptr[i4]; + b.s[i] = ptr[i4+1]; + c.s[i] = ptr[i4+2]; + d.s[i] = ptr[i4+3]; + } +} + +/** @brief Interleave and store (2 channels) + +Interleave and store data from 2 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...} +@endcode +For all types except 64-bit. 
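+
+A hedged example (the destination buffer must hold 2*nlanes elements):
+@code{.cpp}
+v_uint8x16 a = v_setall_u8(1), b = v_setall_u8(2);
+uchar buf[32];
+v_store_interleave(buf, a, b); // buf = {1, 2, 1, 2, ...}
+@endcode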
*/ +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i2; + for( i = i2 = 0; i < n; i++, i2 += 2 ) + { + ptr[i2] = a.s[i]; + ptr[i2+1] = b.s[i]; + } +} + +/** @brief Interleave and store (3 channels) + +Interleave and store data from 3 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...} +@endcode +For all types except 64-bit. */ +template +inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i3; + for( i = i3 = 0; i < n; i++, i3 += 3 ) + { + ptr[i3] = a.s[i]; + ptr[i3+1] = b.s[i]; + ptr[i3+2] = c.s[i]; + } +} + +/** @brief Interleave and store (4 channels) + +Interleave and store data from 4 registers to memory. +Scheme: +@code +{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...} +@endcode +For all types except 64-bit. */ +template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, + const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, + const v_reg<_Tp, n>& d, + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + int i, i4; + for( i = i4 = 0; i < n; i++, i4 += 4 ) + { + ptr[i4] = a.s[i]; + ptr[i4+1] = b.s[i]; + ptr[i4+2] = c.s[i]; + ptr[i4+3] = d.s[i]; + } +} + +/** @brief Store data to memory + +Store register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {A B C D} +@endcode +Pointer can be unaligned. */ +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + for( int i = 0; i < n; i++ ) + ptr[i] = a.s[i]; +} + +template +inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_store(ptr, a); +} + +/** @brief Store data to memory (lower half) + +Store lower half of register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {A B} +@endcode */ +template +inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i]; +} + +/** @brief Store data to memory (higher half) + +Store higher half of register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {C D} +@endcode */ +template +inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + for( int i = 0; i < (n/2); i++ ) + ptr[i] = a.s[i+(n/2)]; +} + +/** @brief Store data to memory (aligned) + +Store register contents to memory. +Scheme: +@code + REG {A B C D} ==> MEM {A B C D} +@endcode +Pointer __should__ be aligned by 16-byte boundary. 
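+
+A hedged sketch (CV_DECL_ALIGNED comes from OpenCV's cvdef.h and is assumed available here):
+@code{.cpp}
+CV_DECL_ALIGNED(16) float buf[4];
+v_float32x4 a = v_setall_f32(1.f);
+v_store_aligned(buf, a);
+@endcode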
*/ +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +template +inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +template +inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) +{ + CV_Assert(isAligned)>(ptr)); + v_store(ptr, a); +} + +/** @brief Combine vector from first elements of two vectors + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +--------------- + {A1 A2 B1 B2} +@endcode +For all types except 64-bit. */ +template +inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i]; + c.s[i+(n/2)] = b.s[i]; + } + return c; +} + +/** @brief Combine vector from last elements of two vectors + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +--------------- + {A3 A4 B3 B4} +@endcode +For all types except 64-bit. */ +template +inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < (n/2); i++ ) + { + c.s[i] = a.s[i+(n/2)]; + c.s[i+(n/2)] = b.s[i+(n/2)]; + } + return c; +} + +/** @brief Combine two vectors from lower and higher parts of two other vectors + +@code{.cpp} +low = cv::v_combine_low(a, b); +high = cv::v_combine_high(a, b); +@endcode */ +template +inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, + v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) +{ + for( int i = 0; i < (n/2); i++ ) + { + low.s[i] = a.s[i]; + low.s[i+(n/2)] = b.s[i]; + high.s[i] = a.s[i+(n/2)]; + high.s[i+(n/2)] = b.s[i+(n/2)]; + } +} + +/** @brief Vector reverse order + +Reverse the order of the vector +Scheme: +@code + REG {A1 ... An} ==> REG {An ... A1} +@endcode +For all types. */ +template +inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[n-i-1]; + return c; +} + +/** @brief Vector extract + +Scheme: +@code + {A1 A2 A3 A4} + {B1 B2 B3 B4} +======================== +shift = 1 {A2 A3 A4 B1} +shift = 2 {A3 A4 B1 B2} +shift = 3 {A4 B1 B2 B3} +@endcode +Restriction: 0 <= shift < nlanes + +Usage: +@code +v_int32x4 a, b, c; +c = v_extract<2>(a, b); +@endcode +For all types. */ +template +inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + v_reg<_Tp, n> r; + const int shift = n - s; + int i = 0; + for (; i < shift; ++i) + r.s[i] = a.s[i+s]; + for (; i < n; ++i) + r.s[i] = b.s[i-shift]; + return r; +} + +/** @brief Vector extract + +Scheme: +Return the s-th element of v. +Restriction: 0 <= s < nlanes + +Usage: +@code +v_int32x4 a; +int r; +r = v_extract_n<2>(a); +@endcode +For all types. */ +template +inline _Tp v_extract_n(const v_reg<_Tp, n>& v) +{ + CV_DbgAssert(s >= 0 && s < n); + return v.s[s]; +} + +/** @brief Broadcast i-th element of vector + +Scheme: +@code +{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] } +@endcode +Restriction: 0 <= i < nlanes +Supported types: 32-bit integers and floats (s32/u32/f32) + */ +template +inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) +{ + CV_DbgAssert(i >= 0 && i < n); + return v_reg<_Tp, n>::all(a.s[i]); +} + +/** @brief Round elements + +Rounds each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. 
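+
+A small example (cvRound rounds halfway cases to the nearest even integer):
+@code{.cpp}
+v_float32x4 a = v_setall_f32(2.5f);
+v_int32x4 r = v_round(a); // each lane: cvRound(2.5f) == 2
+@endcode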
+*/ +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvRound(a.s[i]); + return c; +} + +/** @overload */ +template inline v_reg v_round(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = cvRound(b.s[i]); + } + return c; +} + +/** @brief Floor elements + +Floor each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvFloor(a.s[i]); + return c; +} + +/** @brief Ceil elements + +Ceil each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = cvCeil(a.s[i]); + return c; +} + +/** @brief Truncate elements + +Truncate each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (int)(a.s[i]); + return c; +} + +/** @overload */ +template inline v_reg v_round(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvRound(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @overload */ +template inline v_reg v_floor(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvFloor(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @overload */ +template inline v_reg v_ceil(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = cvCeil(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @overload */ +template inline v_reg v_trunc(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (int)(a.s[i]); + c.s[i+n] = 0; + } + return c; +} + +/** @brief Convert to float + +Supported input type is cv::v_int32. */ +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (float)a.s[i]; + return c; +} + +/** @brief Convert lower half to float + +Supported input type is cv::v_float64. */ +template inline v_reg v_cvt_f32(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = 0; + } + return c; +} + +/** @brief Convert to float + +Supported input type is cv::v_float64. */ +template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + { + c.s[i] = (float)a.s[i]; + c.s[i+n] = (float)b.s[i]; + } + return c; +} + +/** @brief Convert lower half to double + +Supported input type is cv::v_int32. */ +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +/** @brief Convert to double high part of vector + +Supported input type is cv::v_int32. */ +template CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i + (n/2)]; + return c; +} + +/** @brief Convert lower half to double + +Supported input type is cv::v_float32. */ +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + +/** @brief Convert to double high part of vector + +Supported input type is cv::v_float32. 
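+
+Example (assuming 128-bit registers: 4 float lanes in, 2 double lanes out):
+@code{.cpp}
+v_float32x4 a = v_setall_f32(1.5f);
+v_float64x2 lo = v_cvt_f64(a);      // widen lower half
+v_float64x2 hi = v_cvt_f64_high(a); // widen upper half
+@endcode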
*/ +template CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i + (n/2)]; + return c; +} + +/** @brief Convert to double + +Supported input type is cv::v_int64. */ +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = (double)a.s[i]; + return c; +} + + +template inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes; i++) + c.s[i] = tab[idx[i]]; + return c; +} +template inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_pairs(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes; i++) + c.s[i] = tab[idx[i / 2] + i % 2]; + return c; +} +template inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_quads(const _Tp* tab, const int* idx) +{ + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes; i++) + c.s[i] = tab[idx[i / 4] + i % 4]; + return c; +} + +template inline v_reg v_lut(const int* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) +{ + v_reg c; + for (int i = 0; i < n; i++) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const float* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + +template inline v_reg v_lut(const double* tab, const v_reg& idx) +{ + v_reg c; + for( int i = 0; i < n/2; i++ ) + c.s[i] = tab[idx.s[i]]; + return c; +} + + +template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, + v_reg& x, v_reg& y) +{ + for( int i = 0; i < n; i++ ) + { + int j = idx.s[i]; + x.s[i] = tab[j]; + y.s[i] = tab[j+1]; + } +} + +template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[4*i ] = vec.s[4*i ]; + c.s[4*i+1] = vec.s[4*i+2]; + c.s[4*i+2] = vec.s[4*i+1]; + c.s[4*i+3] = vec.s[4*i+3]; + } + return c; +} + +template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/8; i++) + { + c.s[8*i ] = vec.s[8*i ]; + c.s[8*i+1] = vec.s[8*i+4]; + c.s[8*i+2] = vec.s[8*i+1]; + c.s[8*i+3] = vec.s[8*i+5]; + c.s[8*i+4] = vec.s[8*i+2]; + c.s[8*i+5] = vec.s[8*i+6]; + c.s[8*i+6] = vec.s[8*i+3]; + c.s[8*i+7] = vec.s[8*i+7]; + } + return c; +} + +template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) +{ + v_reg<_Tp, n> c; + for (int i = 0; i < n/4; i++) + { + c.s[3*i ] = vec.s[4*i ]; + c.s[3*i+1] = vec.s[4*i+1]; + c.s[3*i+2] = vec.s[4*i+2]; + } + return c; +} + +/** @brief Transpose 4x4 matrix + +Scheme: +@code +a0 {A1 A2 A3 A4} +a1 {B1 B2 B3 B4} +a2 {C1 C2 C3 C4} +a3 {D1 D2 D3 D4} +=============== +b0 {A1 B1 C1 D1} +b1 {A2 B2 C2 D2} +b2 {A3 B3 C3 D3} +b3 {A4 B4 C4 D4} +@endcode +*/ +template +inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + const v_reg<_Tp, n>& a2, const v_reg<_Tp, n>& a3, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1, + v_reg<_Tp, n>& b2, v_reg<_Tp, n>& b3 ) +{ + for (int i = 0; i < n / 4; i++) + { + b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4]; + b0.s[2 + 
i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4]; + b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4]; + b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4]; + b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4]; + b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4]; + b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4]; + b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4]; + } +} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \ +inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); } + +//! @name Init with zero +//! @{ +//! @brief Create new vector with zero elements +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, v, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, v, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, v, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, v, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, v, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, v, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, v, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, v, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, v, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, v, s64) + +#if CV_SIMD256 +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x32, v256, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x32, v256, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x16, v256, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x16, v256, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x8, v256, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x8, v256, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x8, v256, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x4, v256, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x4, v256, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x4, v256, s64) +#endif + +#if CV_SIMD512 +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x64, v512, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x64, v512, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x32, v512, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x32, v512, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x16, v512, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x16, v512, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x16, v512, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x8, v512, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x8, v512, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64) +#endif +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \ +inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); } + +//! @name Init with value +//! @{ +//! 
@brief Create new vector with elements set to a specific value +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, v, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, v, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, v, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, v, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, v, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, v, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, v, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, v, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, v, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, v, s64) + +#if CV_SIMD256 +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x32, uchar, v256, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x32, schar, v256, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x16, ushort, v256, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x16, short, v256, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x8, unsigned, v256, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x8, int, v256, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x8, float, v256, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x4, double, v256, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x4, uint64, v256, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x4, int64, v256, s64) +#endif + +#if CV_SIMD512 +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x64, uchar, v512, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x64, schar, v512, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x32, ushort, v512, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x32, short, v512, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x16, unsigned, v512, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x16, int, v512, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x16, float, v512, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x8, double, v512, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x8, uint64, v512, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x8, int64, v512, s64) +#endif +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \ +template inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \ + v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ +{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); } + +//! @name Reinterpret +//! @{ +//! @brief Convert vector to different type without modifying underlying data. +OPENCV_HAL_IMPL_C_REINTERPRET(uchar, u8) +OPENCV_HAL_IMPL_C_REINTERPRET(schar, s8) +OPENCV_HAL_IMPL_C_REINTERPRET(ushort, u16) +OPENCV_HAL_IMPL_C_REINTERPRET(short, s16) +OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32) +OPENCV_HAL_IMPL_C_REINTERPRET(int, s32) +OPENCV_HAL_IMPL_C_REINTERPRET(float, f32) +OPENCV_HAL_IMPL_C_REINTERPRET(double, f64) +OPENCV_HAL_IMPL_C_REINTERPRET(uint64, u64) +OPENCV_HAL_IMPL_C_REINTERPRET(int64, s64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \ +template inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \ +{ return a << shift; } + +//! @name Left shift +//! @{ +//! @brief Shift left +OPENCV_HAL_IMPL_C_SHIFTL(ushort) +OPENCV_HAL_IMPL_C_SHIFTL(short) +OPENCV_HAL_IMPL_C_SHIFTL(unsigned) +OPENCV_HAL_IMPL_C_SHIFTL(int) +OPENCV_HAL_IMPL_C_SHIFTL(uint64) +OPENCV_HAL_IMPL_C_SHIFTL(int64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \ +template inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \ +{ return a >> shift; } + +//! @name Right shift +//! @{ +//! 
@brief Shift right +OPENCV_HAL_IMPL_C_SHIFTR(ushort) +OPENCV_HAL_IMPL_C_SHIFTR(short) +OPENCV_HAL_IMPL_C_SHIFTR(unsigned) +OPENCV_HAL_IMPL_C_SHIFTR(int) +OPENCV_HAL_IMPL_C_SHIFTR(uint64) +OPENCV_HAL_IMPL_C_SHIFTR(int64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \ +template inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \ +{ \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ + return c; \ +} + +//! @name Rounding shift +//! @{ +//! @brief Rounding shift right +OPENCV_HAL_IMPL_C_RSHIFTR(ushort) +OPENCV_HAL_IMPL_C_RSHIFTR(short) +OPENCV_HAL_IMPL_C_RSHIFTR(unsigned) +OPENCV_HAL_IMPL_C_RSHIFTR(int) +OPENCV_HAL_IMPL_C_RSHIFTR(uint64) +OPENCV_HAL_IMPL_C_RSHIFTR(int64) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \ +template inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tpn, 2*n> c; \ + for( int i = 0; i < n; i++ ) \ + { \ + c.s[i] = cast<_Tpn>(a.s[i]); \ + c.s[i+n] = cast<_Tpn>(b.s[i]); \ + } \ + return c; \ +} + +//! @name Pack +//! @{ +//! @brief Pack values from two vectors to one +//! +//! Return vector type have twice more elements than input vector types. Variant with _u_ suffix also +//! converts to corresponding unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_PACK(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(int, ushort, pack_u, saturate_cast) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \ +template inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +{ \ + v_reg<_Tpn, 2*n> c; \ + for( int i = 0; i < n; i++ ) \ + { \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ + c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ + } \ + return c; \ +} + +//! @name Pack with rounding shift +//! @{ +//! @brief Pack values from two vectors to one with rounding shift +//! +//! Values from the input vectors will be shifted right by _n_ bits with rounding, converted to narrower +//! type and returned in the result vector. Variant with _u_ suffix converts to unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. 
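+//!
+//! A hedged example (shift = 2; values illustrative):
+//! @code{.cpp}
+//! v_uint16x8 a = v_setall_u16(6), b = v_setall_u16(10);
+//! v_uint8x16 c = v_rshr_pack<2>(a, b); // lanes: (6+2)>>2 == 2 and (10+2)>>2 == 3
+//! @endcode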
+OPENCV_HAL_IMPL_C_RSHR_PACK(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int, ushort, pack_u, saturate_cast) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \ +template inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \ +{ \ + for( int i = 0; i < n; i++ ) \ + ptr[i] = cast<_Tpn>(a.s[i]); \ +} + +//! @name Pack and store +//! @{ +//! @brief Store values from the input vector into memory with pack +//! +//! Values will be stored into memory with conversion to narrower type. +//! Variant with _u_ suffix converts to corresponding unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_PACK_STORE(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int, ushort, pack_u, saturate_cast) +//! @} + +//! @brief Helper macro +//! @ingroup core_hal_intrin_impl +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \ +{ \ + for( int i = 0; i < n; i++ ) \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ +} + +//! @name Pack and store with rounding shift +//! @{ +//! @brief Store values from the input vector into memory with pack +//! +//! Values will be shifted _n_ bits right with rounding, converted to narrower type and stored into +//! memory. Variant with _u_ suffix converts to unsigned type. +//! +//! - pack: for 16-, 32- and 64-bit integer input types +//! - pack_u: for 16- and 32-bit signed integer input types +//! +//! @note All variants except 64-bit use saturation. +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, ushort, pack_u, saturate_cast) +//! @} + +//! @cond IGNORED +template +inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +{ + for (int i = 0; i < n; ++i) + { + mptr[i] = (_Tpm)a.s[i]; + mptr[i + n] = (_Tpm)b.s[i]; + } +} +//! @endcond + +//! @name Pack boolean values +//! @{ +//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector +//! +//! 
@note Must provide valid boolean values to guarantee same result for all architectures. + +/** @brief +//! For 16-bit boolean values + +Scheme: +@code +a {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0} +b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF} +=============== +{ + 0xFF 0 0 0xFF 0 0xFF 0xFF 0 + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ + +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b) +{ + v_reg mask; + _pack_b(mask.s, a, b); + return mask; +} + +/** @overload +For 32-bit boolean values + +Scheme: +@code +a {0xFFFF.. 0 0 0xFFFF..} +b {0 0xFFFF.. 0xFFFF.. 0} +c {0xFFFF.. 0 0xFFFF.. 0} +d {0 0xFFFF.. 0 0xFFFF..} +=============== +{ + 0xFF 0 0 0xFF 0 0xFF 0xFF 0 + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ + +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) +{ + v_reg mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 2*n, c, d); + return mask; +} + +/** @overload +For 64-bit boolean values + +Scheme: +@code +a {0xFFFF.. 0} +b {0 0xFFFF..} +c {0xFFFF.. 0} +d {0 0xFFFF..} + +e {0xFFFF.. 0} +f {0xFFFF.. 0} +g {0 0xFFFF..} +h {0 0xFFFF..} +=============== +{ + 0xFF 0 0 0xFF 0xFF 0 0 0xFF + 0xFF 0 0xFF 0 0 0xFF 0 0xFF +} +@endcode */ +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d, + const v_reg& e, const v_reg& f, + const v_reg& g, const v_reg& h) +{ + v_reg mask; + _pack_b(mask.s, a, b); + _pack_b(mask.s + 2*n, c, d); + _pack_b(mask.s + 4*n, e, f); + _pack_b(mask.s + 6*n, g, h); + return mask; +} +//! @} + +/** @brief Matrix multiplication + +Scheme: +@code +{A0 A1 A2 A3} |V0| +{B0 B1 B2 B3} |V1| +{C0 C1 C2 C3} |V2| +{D0 D1 D2 D3} x |V3| +==================== +{R0 R1 R2 R3}, where: +R0 = A0V0 + B0V1 + C0V2 + D0V3, +R1 = A1V0 + B1V1 + C1V2 + D1V3 +... +@endcode +*/ +template +inline v_reg v_matmul(const v_reg& v, + const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) +{ + v_reg res; + for (int i = 0; i < n / 4; i++) + { + res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4]; + res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4]; + res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4]; + res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4]; + } + return res; +} + +/** @brief Matrix multiplication and add + +Scheme: +@code +{A0 A1 A2 A3} |V0| |D0| +{B0 B1 B2 B3} |V1| |D1| +{C0 C1 C2 C3} x |V2| + |D2| +==================== |D3| +{R0 R1 R2 R3}, where: +R0 = A0V0 + B0V1 + C0V2 + D0, +R1 = A1V0 + B1V1 + C1V2 + D1 +... 
+
+/** @brief Matrix multiplication and add
+
+Scheme:
+@code
+{A0 A1 A2 A3}   |V0|   |D0|
+{B0 B1 B2 B3}   |V1|   |D1|
+{C0 C1 C2 C3} x |V2| + |D2|
+====================   |D3|
+{R0 R1 R2 R3}, where:
+R0 = A0V0 + B0V1 + C0V2 + D0,
+R1 = A1V0 + B1V1 + C1V2 + D1
+...
+@endcode
+*/
+template<int n>
+inline v_reg<float, n> v_matmuladd(const v_reg<float, n>& v,
+                                   const v_reg<float, n>& a, const v_reg<float, n>& b,
+                                   const v_reg<float, n>& c, const v_reg<float, n>& d)
+{
+    v_reg<float, n> res;
+    for (int i = 0; i < n / 4; i++)
+    {
+        res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
+        res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
+        res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
+        res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
+    }
+    return res;
+}
+
+
+template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b)
+{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); }
+template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b,
+                                                           const v_reg<double, n/2>& c)
+{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); }
+
+template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b)
+{ return v_dotprod_expand(a, b); }
+template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b,
+                                                                const v_reg<double, n/2>& c)
+{ return v_dotprod_expand(a, b, c); }
+
+////// FP16 support ///////
+
+inline v_reg<float, simd128_width / sizeof(float)>
+v_load_expand(const float16_t* ptr)
+{
+    v_reg<float, simd128_width / sizeof(float)> v;
+    for( int i = 0; i < v.nlanes; i++ )
+    {
+        v.s[i] = ptr[i];
+    }
+    return v;
+}
+#if CV_SIMD256
+inline v_reg<float, simd256_width / sizeof(float)>
+v256_load_expand(const float16_t* ptr)
+{
+    v_reg<float, simd256_width / sizeof(float)> v;
+    for (int i = 0; i < v.nlanes; i++)
+    {
+        v.s[i] = ptr[i];
+    }
+    return v;
+}
+#endif
+#if CV_SIMD512
+inline v_reg<float, simd512_width / sizeof(float)>
+v512_load_expand(const float16_t* ptr)
+{
+    v_reg<float, simd512_width / sizeof(float)> v;
+    for (int i = 0; i < v.nlanes; i++)
+    {
+        v.s[i] = ptr[i];
+    }
+    return v;
+}
+#endif
+
+template<int n> inline void
+v_pack_store(float16_t* ptr, const v_reg<float, n>& v)
+{
+    for( int i = 0; i < v.nlanes; i++ )
+    {
+        ptr[i] = float16_t(v.s[i]);
+    }
+}
+
+inline void v_cleanup() {}
+#if CV_SIMD256
+inline void v256_cleanup() {}
+#endif
+#if CV_SIMD512
+inline void v512_cleanup() {}
+#endif
+
+//! @}
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+}
+
+#if !defined(CV_DOXYGEN)
+#undef CV_SIMD256
+#undef CV_SIMD512
+#endif
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_forward.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_forward.hpp
new file mode 100644
index 0000000..979f15a
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_forward.hpp
@@ -0,0 +1,191 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef CV__SIMD_FORWARD
+#error "Need to pre-define forward width"
+#endif
+
+namespace cv
+{
+
+//!
@cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +/** Types **/ +#if CV__SIMD_FORWARD == 1024 +// [todo] 1024 +#error "1024-long ops not implemented yet" +#elif CV__SIMD_FORWARD == 512 +// 512 +#define __CV_VX(fun) v512_##fun +#define __CV_V_UINT8 v_uint8x64 +#define __CV_V_INT8 v_int8x64 +#define __CV_V_UINT16 v_uint16x32 +#define __CV_V_INT16 v_int16x32 +#define __CV_V_UINT32 v_uint32x16 +#define __CV_V_INT32 v_int32x16 +#define __CV_V_UINT64 v_uint64x8 +#define __CV_V_INT64 v_int64x8 +#define __CV_V_FLOAT32 v_float32x16 +#define __CV_V_FLOAT64 v_float64x8 +struct v_uint8x64; +struct v_int8x64; +struct v_uint16x32; +struct v_int16x32; +struct v_uint32x16; +struct v_int32x16; +struct v_uint64x8; +struct v_int64x8; +struct v_float32x16; +struct v_float64x8; +#elif CV__SIMD_FORWARD == 256 +// 256 +#define __CV_VX(fun) v256_##fun +#define __CV_V_UINT8 v_uint8x32 +#define __CV_V_INT8 v_int8x32 +#define __CV_V_UINT16 v_uint16x16 +#define __CV_V_INT16 v_int16x16 +#define __CV_V_UINT32 v_uint32x8 +#define __CV_V_INT32 v_int32x8 +#define __CV_V_UINT64 v_uint64x4 +#define __CV_V_INT64 v_int64x4 +#define __CV_V_FLOAT32 v_float32x8 +#define __CV_V_FLOAT64 v_float64x4 +struct v_uint8x32; +struct v_int8x32; +struct v_uint16x16; +struct v_int16x16; +struct v_uint32x8; +struct v_int32x8; +struct v_uint64x4; +struct v_int64x4; +struct v_float32x8; +struct v_float64x4; +#else +// 128 +#define __CV_VX(fun) v_##fun +#define __CV_V_UINT8 v_uint8x16 +#define __CV_V_INT8 v_int8x16 +#define __CV_V_UINT16 v_uint16x8 +#define __CV_V_INT16 v_int16x8 +#define __CV_V_UINT32 v_uint32x4 +#define __CV_V_INT32 v_int32x4 +#define __CV_V_UINT64 v_uint64x2 +#define __CV_V_INT64 v_int64x2 +#define __CV_V_FLOAT32 v_float32x4 +#define __CV_V_FLOAT64 v_float64x2 +struct v_uint8x16; +struct v_int8x16; +struct v_uint16x8; +struct v_int16x8; +struct v_uint32x4; +struct v_int32x4; +struct v_uint64x2; +struct v_int64x2; +struct v_float32x4; +struct v_float64x2; +#endif + +/** Value reordering **/ + +// Expansion +void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&); +void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&); +void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&); +void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&); +void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&); +void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&); +// Low Expansion +__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&); +__CV_V_INT16 v_expand_low(const __CV_V_INT8&); +__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&); +__CV_V_INT32 v_expand_low(const __CV_V_INT16&); +__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&); +__CV_V_INT64 v_expand_low(const __CV_V_INT32&); +// High Expansion +__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&); +__CV_V_INT16 v_expand_high(const __CV_V_INT8&); +__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&); +__CV_V_INT32 v_expand_high(const __CV_V_INT16&); +__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&); +__CV_V_INT64 v_expand_high(const __CV_V_INT32&); +// Load & Low Expansion +__CV_V_UINT16 __CV_VX(load_expand)(const uchar*); +__CV_V_INT16 __CV_VX(load_expand)(const schar*); +__CV_V_UINT32 __CV_VX(load_expand)(const ushort*); +__CV_V_INT32 __CV_VX(load_expand)(const short*); +__CV_V_UINT64 __CV_VX(load_expand)(const uint*); +__CV_V_INT64 __CV_VX(load_expand)(const int*); +// Load lower 8-bit and expand into 32-bit +__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*); +__CV_V_INT32 __CV_VX(load_expand_q)(const schar*); + +// Saturating 
Pack
+__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
+__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
+__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
+__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
+// Non-saturating Pack
+__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
+__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
+// Pack signed integers with unsigned saturation
+__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
+__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
+
+/** Arithmetic, bitwise and comparison operations **/
+
+// Non-saturating multiply
+#if CV_VSX
+template<typename Tvec>
+Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
+#else
+__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
+__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
+__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
+__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
+#endif
+
+// Multiply and expand
+#if CV_VSX
+template<typename Tvec, typename Twvec>
+void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
+#else
+void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
+void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
+void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
+void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
+void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
+void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
+#endif
+
+// Conversions
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a);
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a);
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a);
+__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a);
+__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a);
+
+/** Cleanup **/
+#undef CV__SIMD_FORWARD
+#undef __CV_VX
+#undef __CV_V_UINT8
+#undef __CV_V_INT8
+#undef __CV_V_UINT16
+#undef __CV_V_INT16
+#undef __CV_V_UINT32
+#undef __CV_V_INT32
+#undef __CV_V_UINT64
+#undef __CV_V_INT64
+#undef __CV_V_FLOAT32
+#undef __CV_V_FLOAT64
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
\ No newline at end of file
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_msa.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_msa.hpp
new file mode 100644
index 0000000..c035fda
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_msa.hpp
@@ -0,0 +1,1887 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_HAL_INTRIN_MSA_HPP
+#define OPENCV_HAL_INTRIN_MSA_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @cond IGNORED
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+#define CV_SIMD128 1
+
+//MSA implements 128-bit wide vector registers shared with the 64-bit wide floating-point unit registers.
+//MSA and FPU can not be both present, unless the FPU has 64-bit floating-point registers. +#define CV_SIMD128_64F 1 + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(v16u8 v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = msa_ld1q_u8(v); + } + + uchar get0() const + { + return msa_getq_lane_u8(val, 0); + } + + v16u8 val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(v16i8 v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = msa_ld1q_s8(v); + } + + schar get0() const + { + return msa_getq_lane_s8(val, 0); + } + + v16i8 val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(v8u16 v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = msa_ld1q_u16(v); + } + + ushort get0() const + { + return msa_getq_lane_u16(val, 0); + } + + v8u16 val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(v8i16 v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = msa_ld1q_s16(v); + } + + short get0() const + { + return msa_getq_lane_s16(val, 0); + } + + v8i16 val; +}; + +struct v_uint32x4 +{ + typedef unsigned int lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(v4u32 v) : val(v) {} + v_uint32x4(unsigned int v0, unsigned int v1, unsigned int v2, unsigned int v3) + { + unsigned int v[] = {v0, v1, v2, v3}; + val = msa_ld1q_u32(v); + } + + unsigned int get0() const + { + return msa_getq_lane_u32(val, 0); + } + + v4u32 val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(v4i32 v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = msa_ld1q_s32(v); + } + + int get0() const + { + return msa_getq_lane_s32(val, 0); + } + + v4i32 val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(v4f32 v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = msa_ld1q_f32(v); + } + + float get0() const + { + return msa_getq_lane_f32(val, 0); + } + + v4f32 val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(v2u64 v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = msa_ld1q_u64(v); + } + + uint64 get0() const + { + return msa_getq_lane_u64(val, 0); + } + + v2u64 val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(v2i64 v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = msa_ld1q_s64(v); 
+    }
+
+    int64 get0() const
+    {
+        return msa_getq_lane_s64(val, 0);
+    }
+
+    v2i64 val;
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() {}
+    explicit v_float64x2(v2f64 v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        double v[] = {v0, v1};
+        val = msa_ld1q_f64(v);
+    }
+
+    double get0() const
+    {
+        return msa_getq_lane_f64(val, 0);
+    }
+
+    v2f64 val;
+};
+
+#define OPENCV_HAL_IMPL_MSA_INIT(_Tpv, _Tp, suffix) \
+inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(msa_dupq_n_##suffix((_Tp)0)); } \
+inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(msa_dupq_n_##suffix(v)); } \
+inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(MSA_TPV_REINTERPRET(v16u8, v.val)); } \
+inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(MSA_TPV_REINTERPRET(v16i8, v.val)); } \
+inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(MSA_TPV_REINTERPRET(v8u16, v.val)); } \
+inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(MSA_TPV_REINTERPRET(v8i16, v.val)); } \
+inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(MSA_TPV_REINTERPRET(v4u32, v.val)); } \
+inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(MSA_TPV_REINTERPRET(v4i32, v.val)); } \
+inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(MSA_TPV_REINTERPRET(v2u64, v.val)); } \
+inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(MSA_TPV_REINTERPRET(v2i64, v.val)); } \
+inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(MSA_TPV_REINTERPRET(v4f32, v.val)); } \
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(MSA_TPV_REINTERPRET(v2f64, v.val)); }
+
+OPENCV_HAL_IMPL_MSA_INIT(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_MSA_INIT(int8x16, schar, s8)
+OPENCV_HAL_IMPL_MSA_INIT(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_MSA_INIT(int16x8, short, s16)
+OPENCV_HAL_IMPL_MSA_INIT(uint32x4, unsigned int, u32)
+OPENCV_HAL_IMPL_MSA_INIT(int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_INIT(uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_MSA_INIT(int64x2, int64, s64)
+OPENCV_HAL_IMPL_MSA_INIT(float32x4, float, f32)
+OPENCV_HAL_IMPL_MSA_INIT(float64x2, double, f64)
+
+#define OPENCV_HAL_IMPL_MSA_PACK(_Tpvec, _Tpwvec, pack, mov, rshr) \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    return _Tpvec(mov(a.val, b.val)); \
+} \
+template<int n> inline \
+_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    return _Tpvec(rshr(a.val, b.val, n)); \
+}
+
+OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_uint16x8, pack, msa_qpack_u16, msa_qrpackr_u16)
+OPENCV_HAL_IMPL_MSA_PACK(v_int8x16, v_int16x8, pack, msa_qpack_s16, msa_qrpackr_s16)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_uint32x4, pack, msa_qpack_u32, msa_qrpackr_u32)
+OPENCV_HAL_IMPL_MSA_PACK(v_int16x8, v_int32x4, pack, msa_qpack_s32, msa_qrpackr_s32)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint32x4, v_uint64x2, pack, msa_pack_u64, msa_rpackr_u64)
+OPENCV_HAL_IMPL_MSA_PACK(v_int32x4, v_int64x2, pack, msa_pack_s64, msa_rpackr_s64)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_int16x8, pack_u, msa_qpacku_s16, msa_qrpackru_s16)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_int32x4, pack_u, msa_qpacku_s32, msa_qrpackru_s32)
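+/* Editorial note: an illustrative use of the packing wrappers generated above,
+   not part of the upstream header. v_pack narrows with saturation via
+   msa_qpack_*, and v_rshr_pack applies a rounding right shift first:
+@code
+    v_uint16x8 a(0, 1, 2, 3, 4, 5, 6, 7), b(250, 260, 270, 280, 290, 300, 310, 320);
+    v_uint8x16 p = v_pack(a, b);         // lanes above 255 saturate to 255
+    v_uint8x16 q = v_rshr_pack<2>(a, b); // (x + 2) >> 2, then saturate
+@endcode
+*/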
+
+#define OPENCV_HAL_IMPL_MSA_PACK_STORE(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = mov(a.val); \
+    msa_st1_##suffix(ptr, a1); \
+} \
+template<int n> inline \
+void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = rshr(a.val, n); \
+    msa_st1_##suffix(ptr, a1); \
+}
+
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_uint16x8, pack, msa_qmovn_u16, msa_qrshrn_n_u16)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int8x16, schar, v8i8, s8, v_int16x8, pack, msa_qmovn_s16, msa_qrshrn_n_s16)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_uint32x4, pack, msa_qmovn_u32, msa_qrshrn_n_u32)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int16x8, short, v4i16, s16, v_int32x4, pack, msa_qmovn_s32, msa_qrshrn_n_s32)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint32x4, unsigned, v2u32, u32, v_uint64x2, pack, msa_movn_u64, msa_rshrn_n_u64)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int32x4, int, v2i32, s32, v_int64x2, pack, msa_movn_s64, msa_rshrn_n_s64)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_int16x8, pack_u, msa_qmovun_s16, msa_qrshrun_n_s16)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_int32x4, pack_u, msa_qmovun_s32, msa_qrshrun_n_s32)
+
+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    return v_uint8x16(msa_pack_u16(a.val, b.val));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    return v_uint8x16(msa_pack_u16(msa_pack_u32(a.val, b.val), msa_pack_u32(c.val, d.val)));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    v8u16 abcd = msa_pack_u32(msa_pack_u64(a.val, b.val), msa_pack_u64(c.val, d.val));
+    v8u16 efgh = msa_pack_u32(msa_pack_u64(e.val, f.val), msa_pack_u64(g.val, h.val));
+    return v_uint8x16(msa_pack_u16(abcd, efgh));
+}
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    v4f32 v0 = v.val;
+    v4f32 res = msa_mulq_lane_f32(m0.val, v0, 0);
+    res = msa_mlaq_lane_f32(res, m1.val, v0, 1);
+    res = msa_mlaq_lane_f32(res, m2.val, v0, 2);
+    res = msa_mlaq_lane_f32(res, m3.val, v0, 3);
+    return v_float32x4(res);
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    v4f32 v0 = v.val;
+    v4f32 res = msa_mulq_lane_f32(m0.val, v0, 0);
+    res = msa_mlaq_lane_f32(res, m1.val, v0, 1);
+    res = msa_mlaq_lane_f32(res, m2.val, v0, 2);
+    res = msa_addq_f32(res, a.val);
+    return v_float32x4(res);
+}
+
+#define OPENCV_HAL_IMPL_MSA_BIN_OP(bin_op, _Tpvec, intrin) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+    a.val = intrin(a.val, b.val); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint8x16, msa_qaddq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint8x16, msa_qsubq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int8x16, msa_qaddq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int8x16, msa_qsubq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint16x8, msa_qaddq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint16x8, msa_qsubq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int16x8, msa_qaddq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int16x8, msa_qsubq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int32x4, msa_addq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int32x4, msa_subq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_int32x4, msa_mulq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint32x4, msa_addq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint32x4, msa_subq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_uint32x4, msa_mulq_u32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float32x4, msa_addq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float32x4, msa_subq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float32x4, msa_mulq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int64x2, msa_addq_s64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int64x2, msa_subq_s64) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint64x2, msa_addq_u64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint64x2, msa_subq_u64) +OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float32x4, msa_divq_f32) +OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float64x2, msa_addq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float64x2, msa_subq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float64x2, msa_mulq_f64) +OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float64x2, msa_divq_f64) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_MSA_MUL_SAT(_Tpvec, _Tpwvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{a = a * b; return a; } + +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint16x8, v_uint32x4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v16i8 a_lo, a_hi, b_lo, b_hi; + + ILVRL_B2_SB(a.val, msa_dupq_n_s8(0), a_lo, a_hi); + ILVRL_B2_SB(b.val, msa_dupq_n_s8(0), b_lo, b_hi); + c.val = msa_mulq_s16(msa_paddlq_s8(a_lo), msa_paddlq_s8(b_lo)); + d.val = msa_mulq_s16(msa_paddlq_s8(a_hi), msa_paddlq_s8(b_hi)); +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v16u8 a_lo, a_hi, b_lo, b_hi; + + ILVRL_B2_UB(a.val, msa_dupq_n_u8(0), a_lo, a_hi); + ILVRL_B2_UB(b.val, msa_dupq_n_u8(0), b_lo, b_hi); + c.val = msa_mulq_u16(msa_paddlq_u8(a_lo), msa_paddlq_u8(b_lo)); + d.val = msa_mulq_u16(msa_paddlq_u8(a_hi), msa_paddlq_u8(b_hi)); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + v8i16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); + ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), b_lo, b_hi); + c.val = msa_mulq_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(b_lo)); + d.val = msa_mulq_s32(msa_paddlq_s16(a_hi), msa_paddlq_s16(b_hi)); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + v8u16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); + ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), b_lo, b_hi); + c.val = msa_mulq_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(b_lo)); + d.val = msa_mulq_u32(msa_paddlq_u16(a_hi), msa_paddlq_u16(b_hi)); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + v4u32 a_lo, a_hi, b_lo, b_hi; + + ILVRL_W2_UW(a.val, msa_dupq_n_u32(0), a_lo, a_hi); + ILVRL_W2_UW(b.val, msa_dupq_n_u32(0), b_lo, b_hi); + c.val = msa_mulq_u64(msa_paddlq_u32(a_lo), msa_paddlq_u32(b_lo)); + d.val = msa_mulq_u64(msa_paddlq_u32(a_hi), msa_paddlq_u32(b_hi)); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + v8i16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); + ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), b_lo, b_hi); + + return 
v_int16x8(msa_packr_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(b_lo)), + msa_mulq_s32(msa_paddlq_s16(a_hi), msa_paddlq_s16(b_hi)), 16)); +} + +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + v8u16 a_lo, a_hi, b_lo, b_hi; + + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); + ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), b_lo, b_hi); + + return v_uint16x8(msa_packr_u32(msa_mulq_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(b_lo)), + msa_mulq_u32(msa_paddlq_u16(a_hi), msa_paddlq_u16(b_hi)), 16)); +} + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(msa_dotp_s_w(a.val, b.val)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(msa_dpadd_s_w(c.val , a.val, b.val)); } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ return v_int64x2(msa_dotp_s_d(a.val, b.val)); } +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_int64x2(msa_dpadd_s_d(c.val , a.val, b.val)); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + v8u16 even_a = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8); + v8u16 odd_a = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8); + v8u16 even_b = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8); + v8u16 odd_b = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8); + v4u32 prod = msa_dotp_u_w(even_a, even_b); + return v_uint32x4(msa_dpadd_u_w(prod, odd_a, odd_b)); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ + v8u16 even_a = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8); + v8u16 odd_a = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8); + v8u16 even_b = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8); + v8u16 odd_b = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8); + v4u32 prod = msa_dpadd_u_w(c.val, even_a, even_b); + return v_uint32x4(msa_dpadd_u_w(prod, odd_a, odd_b)); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + v8i16 prod = msa_dotp_s_h(a.val, b.val); + return v_int32x4(msa_hadd_s32(prod, prod)); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v4u32 even_a = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16); + v4u32 odd_a = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16); + v4u32 even_b = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16); + v4u32 odd_b = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16); + v2u64 prod = msa_dotp_u_d(even_a, even_b); + return v_uint64x2(msa_dpadd_u_d(prod, odd_a, odd_b)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, + const v_uint64x2& c) +{ + v4u32 even_a = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16); + v4u32 odd_a = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16); + v4u32 even_b = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16); + v4u32 odd_b = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16); + v2u64 prod = msa_dpadd_u_d(c.val, even_a, even_b); + return v_uint64x2(msa_dpadd_u_d(prod, odd_a, odd_b)); 
+} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v4i32 prod = msa_dotp_s_w(a.val, b.val); + return v_int64x2(msa_hadd_s64(prod, prod)); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_IMPL_MSA_LOGIC_OP(_Tpvec, _Tpv, suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(&, _Tpvec, msa_andq_##suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(|, _Tpvec, msa_orrq_##suffix) \ +OPENCV_HAL_IMPL_MSA_BIN_OP(^, _Tpvec, msa_eorq_##suffix) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_u8(MSA_TPV_REINTERPRET(v16u8, a.val)))); \ +} + +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint8x16, v16u8, u8) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int8x16, v16i8, s8) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint16x8, v8u16, u16) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int16x8, v8i16, s16) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint32x4, v4u32, u32) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int32x4, v4i32, s32) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint64x2, v2u64, u64) +OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int64x2, v2i64, s64) + +#define OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val)))); \ +} \ 
+inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a.val = MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(&, msa_andq_s32) +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(|, msa_orrq_s32) +OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(^, msa_eorq_s32) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val)))); +} + +/* v_abs */ +#define OPENCV_HAL_IMPL_MSA_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +inline _Tpuvec v_abs(const _Tpsvec& a) \ +{ \ + return v_reinterpret_as_##usuffix(_Tpsvec(msa_absq_##ssuffix(a.val))); \ +} + +OPENCV_HAL_IMPL_MSA_ABS(v_uint8x16, v_int8x16, u8, s8) +OPENCV_HAL_IMPL_MSA_ABS(v_uint16x8, v_int16x8, u16, s16) +OPENCV_HAL_IMPL_MSA_ABS(v_uint32x4, v_int32x4, u32, s32) + +/* v_abs(float), v_sqrt, v_invsqrt */ +#define OPENCV_HAL_IMPL_MSA_BASIC_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a) \ +{ \ + return _Tpvec(intrin(a.val)); \ +} + +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_abs, msa_absq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_abs, msa_absq_f64) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_sqrt, msa_sqrtq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_invsqrt, msa_rsqrtq_f32) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_sqrt, msa_sqrtq_f64) +OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_invsqrt, msa_rsqrtq_f64) + +#define OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(&, msa_andq_s64) +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(|, msa_orrq_s64) +OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(^, msa_eorq_s64) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val)))); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_MSA_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_min, msa_minq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_max, msa_maxq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_min, msa_minq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_max, msa_maxq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_min, msa_minq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_max, msa_maxq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_min, msa_minq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_max, msa_maxq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_min, msa_minq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_max, msa_maxq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_min, msa_minq_s32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_max, msa_maxq_s32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_min, msa_minq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_max, msa_maxq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_min, msa_minq_f64) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_max, msa_maxq_f64) + +#define 
OPENCV_HAL_IMPL_MSA_INT_CMP_OP(_Tpvec, _Tpv, suffix, not_suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ceqq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_##not_suffix(msa_ceqq_##suffix(a.val, b.val)))); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cltq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgtq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cleq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgeq_##suffix(a.val, b.val))); } + +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint8x16, v16u8, u8, u8) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int8x16, v16i8, s8, u8) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint16x8, v8u16, u16, u16) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int16x8, v8i16, s16, u16) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint32x4, v4u32, u32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int32x4, v4i32, s32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float32x4, v4f32, f32, u32) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint64x2, v2u64, u64, u64) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int64x2, v2i64, s64, u64) +OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float64x2, v2f64, f64, u64) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ceqq_f32(a.val, a.val))); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ceqq_f64(a.val, a.val))); } + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_add_wrap, msa_addq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_add_wrap, msa_addq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_add_wrap, msa_addq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_add_wrap, msa_addq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_sub_wrap, msa_subq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_sub_wrap, msa_subq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_sub_wrap, msa_subq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_sub_wrap, msa_subq_s16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_mul_wrap, msa_mulq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_mul_wrap, msa_mulq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_mul_wrap, msa_mulq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_mul_wrap, msa_mulq_s16) + +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_absdiff, msa_abdq_u8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_absdiff, msa_abdq_u16) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_absdiff, msa_abdq_u32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_absdiff, msa_abdq_f32) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_absdiff, msa_abdq_f64) + +/** Saturating absolute difference **/ +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_absdiffs, msa_qabdq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_absdiffs, msa_qabdq_s16) + +#define OPENCV_HAL_IMPL_MSA_BIN_FUNC2(_Tpvec, _Tpvec2, _Tpv, func, intrin) \ +inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec2(MSA_TPV_REINTERPRET(_Tpv, intrin(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int8x16, v_uint8x16, v16u8, v_absdiff, msa_abdq_s8) +OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int16x8, v_uint16x8, v8u16, v_absdiff, msa_abdq_s16) 
+OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int32x4, v_uint32x4, v4u32, v_absdiff, msa_abdq_s32)
+
+/* v_magnitude, v_sqr_magnitude, v_fma, v_muladd */
+inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    v_float32x4 x(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val));
+    return v_sqrt(x);
+}
+
+inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    return v_float32x4(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val));
+}
+
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    return v_float32x4(msa_mlaq_f32(c.val, a.val, b.val));
+}
+
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_int32x4(msa_mlaq_s32(c.val, a.val, b.val));
+}
+
+inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    return v_fma(a, b, c);
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_fma(a, b, c);
+}
+
+inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    v_float64x2 x(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val));
+    return v_sqrt(x);
+}
+
+inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_float64x2(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val));
+}
+
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return v_float64x2(msa_mlaq_f64(c.val, a.val, b.val));
+}
+
+inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return v_fma(a, b, c);
+}
+
+// trade efficiency for convenience
+#define OPENCV_HAL_IMPL_MSA_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
+inline _Tpvec operator << (const _Tpvec& a, int n) \
+{ return _Tpvec(msa_shlq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \
+inline _Tpvec operator >> (const _Tpvec& a, int n) \
+{ return _Tpvec(msa_shrq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec(msa_shlq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec(msa_shrq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ return _Tpvec(msa_rshrq_n_##suffix(a.val, n)); }
+
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint8x16, u8, schar, s8)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int8x16, s8, schar, s8)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint16x8, u16, short, s16)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int16x8, s16, short, s16)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint32x4, u32, int, s32)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int32x4, s32, int, s32)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint64x2, u64, int64, s64)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int64x2, s64, int64, s64)
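+/* Editorial note: the two shift styles above differ in how the count is
+   passed; a sketch, not part of the upstream header:
+@code
+    v_uint32x4 a(8, 16, 32, 64);
+    v_uint32x4 x = a >> 2;       // run-time count, splatted via msa_dupq_n_s32
+    v_uint32x4 y = v_shr<2>(a);  // compile-time count, immediate-form msa_shrq_n_u32
+    v_uint32x4 z = v_rshr<3>(a); // rounding shift: (v + 4) >> 3 via msa_rshrq_n_u32
+@endcode
+*/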
+
+/* v_rotate_right, v_rotate_left */
+#define OPENCV_HAL_IMPL_MSA_ROTATE_OP(_Tpvec, _Tpv, _Tpvs, suffix) \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##suffix(0), n))); \
+} \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(msa_dupq_n_##suffix(0), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
+{ \
+    return a; \
+} \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), n))); \
+} \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, b.val), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    CV_UNUSED(b); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint8x16, v16u8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int8x16, v16i8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint16x8, v8u16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int16x8, v8i16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float32x4, v4f32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint64x2, v2u64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int64x2, v2i64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float64x2, v2f64, v2i64, s64)
+
+#define OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr), msa_dup_n_##suffix((_Tp)0))); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr0), msa_ld1_##suffix(ptr1))); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ \
+    int n = _Tpvec::nlanes; \
+    for( int i = 0; i < (n/2); i++ ) \
+        ptr[i] = a.val[i]; \
+} \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+    int n = _Tpvec::nlanes; \
+    for( int i = 0; i < (n/2); i++ ) \
+        ptr[i] = a.val[i+(n/2)]; \
+}
+
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64)
+
+
+/** Reverse **/
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    v_uint8x16 c = v_uint8x16((v16u8)__builtin_msa_vshf_b((v16i8)((v2i64){0x08090A0B0C0D0E0F, 0x0001020304050607}), msa_dupq_n_s8(0), (v16i8)a.val));
+    return c;
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    v_uint16x8 c = v_uint16x8((v8u16)__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000500060007, 0x0000000100020003}), msa_dupq_n_s16(0), (v8i16)a.val));
+    return c;
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return
v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + v_uint32x4 c; + c.val[0] = a.val[3]; + c.val[1] = a.val[2]; + c.val[2] = a.val[1]; + c.val[3] = a.val[0]; + return c; +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + v_uint64x2 c; + c.val[0] = a.val[1]; + c.val[1] = a.val[0]; + return c; +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) \ +inline unsigned short v_reduce_##func(const v_uint16x8& a) \ +{ \ + v8u16 a_lo, a_hi; \ + ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); \ + v4u32 b = msa_##func##q_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(a_hi)); \ + v4u32 b_lo, b_hi; \ + ILVRL_W2_UW(b, msa_dupq_n_u32(0), b_lo, b_hi); \ + v2u64 c = msa_##func##q_u64(msa_paddlq_u32(b_lo), msa_paddlq_u32(b_hi)); \ + return (unsigned short)cfunc(c[0], c[1]); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(func, cfunc) \ +inline short v_reduce_##func(const v_int16x8& a) \ +{ \ + v8i16 a_lo, a_hi; \ + ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); \ + v4i32 b = msa_##func##q_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(a_hi)); \ + v4i32 b_lo, b_hi; \ + ILVRL_W2_SW(b, msa_dupq_n_s32(0), b_lo, b_hi); \ + v2i64 c = msa_##func##q_s64(msa_paddlq_s32(b_lo), msa_paddlq_s32(b_hi)); \ + return (short)cfunc(c[0], c[1]); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(min, std::min) + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(_Tpvec, scalartype, func, cfunc) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + return (scalartype)cfunc(cfunc(a.val[0], a.val[1]), cfunc(a.val[2], a.val[3])); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, min, std::min) + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(_Tpvec, scalartype, _Tpvec2, func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpvec2 a1, a2; \ + v_expand(a, a1, a2); \ + return (scalartype)v_reduce_##func(v_##func(a1, a2)); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, max) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, min) +OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, max) + + + +#define OPENCV_HAL_IMPL_MSA_REDUCE_SUM(_Tpvec, scalartype, suffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + return (scalartype)msa_sum_##suffix(a.val); \ +} + +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16) 
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return (uint64)(msa_getq_lane_u64(a.val, 0) + msa_getq_lane_u64(a.val, 1)); } +inline int64 v_reduce_sum(const v_int64x2& a) +{ return (int64)(msa_getq_lane_s64(a.val, 0) + msa_getq_lane_s64(a.val, 1)); } +inline double v_reduce_sum(const v_float64x2& a) +{ + return msa_getq_lane_f64(a.val, 0) + msa_getq_lane_f64(a.val, 1); +} + +/* v_reduce_sum4, v_reduce_sad */ +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v4f32 u0 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val)))); // a0+a1 b0+b1 a2+a3 b2+b3 + v4f32 u1 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val)))); // c0+c1 d0+d1 c2+c3 d2+d3 + + return v_float32x4(msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))), + MSA_TPV_REINTERPRET(v4f32, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))))); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + v16u8 t0 = msa_abdq_u8(a.val, b.val); + v8u16 t1 = msa_paddlq_u8(t0); + v4u32 t2 = msa_paddlq_u16(t1); + return msa_sum_u32(t2); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + v16u8 t0 = MSA_TPV_REINTERPRET(v16u8, msa_abdq_s8(a.val, b.val)); + v8u16 t1 = msa_paddlq_u8(t0); + v4u32 t2 = msa_paddlq_u16(t1); + return msa_sum_u32(t2); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v8u16 t0 = msa_abdq_u16(a.val, b.val); + v4u32 t1 = msa_paddlq_u16(t0); + return msa_sum_u32(t1); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v8u16 t0 = MSA_TPV_REINTERPRET(v8u16, msa_abdq_s16(a.val, b.val)); + v4u32 t1 = msa_paddlq_u16(t0); + return msa_sum_u32(t1); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + v4u32 t0 = msa_abdq_u32(a.val, b.val); + return msa_sum_u32(t0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + v4u32 t0 = MSA_TPV_REINTERPRET(v4u32, msa_abdq_s32(a.val, b.val)); + return msa_sum_u32(t0); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + v4f32 t0 = msa_abdq_f32(a.val, b.val); + return msa_sum_f32(t0); +} + +/* v_popcount */ +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(_Tpvec) \ +inline v_uint8x16 v_popcount(const _Tpvec& a) \ +{ \ + v16u8 t = MSA_TPV_REINTERPRET(v16u8, msa_cntq_s8(MSA_TPV_REINTERPRET(v16i8, a.val))); \ + return v_uint8x16(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_uint8x16) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_int8x16) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(_Tpvec) \ +inline v_uint16x8 v_popcount(const _Tpvec& a) \ +{ \ + v8u16 t = MSA_TPV_REINTERPRET(v8u16, msa_cntq_s16(MSA_TPV_REINTERPRET(v8i16, a.val))); \ + return v_uint16x8(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_uint16x8) 
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_int16x8) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(_Tpvec) \ +inline v_uint32x4 v_popcount(const _Tpvec& a) \ +{ \ + v4u32 t = MSA_TPV_REINTERPRET(v4u32, msa_cntq_s32(MSA_TPV_REINTERPRET(v4i32, a.val))); \ + return v_uint32x4(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_uint32x4) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_int32x4) + +#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(_Tpvec) \ +inline v_uint64x2 v_popcount(const _Tpvec& a) \ +{ \ + v2u64 t = MSA_TPV_REINTERPRET(v2u64, msa_cntq_s64(MSA_TPV_REINTERPRET(v2i64, a.val))); \ + return v_uint64x2(t); \ +} +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_uint64x2) +OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_int64x2) + +inline int v_signmask(const v_uint8x16& a) +{ + v8i8 m0 = msa_create_s8(CV_BIG_UINT(0x0706050403020100)); + v16u8 v0 = msa_shlq_u8(msa_shrq_n_u8(a.val, 7), msa_combine_s8(m0, m0)); + v8u16 v1 = msa_paddlq_u8(v0); + v4u32 v2 = msa_paddlq_u16(v1); + v2u64 v3 = msa_paddlq_u32(v2); + return (int)msa_getq_lane_u64(v3, 0) + ((int)msa_getq_lane_u64(v3, 1) << 8); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_uint16x8& a) +{ + v4i16 m0 = msa_create_s16(CV_BIG_UINT(0x0003000200010000)); + v8u16 v0 = msa_shlq_u16(msa_shrq_n_u16(a.val, 15), msa_combine_s16(m0, m0)); + v4u32 v1 = msa_paddlq_u16(v0); + v2u64 v2 = msa_paddlq_u32(v1); + return (int)msa_getq_lane_u64(v2, 0) + ((int)msa_getq_lane_u64(v2, 1) << 4); +} +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } + +inline int v_signmask(const v_uint32x4& a) +{ + v2i32 m0 = msa_create_s32(CV_BIG_UINT(0x0000000100000000)); + v4u32 v0 = msa_shlq_u32(msa_shrq_n_u32(a.val, 31), msa_combine_s32(m0, m0)); + v2u64 v1 = msa_paddlq_u32(v0); + return (int)msa_getq_lane_u64(v1, 0) + ((int)msa_getq_lane_u64(v1, 1) << 2); +} +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } + +inline int v_signmask(const v_uint64x2& a) +{ + v2u64 v0 = msa_shrq_n_u64(a.val, 63); + return (int)msa_getq_lane_u64(v0, 0) + ((int)msa_getq_lane_u64(v0, 1) << 1); +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } + +#define OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(_Tpvec, _Tpvec2, suffix, shift) \ +inline bool v_check_all(const v_##_Tpvec& a) \ +{ \ + _Tpvec2 v0 = msa_shrq_n_##suffix(msa_mvnq_##suffix(a.val), shift); \ + 
v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \ + return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) == 0; \ +} \ +inline bool v_check_any(const v_##_Tpvec& a) \ +{ \ + _Tpvec2 v0 = msa_shrq_n_##suffix(a.val, shift); \ + v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \ + return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) != 0; \ +} + +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint8x16, v16u8, u8, 7) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint16x8, v8u16, u16, 15) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint32x4, v4u32, u32, 31) +OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint64x2, v2u64, u64, 63) + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } + +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +/* v_select */ +#define OPENCV_HAL_IMPL_MSA_SELECT(_Tpvec, _Tpv, _Tpvu) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_bslq_u8(MSA_TPV_REINTERPRET(_Tpvu, mask.val), \ + MSA_TPV_REINTERPRET(_Tpvu, b.val), MSA_TPV_REINTERPRET(_Tpvu, a.val)))); \ +} + +OPENCV_HAL_IMPL_MSA_SELECT(v_uint8x16, v16u8, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int8x16, v16i8, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_uint16x8, v8u16, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int16x8, v8i16, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_uint32x4, v4u32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_int32x4, v4i32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_float32x4, v4f32, v16u8) +OPENCV_HAL_IMPL_MSA_SELECT(v_float64x2, v2f64, v16u8) + +#define OPENCV_HAL_IMPL_MSA_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix, ssuffix, _Tpv, _Tpvs) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + b0.val = msa_paddlq_##suffix(a_lo); \ + b1.val = msa_paddlq_##suffix(a_hi); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + return _Tpwvec(msa_paddlq_##suffix(a_lo)); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \ + return _Tpwvec(msa_paddlq_##suffix(a_hi)); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return 
_Tpwvec(msa_movl_##suffix(msa_ld1_##suffix(ptr))); \
+}
+
+OPENCV_HAL_IMPL_MSA_EXPAND(v_uint8x16, v_uint16x8, uchar, u8, s8, v16u8, v16i8)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_int8x16, v_int16x8, schar, s8, s8, v16i8, v16i8)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_uint16x8, v_uint32x4, ushort, u16, s16, v8u16, v8i16)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_int16x8, v_int32x4, short, s16, s16, v8i16, v8i16)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_uint32x4, v_uint64x2, uint, u32, s32, v4u32, v4i32)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_int32x4, v_int64x2, int, s32, s32, v4i32, v4i32)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    return v_uint32x4((v4u32){ptr[0], ptr[1], ptr[2], ptr[3]});
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    return v_int32x4((v4i32){ptr[0], ptr[1], ptr[2], ptr[3]});
+}
+
+/* v_zip, v_combine_low, v_combine_high, v_recombine */
+#define OPENCV_HAL_IMPL_MSA_UNPACKS(_Tpvec, _Tpv, _Tpvs, ssuffix) \
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
+{ \
+    b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \
+    b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \
+} \
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \
+} \
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \
+} \
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
+{ \
+    c.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \
+    d.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \
+}
+
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint8x16, v16u8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_int8x16, v16i8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint16x8, v8u16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_int16x8, v8i16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_float32x4, v4f32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_float64x2, v2f64, v2i64, s64)
+
+/* v_extract */
+#define OPENCV_HAL_IMPL_MSA_EXTRACT(_Tpvec, _Tpv, _Tpvs, suffix) \
+template <int s> \
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), s))); \
+}
+
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint8x16, v16u8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int8x16, v16i8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint16x8, v8u16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int16x8, v8i16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint64x2, v2u64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int64x2, v2i64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_float32x4, v4f32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_float64x2, v2f64, v2i64, s64)
+
+/* v_round, v_floor, v_ceil, v_trunc */
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    return v_int32x4(msa_cvttintq_s32_f32(a.val));
+}
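+
+// Editorial sketch: v_floor/v_ceil below are round-to-nearest plus a mask fix-up.
+// msa_cgtq_f32 yields all-ones lanes (-1 when reinterpreted as s32) where the
+// comparison holds, so adding the mask steps a lane down by exactly one where
+// rounding overshot. A rough scalar equivalent of the same trick (hypothetical
+// helper, not part of this HAL):
+//
+//   int floor_via_round(float x) {
+//       int r = (int)lrintf(x);              // round to nearest, like v_round
+//       return r + ((float)r > x ? -1 : 0);  // step down where rounding overshot
+//   }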
+ +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v4i32 a1 = msa_cvttintq_s32_f32(a.val); + return v_int32x4(msa_addq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(msa_cvtfintq_f32_s32(a1), a.val)))); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v4i32 a1 = msa_cvttintq_s32_f32(a.val); + return v_int32x4(msa_subq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(a.val, msa_cvtfintq_f32_s32(a1))))); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + return v_int32x4(msa_cvttruncq_s32_f32(a.val)); +} + +inline v_int32x4 v_round(const v_float64x2& a) +{ + return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_cvttintq_s64_f64(b.val))); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + v2f64 a1 = msa_cvtrintq_f64(a.val); + return v_int32x4(msa_pack_s64(msa_addq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a1, a.val))), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + v2f64 a1 = msa_cvtrintq_f64(a.val); + return v_int32x4(msa_pack_s64(msa_subq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a.val, a1))), msa_dupq_n_s64(0))); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + return v_int32x4(msa_pack_s64(msa_cvttruncq_s64_f64(a.val), msa_dupq_n_s64(0))); +} + +#define OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(_Tpvec, _Tpv, _Tpvs, ssuffix) \ +inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \ + const _Tpvec& a2, const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + _Tpv t00 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + _Tpv t01 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \ + _Tpv t10 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \ + _Tpv t11 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \ + b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \ + b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \ + b2.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \ + b3.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \ +} + +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_uint32x4, v4u32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_int32x4, v4i32, v4i32, s32) +OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_float32x4, v4f32, v4i32, s32) + +#define OPENCV_HAL_IMPL_MSA_INTERLEAVED(_Tpvec, _Tp, suffix) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + msa_ld2q_##suffix(ptr, &a.val, &b.val); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + msa_ld3q_##suffix(ptr, &a.val, &b.val, &c.val); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + msa_ld4q_##suffix(ptr, &a.val, &b.val, &c.val, &d.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + 
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + msa_st2q_##suffix(ptr, a.val, b.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + msa_st3q_##suffix(ptr, a.val, b.val, c.val); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + msa_st4q_##suffix(ptr, a.val, b.val, c.val, d.val); \ +} + +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int8x16, schar, s8) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int16x8, short, s16) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(float32x4, float, f32) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint64x2, uint64, u64) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(int64x2, int64, s64) +OPENCV_HAL_IMPL_MSA_INTERLEAVED(float64x2, double, f64) + +/* v_cvt_f32, v_cvt_f64, v_cvt_f64_high */ +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(msa_cvtfintq_f32_s32(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + return v_float32x4(msa_cvtfq_f32_f64(a.val, msa_dupq_n_f64(0.0f))); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(msa_cvtfq_f32_f64(a.val, b.val)); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(msa_cvtflq_f64_f32(msa_cvtfintq_f32_s32(a.val))); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(msa_cvtfhq_f64_f32(msa_cvtfintq_f32_s32(a.val))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(msa_cvtflq_f64_f32(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(msa_cvtfhq_f64_f32(a.val)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + return v_float64x2(msa_cvtfintq_f64_s64(a.val)); +} + +////////////// Lookup table access //////////////////// +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(msa_ld1q_s8(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return 
v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(msa_ld1q_s16(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(msa_ld1q_s16(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(msa_combine_s16(msa_ld1_s16(tab + idx[0]), msa_ld1_s16(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(msa_ld1q_s32(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(msa_combine_s32(msa_ld1_s32(tab + idx[0]), msa_ld1_s32(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(msa_ld1q_s32(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(msa_combine_s64(msa_create_s64(tab[idx[0]]), msa_create_s64(tab[idx[1]]))); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(msa_ld1q_s64(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(msa_ld1q_f32(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + uint64 CV_DECL_ALIGNED(32) elems[2] = + { + *(uint64*)(tab + idx[0]), + *(uint64*)(tab + idx[1]) + }; + return v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ld1q_u64(elems))); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(msa_ld1q_f32(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + 
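+    // MSA has no vector gather, so the lane indices are spilled to an aligned
+    // scratch buffer and the table entries are fetched one lane at a time.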
+ return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[msa_getq_lane_s32(idxvec.val, 0)], + tab[msa_getq_lane_s32(idxvec.val, 1)], + tab[msa_getq_lane_s32(idxvec.val, 2)], + tab[msa_getq_lane_s32(idxvec.val, 3)] + }; + return v_uint32x4(msa_ld1q_u32(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + v4f32 xy02 = msa_combine_f32(msa_ld1_f32(tab + idx[0]), msa_ld1_f32(tab + idx[2])); + v4f32 xy13 = msa_combine_f32(msa_ld1_f32(tab + idx[1]), msa_ld1_f32(tab + idx[3])); + x = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); + y = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0705060403010200, 0x0F0D0E0C0B090A08}), msa_dupq_n_s8(0), vec.val)); + return c; +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0703060205010400, 0x0F0B0E0A0D090C08}), msa_dupq_n_s8(0), vec.val)); + return c; +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0003000100020000, 0x0007000500060004}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } + +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0005000100040000, 0x0007000300060002}), msa_dupq_n_s16(0), vec.val)); + return c; +} + +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + v_int32x4 c; + c.val[0] = vec.val[0]; + c.val[1] = vec.val[2]; + c.val[2] = vec.val[1]; + c.val[3] = vec.val[3]; + return c; +} + +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + v_int8x16 c = v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0908060504020100, 0x131211100E0D0C0A}), msa_dupq_n_s8(0), vec.val)); + return c; +} + +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + 
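+    // The vshf.h pattern below keeps halfword lanes {0,1,2, 4,5,6, 8,9},
+    // dropping every fourth lane so xyz triplets from xyzw-padded data pack
+    // contiguously.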
v_int16x8 c = v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000200010000, 0x0009000800060005}), msa_dupq_n_s16(0), vec.val));
+    return c;
+}
+
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+inline v_float64x2 v_lut(const double* tab, const int* idx)
+{
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        tab[idx[0]],
+        tab[idx[1]]
+    };
+    return v_float64x2(msa_ld1q_f64(elems));
+}
+
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
+{
+    return v_float64x2(msa_ld1q_f64(tab + idx[0]));
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    return v_float64x2(tab[idx[0]], tab[idx[1]]);
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    v2f64 xy0 = msa_ld1q_f64(tab + idx[0]);
+    v2f64 xy1 = msa_ld1q_f64(tab + idx[1]);
+    x = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvevq_s64(MSA_TPV_REINTERPRET(v2i64, xy1), MSA_TPV_REINTERPRET(v2i64, xy0))));
+    y = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvodq_s64(MSA_TPV_REINTERPRET(v2i64, xy1), MSA_TPV_REINTERPRET(v2i64, xy0))));
+}
+
+template <int i, typename _Tp>
+inline typename _Tp::lane_type v_extract_n(const _Tp& a)
+{
+    return v_rotate_right<i>(a).get0();
+}
+
+template <int i>
+inline v_uint32x4 v_broadcast_element(const v_uint32x4& a)
+{
+    return v_setall_u32(v_extract_n<i>(a));
+}
+template <int i>
+inline v_int32x4 v_broadcast_element(const v_int32x4& a)
+{
+    return v_setall_s32(v_extract_n<i>(a));
+}
+template <int i>
+inline v_float32x4 v_broadcast_element(const v_float32x4& a)
+{
+    return v_setall_f32(v_extract_n<i>(a));
+}
+
+////// FP16 support ///////
+#if CV_FP16
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+#ifndef msa_ld1_f16
+    v4f16 v = (v4f16)msa_ld1_s16((const short*)ptr);
+#else
+    v4f16 v = msa_ld1_f16((const __fp16*)ptr);
+#endif
+    return v_float32x4(msa_cvt_f32_f16(v));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    v4f16 hv = msa_cvt_f16_f32(v.val);
+
+#ifndef msa_st1_f16
+    msa_st1_s16((short*)ptr, (int16x4_t)hv);
+#else
+    msa_st1_f16((__fp16*)ptr, hv);
+#endif
+}
+#else
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    float buf[4];
+    for( int i = 0; i < 4; i++ )
+        buf[i] = (float)ptr[i];
+    return v_load(buf);
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    float buf[4];
+    v_store(buf, v);
+    for( int i = 0; i < 4; i++ )
+        ptr[i] = (float16_t)buf[i];
+}
+#endif
+
+inline void v_cleanup() {}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_neon.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_neon.hpp
new file mode 100644
index 0000000..28cf813
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_neon.hpp
@@ -0,0 +1,2615 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_NEON_HPP
+#define OPENCV_HAL_INTRIN_NEON_HPP
+
+#include <arm_neon.h>
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+#define CV_SIMD128 1
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define CV_SIMD128_64F 1
+#else
+#define CV_SIMD128_64F 0
+#endif
+
+// The following macro checks if the code is being compiled for the
+// AArch64 execution state of Armv8, to enable the 128-bit
+// intrinsics. The macro `__ARM_64BIT_STATE` is the one recommended by
+// the Arm C Language Extension (ACLE) specifications [1] to check the
+// availability of 128-bit intrinsics, and it is supported by clang
+// and gcc. The macro `_M_ARM64` is the equivalent one for Microsoft
+// Visual Studio [2].
+//
+// [1] https://developer.arm.com/documentation/101028/0012/13--Advanced-SIMD--Neon--intrinsics
+// [2] https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
+#if defined(__ARM_64BIT_STATE) || defined(_M_ARM64)
+#define CV_NEON_AARCH64 1
+#else
+#define CV_NEON_AARCH64 0
+#endif
+
+// TODO
+#define CV_NEON_DOT 0
+
+//////////// Utils ////////////
+
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
+    { c = vuzp1q_##suffix(a, b); d = vuzp2q_##suffix(a, b); }
+#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv&a, const _Tpv&b, _Tpv& c, _Tpv& d) \
+    { c = vuzp1_##suffix(a, b); d = vuzp2_##suffix(a, b); }
+#else
+#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
+    { _Tpvx2 ab = vuzpq_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; }
+#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
+    { _Tpvx2 ab = vuzp_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; }
+#endif
+
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
+    template <typename T> static inline \
+    _Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
+    template <typename T> static inline \
+    float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
+#else
+#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(_Tpv, _Tpvl, suffix) \
+    OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) \
+    OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpvl##_t, _Tpvl##x2_t, suffix) \
+    OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix)
+
+#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(_Tpv, _Tpvl, suffix) \
+    OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix)
+
+#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(_Tpv, _Tpvl, suffix) \
+    OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix)
+
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint8x16, uint8x8, u8)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int8x16, int8x8, s8)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint16x8, uint16x4, u16)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int16x8, int16x4, s16)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint32x4, uint32x2, u32)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int32x4, int32x2, s32)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(float32x4, float32x2, f32)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(uint64x2, uint64x1, u64)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(int64x2, int64x1, s64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(float64x2, float64x1,f64)
+#endif
+
+//////////// Types ////////////
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() {}
+    explicit v_uint8x16(uint8x16_t v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+        val = vld1q_u8(v);
+    }
+    uchar get0() const
+    {
+        return vgetq_lane_u8(val, 0);
+    }
+
+    uint8x16_t val;
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() {}
+    explicit v_int8x16(int8x16_t v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14,
schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = vld1q_s8(v); + } + schar get0() const + { + return vgetq_lane_s8(val, 0); + } + + int8x16_t val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(uint16x8_t v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = vld1q_u16(v); + } + ushort get0() const + { + return vgetq_lane_u16(val, 0); + } + + uint16x8_t val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(int16x8_t v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = vld1q_s16(v); + } + short get0() const + { + return vgetq_lane_s16(val, 0); + } + + int16x8_t val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(uint32x4_t v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + val = vld1q_u32(v); + } + unsigned get0() const + { + return vgetq_lane_u32(val, 0); + } + + uint32x4_t val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(int32x4_t v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = vld1q_s32(v); + } + int get0() const + { + return vgetq_lane_s32(val, 0); + } + int32x4_t val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(float32x4_t v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = vld1q_f32(v); + } + float get0() const + { + return vgetq_lane_f32(val, 0); + } + float32x4_t val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(uint64x2_t v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = vld1q_u64(v); + } + uint64 get0() const + { + return vgetq_lane_u64(val, 0); + } + uint64x2_t val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(int64x2_t v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = vld1q_s64(v); + } + int64 get0() const + { + return vgetq_lane_s64(val, 0); + } + int64x2_t val; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(float64x2_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = vld1q_f64(v); + } + double get0() const + { + return vgetq_lane_f64(val, 0); + } + float64x2_t val; +}; +#endif + +#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \ +inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \ +inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \ +inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \ +inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \ +inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return 
v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \
+inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \
+inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \
+inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \
+inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \
+inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \
+inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \
+inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); }
+
+OPENCV_HAL_IMPL_NEON_INIT(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_INIT(int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_INIT(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_INIT(int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_INIT(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32)
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
+OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64)
+OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
+OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    hreg a1 = mov(a.val), b1 = mov(b.val); \
+    return _Tpvec(vcombine_##suffix(a1, b1)); \
+} \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = mov(a.val); \
+    vst1_##suffix(ptr, a1); \
+} \
+template <int n> inline \
+_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    hreg a1 = rshr(a.val, n); \
+    hreg b1 = rshr(b.val, n); \
+    return _Tpvec(vcombine_##suffix(a1, b1)); \
+} \
+template <int n> inline \
+void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = rshr(a.val, n); \
+    vst1_##suffix(ptr, a1); \
+}
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, pack, vqmovn_u16, vqrshrn_n_u16)
+OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, pack, vqmovn_s16, vqrshrn_n_s16)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, pack, vqmovn_u32, vqrshrn_n_u32)
+OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, pack, vqmovn_s32, vqrshrn_n_s32)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, pack, vmovn_u64, vrshrn_n_u64)
+OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn_s64, vrshrn_n_s64)
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4,
pack_u, vqmovun_s32, vqrshrun_n_s32) + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + uint8x16_t ab = vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val)); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + uint16x8_t nab = vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val)); + uint16x8_t ncd = vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val)); + return v_uint8x16(vcombine_u8(vmovn_u16(nab), vmovn_u16(ncd))); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint32x4_t ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val)); + uint32x4_t cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val)); + uint32x4_t ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val)); + uint32x4_t gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val)); + + uint16x8_t abcd = vcombine_u16(vmovn_u32(ab), vmovn_u32(cd)); + uint16x8_t efgh = vcombine_u16(vmovn_u32(ef), vmovn_u32(gh)); + return v_uint8x16(vcombine_u8(vmovn_u16(abcd), vmovn_u16(efgh))); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val); + float32x4_t res = vmulq_lane_f32(m0.val, vl, 0); + res = vmlaq_lane_f32(res, m1.val, vl, 1); + res = vmlaq_lane_f32(res, m2.val, vh, 0); + res = vmlaq_lane_f32(res, m3.val, vh, 1); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val); + float32x4_t res = vmulq_lane_f32(m0.val, vl, 0); + res = vmlaq_lane_f32(res, m1.val, vl, 1); + res = vmlaq_lane_f32(res, m2.val, vh, 0); + res = vaddq_f32(res, a.val); + return v_float32x4(res); +} + +#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64) +#if 
CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32) +OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64) +OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64) +#else +inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b) +{ + float32x4_t reciprocal = vrecpeq_f32(b.val); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + return v_float32x4(vmulq_f32(a.val, reciprocal)); +} +inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b) +{ + float32x4_t reciprocal = vrecpeq_f32(b.val); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal); + a.val = vmulq_f32(a.val, reciprocal); + return a; +} +#endif + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint16x8, v_uint32x4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); +#if CV_NEON_AARCH64 + d.val = vmull_high_s8(a.val, b.val); +#else // #if CV_NEON_AARCH64 + d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); +#endif // #if CV_NEON_AARCH64 +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); +#if CV_NEON_AARCH64 + d.val = vmull_high_u8(a.val, b.val); +#else // #if CV_NEON_AARCH64 + d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); +#endif // #if CV_NEON_AARCH64 +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); +#if CV_NEON_AARCH64 + d.val = vmull_high_s16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); +#endif // #if CV_NEON_AARCH64 +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val)); +#if CV_NEON_AARCH64 + d.val = vmull_high_u16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); +#endif // #if CV_NEON_AARCH64 +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val)); +#if CV_NEON_AARCH64 + d.val = vmull_high_u32(a.val, b.val); +#else // #if CV_NEON_AARCH64 + d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val)); +#endif // #if CV_NEON_AARCH64 +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ +#if CV_NEON_AARCH64 + int32x4_t c = vmull_high_s16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); +#endif // #if 
CV_NEON_AARCH64 + return v_int16x8(vcombine_s16( + vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16), + vshrn_n_s32(c, 16) + )); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ +#if CV_NEON_AARCH64 + uint32x4_t c = vmull_high_u16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); +#endif // #if CV_NEON_AARCH64 + return v_uint16x8(vcombine_u16( + vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16), + vshrn_n_u32(c, 16) + )); +} + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + int16x8_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int16x4_t a0 = vget_low_s16(uzp1); + int16x4_t b0 = vget_high_s16(uzp1); + int16x4_t a1 = vget_low_s16(uzp2); + int16x4_t b1 = vget_high_s16(uzp2); + int32x4_t p = vmull_s16(a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int16x8_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int16x4_t a0 = vget_low_s16(uzp1); + int16x4_t b0 = vget_high_s16(uzp1); + int16x4_t a1 = vget_low_s16(uzp2); + int16x4_t b1 = vget_high_s16(uzp2); + int32x4_t p = vmlal_s16(c.val, a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +} + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + int32x4_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int32x2_t a0 = vget_low_s32(uzp1); + int32x2_t b0 = vget_high_s32(uzp1); + int32x2_t a1 = vget_low_s32(uzp2); + int32x2_t b1 = vget_high_s32(uzp2); + int64x2_t p = vmull_s32(a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int32x4_t uzp1, uzp2; + _v128_unzip(a.val, b.val, uzp1, uzp2); + int32x2_t a0 = vget_low_s32(uzp1); + int32x2_t b0 = vget_high_s32(uzp1); + int32x2_t a1 = vget_low_s32(uzp2); + int32x2_t b1 = vget_high_s32(uzp2); + int64x2_t p = vmlal_s32(c.val, a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); +#else + const uint8x16_t zero = vreinterpretq_u8_u32(vdupq_n_u32(0)); + const uint8x16_t mask = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF)); + const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0)); + const uint16x8_t mask32 = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); + + uint16x8_t even = vmulq_u16(vreinterpretq_u16_u8(vbslq_u8(mask, a.val, zero)), + vreinterpretq_u16_u8(vbslq_u8(mask, b.val, zero))); + uint16x8_t odd = vmulq_u16(vshrq_n_u16(vreinterpretq_u16_u8(a.val), 8), + vshrq_n_u16(vreinterpretq_u16_u8(b.val), 8)); + + uint32x4_t s0 = vaddq_u32(vreinterpretq_u32_u16(vbslq_u16(mask32, even, zero32)), + vreinterpretq_u32_u16(vbslq_u16(mask32, odd, zero32))); + uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16), + vshrq_n_u32(vreinterpretq_u32_u16(odd), 16)); + return v_uint32x4(vaddq_u32(s0, s1)); +#endif +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); +#else + return v_dotprod_expand(a, b) + c; +#endif +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(vdupq_n_s32(0), 
a.val, b.val)); +#else + int16x8_t p0 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + int16x8_t p1 = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)); + int16x8_t uzp1, uzp2; + _v128_unzip(p0, p1, uzp1, uzp2); + int16x8_t sum = vaddq_s16(uzp1, uzp2); + int16x4_t uzpl1, uzpl2; + _v128_unzip(vget_low_s16(sum), vget_high_s16(sum), uzpl1, uzpl2); + return v_int32x4(vaddl_s16(uzpl1, uzpl2)); +#endif +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(c.val, a.val, b.val)); +#else + return v_dotprod_expand(a, b) + c; +#endif +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + const uint16x8_t zero = vreinterpretq_u16_u32(vdupq_n_u32(0)); + const uint16x8_t mask = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); + + uint32x4_t even = vmulq_u32(vreinterpretq_u32_u16(vbslq_u16(mask, a.val, zero)), + vreinterpretq_u32_u16(vbslq_u16(mask, b.val, zero))); + uint32x4_t odd = vmulq_u32(vshrq_n_u32(vreinterpretq_u32_u16(a.val), 16), + vshrq_n_u32(vreinterpretq_u32_u16(b.val), 16)); + uint32x4_t uzp1, uzp2; + _v128_unzip(even, odd, uzp1, uzp2); + uint64x2_t s0 = vaddl_u32(vget_low_u32(uzp1), vget_high_u32(uzp1)); + uint64x2_t s1 = vaddl_u32(vget_low_u32(uzp2), vget_high_u32(uzp2)); + return v_uint64x2(vaddq_u64(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + int32x4_t p0 = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + int32x4_t p1 = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); + + int32x4_t uzp1, uzp2; + _v128_unzip(p0, p1, uzp1, uzp2); + int32x4_t sum = vaddq_s32(uzp1, uzp2); + + int32x2_t uzpl1, uzpl2; + _v128_unzip(vget_low_s32(sum), vget_high_s32(sum), uzpl1, uzpl2); + return v_int64x2(vaddl_s32(uzpl1, uzpl2)); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } +#endif + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ +#if CV_NEON_AARCH64 + int32x4_t p = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + return v_int32x4(vmlal_high_s16(p, a.val, b.val)); +#else + int16x4_t a0 = vget_low_s16(a.val); + int16x4_t a1 = vget_high_s16(a.val); + int16x4_t b0 = vget_low_s16(b.val); + int16x4_t b1 = vget_high_s16(b.val); + int32x4_t p = vmull_s16(a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +#endif +} +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ +#if CV_NEON_AARCH64 + int32x4_t p = vmlal_s16(c.val, vget_low_s16(a.val), vget_low_s16(b.val)); + return v_int32x4(vmlal_high_s16(p, a.val, b.val)); +#else + int16x4_t a0 = vget_low_s16(a.val); + int16x4_t a1 = vget_high_s16(a.val); + int16x4_t b0 = vget_low_s16(b.val); + int16x4_t b1 = vget_high_s16(b.val); + int32x4_t p = vmlal_s16(c.val, a0, b0); + return v_int32x4(vmlal_s16(p, a1, b1)); +#endif +} + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ +#if 
CV_NEON_AARCH64 + int64x2_t p = vmull_s32(vget_low_s32(a.val), vget_low_s32(b.val)); + return v_int64x2(vmlal_high_s32(p, a.val, b.val)); +#else + int32x2_t a0 = vget_low_s32(a.val); + int32x2_t a1 = vget_high_s32(a.val); + int32x2_t b0 = vget_low_s32(b.val); + int32x2_t b1 = vget_high_s32(b.val); + int64x2_t p = vmull_s32(a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +#endif +} +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ +#if CV_NEON_AARCH64 + int64x2_t p = vmlal_s32(c.val, vget_low_s32(a.val), vget_low_s32(b.val)); + return v_int64x2(vmlal_high_s32(p, a.val, b.val)); +#else + int32x2_t a0 = vget_low_s32(a.val); + int32x2_t a1 = vget_high_s32(a.val); + int32x2_t b0 = vget_low_s32(b.val); + int32x2_t b1 = vget_high_s32(b.val); + int64x2_t p = vmlal_s32(c.val, a0, b0); + return v_int64x2(vmlal_s32(p, a1, b1)); +#endif +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val)); +#else + uint16x8_t p0 = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)); + uint16x8_t p1 = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val)); + uint32x4_t s0 = vaddl_u16(vget_low_u16(p0), vget_low_u16(p1)); + uint32x4_t s1 = vaddl_u16(vget_high_u16(p0), vget_high_u16(p1)); + return v_uint32x4(vaddq_u32(s0, s1)); +#endif +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ +#if CV_NEON_DOT + return v_uint32x4(vdotq_u32(c.val, a.val, b.val)); +#else + return v_dotprod_expand_fast(a, b) + c; +#endif +} + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val)); +#else + int16x8_t prod = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)); + prod = vmlal_s8(prod, vget_high_s8(a.val), vget_high_s8(b.val)); + return v_int32x4(vaddl_s16(vget_low_s16(prod), vget_high_s16(prod))); +#endif +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ +#if CV_NEON_DOT + return v_int32x4(vdotq_s32(c.val, a.val, b.val)); +#else + return v_dotprod_expand_fast(a, b) + c; +#endif +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint32x4_t p0 = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val)); + uint32x4_t p1 = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); + uint64x2_t s0 = vaddl_u32(vget_low_u32(p0), vget_high_u32(p0)); + uint64x2_t s1 = vaddl_u32(vget_low_u32(p1), vget_high_u32(p1)); + return v_uint64x2(vaddq_u64(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int32x4_t prod = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val)); + prod = vmlal_s16(prod, vget_high_s16(a.val), vget_high_s16(b.val)); + return v_int64x2(vaddl_s32(vget_low_s32(prod), vget_high_s32(prod))); +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return 
v_dotprod_expand_fast(a, b) + c; } +#endif + + +#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \ + OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \ + OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \ + OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { \ + return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \ + } + +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint8x16, u8) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int8x16, s8) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint16x8, u16) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int16x8, s16) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint32x4, u32) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int32x4, s32) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint64x2, u64) +OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int64x2, s64) + +#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(&, vandq_s32) +OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(|, vorrq_s32) +OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(^, veorq_s32) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val)))); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(vsqrtq_f32(x.val)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} +#else +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN)); + float32x4_t e = vrsqrteq_f32(x1); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e); + return v_float32x4(vmulq_f32(x.val, e)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + float32x4_t e = vrsqrteq_f32(x.val); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e); + return v_float32x4(e); +} +#endif + +#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); } + +OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8) +OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16) +OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32) + +inline v_float32x4 v_abs(v_float32x4 x) +{ return v_float32x4(vabsq_f32(x.val)); } + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \ + return a; \ +} + +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64) +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64) +OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return 
v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val)))); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(vsqrtq_f64(x.val)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0f); + return one / v_sqrt(x); +} + +inline v_float64x2 v_abs(v_float64x2 x) +{ return v_float64x2(vabsq_f64(x.val)); } +#endif + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_max, vmaxq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_min, vminq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_max, vmaxq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_min, vminq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_max, vmaxq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_min, vminq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_max, vmaxq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_min, vminq_u32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_max, vmaxq_u32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64) +#endif + +#if CV_SIMD128_64F +inline int64x2_t vmvnq_s64(int64x2_t a) +{ + int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_s64(a, vx); +} +inline uint64x2_t vmvnq_u64(uint64x2_t a) +{ + uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_u64(a, vx); +} +#endif +#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); } + +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64) +#endif + +inline v_float32x4 v_not_nan(const 
v_float32x4& a) +{ return v_float32x4(vreinterpretq_f32_u32(vceqq_f32(a.val, a.val))); } +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(vreinterpretq_f64_u64(vceqq_f64(a.val, a.val))); } +#endif + +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_add_wrap, vaddq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_mul_wrap, vmulq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16) + +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32) +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64) +#endif + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); } + +#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \ +inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec2(cast(intrin(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8) +OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16) +OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32) + +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val)); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ +#if CV_SIMD128_64F + // ARMv8, which adds support for 64-bit floating-point (so CV_SIMD128_64F is defined), + // also adds FMA support both for single- and double-precision floating-point vectors + return v_float32x4(vfmaq_f32(c.val, a.val, b.val)); +#else + return v_float32x4(vmlaq_f32(c.val, a.val, b.val)); +#endif +} + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(vmlaq_s32(c.val, a.val, b.val)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val))); + return v_sqrt(x); +} + +inline v_float64x2 
v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val))); +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(vfmaq_f64(c.val, a.val, b.val)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} +#endif + +// trade efficiency for convenience +#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \ +template<int n> inline _Tpvec v_shl(const _Tpvec& a) \ +{ return _Tpvec(vshlq_n_##suffix(a.val, n)); } \ +template<int n> inline _Tpvec v_shr(const _Tpvec& a) \ +{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \ +template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \ +{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); } + +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64) + +#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix) \ +template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \ +template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \ +template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint16x8, u16) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int16x8, s16) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint32x4, u32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int32x4, s32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float32x4, f32) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint64x2, u64) +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64) +#endif + +#if defined(__clang__) && defined(__aarch64__) +// avoid LD2 instruction. 
details: https://github.com/opencv/opencv/issues/14863 +#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ +typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \ +uint64 v = *(unaligned_uint64*)ptr; \ +return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \ +} +#else +#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } +#endif + +#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(vld1q_##suffix(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(vld1q_##suffix(ptr)); } \ +OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ vst1q_##suffix(ptr, a.val); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ vst1_##suffix(ptr, vget_high_##suffix(a.val)); } + +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64) +#endif + +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ +#if CV_NEON_AARCH64 + uint16_t t0 = vaddlvq_u8(a.val); + return t0; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline int v_reduce_sum(const v_int8x16& a) +{ +#if CV_NEON_AARCH64 + int16_t t0 = vaddlvq_s8(a.val); + return t0; +#else // #if CV_NEON_AARCH64 + int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val)); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ +#if CV_NEON_AARCH64 + uint32_t t0 = vaddlvq_u16(a.val); + return t0; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vpaddlq_u16(a.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline int v_reduce_sum(const v_int16x8& a) +{ +#if CV_NEON_AARCH64 + int32_t t0 = vaddlvq_s16(a.val); + return t0; +#else // #if CV_NEON_AARCH64 + int32x4_t t0 = vpaddlq_s16(a.val); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +#endif // 
#if CV_NEON_AARCH64 +} + +#if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + return v##vectorfunc##vq_##suffix(a.val); \ +} +#else // #if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ +} +#endif // #if CV_NEON_AARCH64 + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8) + +#if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + return v##vectorfunc##vq_##suffix(a.val); \ +} +#else // #if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + a0 = vp##vectorfunc##_##suffix(a0, a0); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ +} +#endif // #if CV_NEON_AARCH64 + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16) + +#if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + return v##vectorfunc##vq_##suffix(a.val); \ +} +#else // #if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \ + return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \ +} +#endif // #if CV_NEON_AARCH64 + +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ +#if CV_NEON_AARCH64 + return vaddvq_u64(a.val); +#else // #if CV_NEON_AARCH64 + return 
vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); +#endif // #if CV_NEON_AARCH64 +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ +#if CV_NEON_AARCH64 + return vaddvq_s64(a.val); +#else // #if CV_NEON_AARCH64 + return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); +#endif // #if CV_NEON_AARCH64 +} +#if CV_SIMD128_64F +inline double v_reduce_sum(const v_float64x2& a) +{ + return vaddvq_f64(a.val); +} +#endif + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ +#if CV_NEON_AARCH64 + float32x4_t ab = vpaddq_f32(a.val, b.val); // a0+a1 a2+a3 b0+b1 b2+b3 + float32x4_t cd = vpaddq_f32(c.val, d.val); // c0+c1 d0+d1 c2+c3 d2+d3 + return v_float32x4(vpaddq_f32(ab, cd)); // sumA sumB sumC sumD +#else // #if CV_NEON_AARCH64 + float32x4x2_t ab = vtrnq_f32(a.val, b.val); + float32x4x2_t cd = vtrnq_f32(c.val, d.val); + + float32x4_t u0 = vaddq_f32(ab.val[0], ab.val[1]); // a0+a1 b0+b1 a2+a3 b2+b3 + float32x4_t u1 = vaddq_f32(cd.val[0], cd.val[1]); // c0+c1 d0+d1 c2+c3 d2+d3 + + float32x4_t v0 = vcombine_f32(vget_low_f32(u0), vget_low_f32(u1)); + float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1)); + + return v_float32x4(vaddq_f32(v0, v1)); +#endif // #if CV_NEON_AARCH64 +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ +#if CV_NEON_AARCH64 + uint8x16_t t0 = vabdq_u8(a.val, b.val); + uint16_t t1 = vaddlvq_u8(t0); + return t1; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_NEON_AARCH64 + uint8x16_t t0 = vreinterpretq_u8_s8(vabdq_s8(a.val, b.val)); + uint16_t t1 = vaddlvq_u8(t0); + return t1; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val)))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ +#if CV_NEON_AARCH64 + uint16x8_t t0 = vabdq_u16(a.val, b.val); + uint32_t t1 = vaddlvq_u16(t0); + return t1; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ +#if CV_NEON_AARCH64 + uint16x8_t t0 = vreinterpretq_u16_s16(vabdq_s16(a.val, b.val)); + uint32_t t1 = vaddlvq_u16(t0); + return t1; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val))); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ +#if CV_NEON_AARCH64 + uint32x4_t t0 = vabdq_u32(a.val, b.val); + uint32_t t1 = vaddvq_u32(t0); + return t1; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vabdq_u32(a.val, b.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline unsigned v_reduce_sad(const v_int32x4& a, const 
v_int32x4& b) +{ +#if CV_NEON_AARCH64 + uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val)); + uint32_t t1 = vaddvq_u32(t0); + return t1; +#else // #if CV_NEON_AARCH64 + uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ +#if CV_NEON_AARCH64 + float32x4_t t0 = vabdq_f32(a.val, b.val); + return vaddvq_f32(t0); +#else // #if CV_NEON_AARCH64 + float32x4_t t0 = vabdq_f32(a.val, b.val); + float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0)); + return vget_lane_f32(vpadd_f32(t1, t1), 0); +#endif // #if CV_NEON_AARCH64 +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vcntq_u8(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vcntq_u8(vreinterpretq_u8_s8(a.val))); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u16(a.val)))); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s16(a.val)))); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u32(a.val))))); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s32(a.val))))); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(a.val)))))); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s64(a.val)))))); } + +inline int v_signmask(const v_uint8x16& a) +{ +#if CV_NEON_AARCH64 + const int8x16_t signPosition = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}; + const uint8x16_t byteOrder = {0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15}; + uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), signPosition); + uint8x16_t v1 = vqtbl1q_u8(v0, byteOrder); + uint32_t t0 = vaddlvq_u16(vreinterpretq_u16_u8(v1)); + return t0; +#else // #if CV_NEON_AARCH64 + int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100)); + uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0)); + uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0))); + return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8); +#endif // #if CV_NEON_AARCH64 +} + +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_uint16x8& a) +{ +#if CV_NEON_AARCH64 + const int16x8_t signPosition = {0,1,2,3,4,5,6,7}; + uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), signPosition); + uint32_t t0 = vaddlvq_u16(v0); + return t0; +#else // #if CV_NEON_AARCH64 + int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000)); + uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0)); + uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0)); + return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4); +#endif // #if CV_NEON_AARCH64 +} +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } + +inline int v_signmask(const v_uint32x4& a) +{ +#if CV_NEON_AARCH64 + const int32x4_t signPosition = {0,1,2,3}; + uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), signPosition); + uint32_t t0 = vaddvq_u32(v0); + return t0; +#else // #if CV_NEON_AARCH64 + int32x2_t m0 = 
vcreate_s32(CV_BIG_UINT(0x0000000100000000)); + uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0)); + uint64x2_t v1 = vpaddlq_u32(v0); + return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2); +#endif // #if CV_NEON_AARCH64 +} +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_uint64x2& a) +{ +#if CV_NEON_AARCH64 + const int64x2_t signPosition = {0,1}; + uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), signPosition); + uint64_t t0 = vaddvq_u64(v0); + return t0; +#else // #if CV_NEON_AARCH64 + int64x1_t m0 = vdup_n_s64(0); + uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0)); + return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1); +#endif // #if CV_NEON_AARCH64 +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#endif + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); } +#if CV_SIMD128_64F +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } +#endif + +#if CV_NEON_AARCH64 + #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \ + inline bool v_check_all(const v_##_Tpvec& a) \ + { \ + return (vminvq_##suffix(a.val) >> shift) != 0; \ + } \ + inline bool v_check_any(const v_##_Tpvec& a) \ + { \ + return (vmaxvq_##suffix(a.val) >> shift) != 0; \ + } +#else // #if CV_NEON_AARCH64 + #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \ + inline bool v_check_all(const v_##_Tpvec& a) \ + { \ + _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \ + uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \ + return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \ + } \ + inline bool v_check_any(const v_##_Tpvec& a) \ + { \ + _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \ + uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \ + return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \ + } +#endif // #if CV_NEON_AARCH64 + +OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7) +OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15) +OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31) + +inline bool v_check_all(const v_uint64x2& a) +{ + uint64x2_t v0 = vshrq_n_u64(a.val, 63); + return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1; +} +inline bool v_check_any(const v_uint64x2& a) +{ + uint64x2_t v0 = vshrq_n_u64(a.val, 63); + return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0; +} + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); 
} +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } + +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif + +#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_NEON_SELECT(v_uint8x16, u8, u8) +OPENCV_HAL_IMPL_NEON_SELECT(v_int8x16, s8, u8) +OPENCV_HAL_IMPL_NEON_SELECT(v_uint16x8, u16, u16) +OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16) +OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32) +OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32) +OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64) +#endif + +#if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \ + b1.val = vmovl_high_##suffix(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_high_##suffix(a.val)); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \ +} +#else +#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \ + b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \ +} +#endif + +OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8) +OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8) +OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16) +OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16) +OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32) +OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + uint8x8_t v0 = vcreate_u8(*(unaligned_uint*)ptr); + 
uint16x4_t v1 = vget_low_u16(vmovl_u8(v0)); + return v_uint32x4(vmovl_u16(v1)); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint; + int8x8_t v0 = vcreate_s8(*(unaligned_uint*)ptr); + int16x4_t v1 = vget_low_s16(vmovl_s8(v0)); + return v_int32x4(vmovl_s16(v1)); +} + +#if defined(__aarch64__) || defined(_M_ARM64) +#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + b0.val = vzip1q_##suffix(a0.val, a1.val); \ + b1.val = vzip2q_##suffix(a0.val, a1.val); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \ + d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \ +} +#else +#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + _Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \ + b0.val = p.val[0]; \ + b1.val = p.val[1]; \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \ + d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \ +} +#endif + +OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8) +OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8) +OPENCV_HAL_IMPL_NEON_UNPACKS(uint16x8, u16) +OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16) +OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32) +OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64) +#endif + +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + uint8x16_t vec = vrev64q_u8(a.val); + return v_uint8x16(vextq_u8(vec, vec, 8)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + uint16x8_t vec = vrev64q_u16(a.val); + return v_uint16x8(vextq_u16(vec, vec, 4)); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + uint32x4_t vec = vrev64q_u32(a.val); + return v_uint32x4(vextq_u32(vec, vec, 2)); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + uint64x2_t 
vec = a.val; + uint64x1_t vec_lo = vget_low_u64(vec); + uint64x1_t vec_hi = vget_high_u64(vec); + return v_uint64x2(vcombine_u64(vec_hi, vec_lo)); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +#if CV_SIMD128_64F +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } +#endif + +#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \ +template <int s> \ +inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \ +} + +OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8) +OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8) +OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16) +OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16) +OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32) +OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64) +OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64) +OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_NEON_EXTRACT_N(_Tpvec, _Tp, suffix) \ +template<int i> inline _Tp v_extract_n(_Tpvec v) { return vgetq_lane_##suffix(v.val, i); } + +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint32x4, uint, u32) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float64x2, double, f64) +#endif + +#define OPENCV_HAL_IMPL_NEON_BROADCAST(_Tpvec, _Tp, suffix) \ +template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) { _Tp t = v_extract_n<i>(v); return v_setall_##suffix(t); } + +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint32x4, uint, u32) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_NEON_BROADCAST(v_float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_BROADCAST(v_float64x2, double, f64) +#endif + +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float32x4& a) +{ + float32x4_t a_ = a.val; + int32x4_t result; +#if defined _MSC_VER + result = vcvtnq_s32_f32(a_); +#else + __asm__ ("fcvtns %0.4s, %1.4s" + : "=w"(result) + : "w"(a_) + : /* No clobbers */); +#endif + return v_int32x4(result); +} +#else +inline v_int32x4 v_round(const v_float32x4& a) +{ + static const int32x4_t v_sign = vdupq_n_s32(1 << 31), + v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); + + int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val))); + return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition)))); +} +#endif +inline v_int32x4 v_floor(const v_float32x4& a) +{ + int32x4_t a1 = vcvtq_s32_f32(a.val); + uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val); + return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask))); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + int32x4_t a1 = 
vcvtq_s32_f32(a.val); + uint32x4_t mask = vcgtq_f32(a.val, vcvtq_f32_s32(a1)); + return v_int32x4(vsubq_s32(a1, vreinterpretq_s32_u32(mask))); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(vcvtq_s32_f32(a.val)); } + +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val)))); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + int64x2_t a1 = vcvtq_s64_f64(a.val); + uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val); + a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask)); + return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + int64x2_t a1 = vcvtq_s64_f64(a.val); + uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1)); + a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask)); + return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + static const int32x2_t zero = vdup_n_s32(0); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtq_s64_f64(a.val)), zero)); +} +#endif + +#if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \ +inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ + const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ + v_##_Tpvec& b0, v_##_Tpvec& b1, \ + v_##_Tpvec& b2, v_##_Tpvec& b3) \ +{ \ + /* -- Pass 1: 64b transpose */ \ + _Tpvec##_t t0 = vreinterpretq_##suffix##32_##suffix##64( \ + vtrn1q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a0.val), \ + vreinterpretq_##suffix##64_##suffix##32(a2.val))); \ + _Tpvec##_t t1 = vreinterpretq_##suffix##32_##suffix##64( \ + vtrn1q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a1.val), \ + vreinterpretq_##suffix##64_##suffix##32(a3.val))); \ + _Tpvec##_t t2 = vreinterpretq_##suffix##32_##suffix##64( \ + vtrn2q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a0.val), \ + vreinterpretq_##suffix##64_##suffix##32(a2.val))); \ + _Tpvec##_t t3 = vreinterpretq_##suffix##32_##suffix##64( \ + vtrn2q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a1.val), \ + vreinterpretq_##suffix##64_##suffix##32(a3.val))); \ + /* -- Pass 2: 32b transpose */ \ + b0.val = vtrn1q_##suffix##32(t0, t1); \ + b1.val = vtrn2q_##suffix##32(t0, t1); \ + b2.val = vtrn1q_##suffix##32(t2, t3); \ + b3.val = vtrn2q_##suffix##32(t2, t3); \ +} + +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u) +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s) +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f) +#else // #if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \ +inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ + const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ + v_##_Tpvec& b0, v_##_Tpvec& b1, \ + v_##_Tpvec& b2, v_##_Tpvec& b3) \ +{ \ + /* m00 m01 m02 m03 */ \ + /* m10 m11 m12 m13 */ \ + /* m20 m21 m22 m23 */ \ + /* m30 m31 m32 m33 */ \ + _Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \ + _Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \ + /* m00 m10 m02 m12 */ \ + /* m01 m11 m03 m13 */ \ + /* m20 m30 m22 m32 */ \ + /* m21 m31 m23 m33 */ \ + b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \ + b1.val = 
vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \ + b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \ + b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \ +} + +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32) +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32) +OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32) +#endif // #if CV_NEON_AARCH64 + +#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tpvec##x2_t v = vld2q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + _Tpvec##x3_t v = vld3q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ + c.val = v.val[2]; \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + _Tpvec##x4_t v = vld4q_##suffix(ptr); \ + a.val = v.val[0]; \ + b.val = v.val[1]; \ + c.val = v.val[2]; \ + d.val = v.val[3]; \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + _Tpvec##x2_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + vst2q_##suffix(ptr, v); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + _Tpvec##x3_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + v.val[2] = c.val; \ + vst3q_##suffix(ptr, v); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec##x4_t v; \ + v.val[0] = a.val; \ + v.val[1] = b.val; \ + v.val[2] = c.val; \ + v.val[3] = d.val; \ + vst4q_##suffix(ptr, v); \ +} + +#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t a1 = vld1_##suffix(ptr + 2); \ + tp##x1_t b1 = vld1_##suffix(ptr + 3); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ +} \ + \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \ + v_##tp##x2& b, v_##tp##x2& c ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t c0 = vld1_##suffix(ptr + 2); \ + tp##x1_t a1 = vld1_##suffix(ptr + 3); \ + tp##x1_t b1 = vld1_##suffix(ptr + 4); \ + tp##x1_t c1 = vld1_##suffix(ptr + 5); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ + c = v_##tp##x2(vcombine_##suffix(c0, c1)); \ +} \ + \ +inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \ + v_##tp##x2& c, v_##tp##x2& d ) \ +{ \ + tp##x1_t a0 = vld1_##suffix(ptr); \ + tp##x1_t b0 = vld1_##suffix(ptr + 1); \ + tp##x1_t c0 = vld1_##suffix(ptr + 2); \ + tp##x1_t d0 = vld1_##suffix(ptr + 3); \ + tp##x1_t a1 = vld1_##suffix(ptr + 4); \ + tp##x1_t b1 = vld1_##suffix(ptr + 5); \ + tp##x1_t c1 = vld1_##suffix(ptr + 6); \ + tp##x1_t d1 = vld1_##suffix(ptr + 7); \ + a = v_##tp##x2(vcombine_##suffix(a0, a1)); \ + b = v_##tp##x2(vcombine_##suffix(b0, b1)); \ + c = v_##tp##x2(vcombine_##suffix(c0, c1)); \ + d = 
v_##tp##x2(vcombine_##suffix(d0, d1)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \ + const v_##tp##x2& b, const v_##tp##x2& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \ + vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \ + vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \ +} \ + \ +inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \ + const v_##tp##x2& c, const v_##tp##x2& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + vst1_##suffix(ptr, vget_low_##suffix(a.val)); \ + vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \ + vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \ + vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \ + vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \ + vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \ + vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \ + vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \ +} + +OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64) +#endif + +OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64) +OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(vcvtq_f32_s32(a.val)); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + float32x2_t zero = vdup_n_f32(0.0f); + return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), vcvt_f32_f64(b.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val)))); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val)))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val))); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ return v_float64x2(vcvtq_f64_s64(a.val)); } + +#endif + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + 
tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(vld1q_s8(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(vld1q_s16(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(vld1q_s16(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(vcombine_s16(vld1_s16(tab + idx[0]), vld1_s16(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(vld1q_s32(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(vcombine_s32(vld1_s32(tab + idx[0]), vld1_s32(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vld1q_s32(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(vcombine_s64(vcreate_s64(tab[idx[0]]), vcreate_s64(tab[idx[1]]))); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vld1q_s64(tab + idx[0])); +} +inline 
v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(vld1q_f32(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; + + uint64 CV_DECL_ALIGNED(32) elems[2] = + { + *(unaligned_uint64*)(tab + idx[0]), + *(unaligned_uint64*)(tab + idx[1]) + }; + return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems))); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(vld1q_f32(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_int32x4(vld1q_s32(elems)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_uint32x4(vld1q_u32(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + tab[vgetq_lane_s32(idxvec.val, 2)], + tab[vgetq_lane_s32(idxvec.val, 3)] + }; + return v_float32x4(vld1q_f32(elems)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + /*int CV_DECL_ALIGNED(32) idx[4]; + v_store(idx, idxvec); + + float32x4_t xy02 = vcombine_f32(vld1_f32(tab + idx[0]), vld1_f32(tab + idx[2])); + float32x4_t xy13 = vcombine_f32(vld1_f32(tab + idx[1]), vld1_f32(tab + idx[3])); + + float32x4x2_t xxyy = vuzpq_f32(xy02, xy13); + x = v_float32x4(xxyy.val[0]); + y = v_float32x4(xxyy.val[1]);*/ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0705060403010200)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0705060403010200)))); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0703060205010400)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0703060205010400)))); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + return v_int16x8(vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)), vtbl1_s8(vget_high_s8(vreinterpretq_s8_s16(vec.val)), 
vcreate_s8(0x0706030205040100))))); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + int16x4x2_t res = vzip_s16(vget_low_s16(vec.val), vget_high_s16(vec.val)); + return v_int16x8(vcombine_s16(res.val[0], res.val[1])); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + int32x2x2_t res = vzip_s32(vget_low_s32(vec.val), vget_high_s32(vec.val)); + return v_int32x4(vcombine_s32(res.val[0], res.val[1])); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + return v_int8x16(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0605040201000000)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0807060504020100))), vdupq_n_s8(0), 2)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + return v_int16x8(vreinterpretq_s16_s8(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0504030201000000)), vget_high_s8(vreinterpretq_s8_s16(vec.val))), vdupq_n_s8(0), 2))); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_float64x2(vld1q_f64(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(vld1q_f64(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[vgetq_lane_s32(idxvec.val, 0)], + tab[vgetq_lane_s32(idxvec.val, 1)], + }; + return v_float64x2(vld1q_f64(elems)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////// FP16 support /////// +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + float16x4_t v = + #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro + (float16x4_t)vld1_s16((const short*)ptr); + #else + vld1_f16((const __fp16*)ptr); + #endif + return v_float32x4(vcvt_f32_f16(v)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + float16x4_t hv = vcvt_f16_f32(v.val); + + #ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro + vst1_s16((short*)ptr, (int16x4_t)hv); + #else + vst1_f16((__fp16*)ptr, hv); + #endif +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + const int N = 
4; + float buf[N]; + for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + const int N = 4; + float buf[N]; + v_store(buf, v); + for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); +} +#endif + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_rvv.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_rvv.hpp new file mode 100644 index 0000000..a592976 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_rvv.hpp @@ -0,0 +1,3320 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// The original implementation has been contributed by Yin Zhang. +// Copyright (C) 2020, Institute of Software, Chinese Academy of Sciences. + +#ifndef OPENCV_HAL_INTRIN_RVV_HPP +#define OPENCV_HAL_INTRIN_RVV_HPP + +#include <algorithm> + +namespace cv +{ + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 + +//////////// Unsupported native intrinsics in C++ //////////// +// The following types have been defined in clang, but not in GCC yet. +#ifndef __clang__ + +struct vuint8mf2_t +{ + uchar val[8] = {0}; + vuint8mf2_t() {} + vuint8mf2_t(const uchar* ptr) + { + for (int i = 0; i < 8; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint8mf2_t +{ + schar val[8] = {0}; + vint8mf2_t() {} + vint8mf2_t(const schar* ptr) + { + for (int i = 0; i < 8; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vuint16mf2_t +{ + ushort val[4] = {0}; + vuint16mf2_t() {} + vuint16mf2_t(const ushort* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint16mf2_t +{ + short val[4] = {0}; + vint16mf2_t() {} + vint16mf2_t(const short* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vuint32mf2_t +{ + unsigned val[2] = {0}; + vuint32mf2_t() {} + vuint32mf2_t(const unsigned* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vint32mf2_t +{ + int val[2] = {0}; + vint32mf2_t() {} + vint32mf2_t(const int* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vfloat32mf2_t +{ + float val[2] = {0}; + vfloat32mf2_t() {} + vfloat32mf2_t(const float* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vuint64mf2_t +{ + uint64 val[1] = {0}; + vuint64mf2_t() {} + vuint64mf2_t(const uint64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vint64mf2_t +{ + int64 val[1] = {0}; + vint64mf2_t() {} + vint64mf2_t(const int64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vfloat64mf2_t +{ + double val[1] = {0}; + vfloat64mf2_t() {} + vfloat64mf2_t(const double* ptr) + { + val[0] = ptr[0]; + } +}; +struct vuint8mf4_t +{ + uchar val[4] = {0}; + vuint8mf4_t() {} + vuint8mf4_t(const uchar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint8mf4_t +{ + schar val[4] = {0}; + vint8mf4_t() {} + vint8mf4_t(const schar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; + +#define OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(_Tpvec, _Tp, suffix, width, n) \ +inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr, size_t vl) \ +{ \ + CV_UNUSED(vl); \ + return 
_Tpvec(ptr); \ +} \ +inline void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v, size_t vl) \ +{ \ + CV_UNUSED(vl); \ + for (int i = 0; i < n; ++i) \ + { \ + ptr[i] = v.val[i]; \ + } \ +} + +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint8mf2_t, uint8_t, u8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint8mf2_t, int8_t, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint16mf2_t, uint16_t, u16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint16mf2_t, int16_t, i16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint32mf2_t, uint32_t, u32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint32mf2_t, int32_t, i32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat32mf2_t, float32_t, f32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint64mf2_t, uint64_t, u64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint64mf2_t, int64_t, i64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat64mf2_t, float64_t, f64, 64, 1) + + +#define OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(_Tpwvec, _Tpvec, _wTp, wcvt, suffix, width, n) \ +inline _Tpwvec wcvt (_Tpvec v, size_t vl) \ +{ \ + _wTp tmp[n]; \ + for (int i = 0; i < n; ++i) \ + { \ + tmp[i] = (_wTp)v.val[i]; \ + } \ + return vle##width##_v_##suffix##m1(tmp, vl); \ +} + +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint16m1_t, vuint8mf2_t, ushort, vwcvtu_x_x_v_u16m1, u16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint16m1_t, vint8mf2_t, short, vwcvt_x_x_v_i16m1, i16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint32m1_t, vuint16mf2_t, unsigned, vwcvtu_x_x_v_u32m1, u32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint32m1_t, vint16mf2_t, int, vwcvt_x_x_v_i32m1, i32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint64m1_t, vuint32mf2_t, uint64, vwcvtu_x_x_v_u64m1, u64, 64, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint64m1_t, vint32mf2_t, int64, vwcvt_x_x_v_i64m1, i64, 64, 2) + +inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base, size_t vl) +{ + CV_UNUSED(vl); + return vuint8mf4_t(base); +} +inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base, size_t vl) +{ + CV_UNUSED(vl); + return vint8mf4_t(base); +} + +inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src, size_t vl) +{ + ushort tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (ushort)src.val[i]; + } + return vle16_v_u16mf2(tmp, vl); +} +inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src, size_t vl) +{ + short tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (short)src.val[i]; + } + return vle16_v_i16mf2(tmp, vl); +} +#endif + +//////////// Types //////////// + +#ifndef __clang__ +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + vse8_v_u8m1(val, v, nlanes); + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint8m1_t() const + { + return vle8_v_u8m1(val, nlanes); + } + uchar get0() const + { + return val[0]; + } + + uchar val[16]; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + vse8_v_i8m1(val, v, nlanes); + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint8m1_t() const + { + return vle8_v_i8m1(val, nlanes); + } + schar get0() const + { + return val[0]; + } + + schar val[16]; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + vse16_v_u16m1(val, v, nlanes); + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint16m1_t() const + { + return vle16_v_u16m1(val, nlanes); + } + ushort get0() const + { + return val[0]; + } + + ushort val[8]; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + vse16_v_i16m1(val, v, nlanes); + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint16m1_t() const + { + return vle16_v_i16m1(val, nlanes); + } + short get0() const + { + return val[0]; + } + + short val[8]; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + vse32_v_u32m1(val, v, nlanes); + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint32m1_t() const + { + return vle32_v_u32m1(val, nlanes); + } + unsigned get0() const + { + return val[0]; + } + + unsigned val[4]; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + vse32_v_i32m1(val, v, nlanes); + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint32m1_t() const + { + return vle32_v_i32m1(val, nlanes); + } + int get0() const + { + return val[0]; + } + int val[4]; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + vse32_v_f32m1(val, v, nlanes); + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat32m1_t() const + { + return vle32_v_f32m1(val, nlanes); + } + float get0() const + { + return val[0]; + } + float val[4]; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + vse64_v_u64m1(val, v, nlanes); + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint64m1_t() const + { + return vle64_v_u64m1(val, nlanes); + } + uint64 get0() const + { + return val[0]; + } + + uint64 val[2]; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + vse64_v_i64m1(val, v, nlanes); + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint64m1_t() const + { + return vle64_v_i64m1(val, nlanes); + } + int64 get0() const 
+ { + return val[0]; + } + + int64 val[2]; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + vse64_v_f64m1(val, v, nlanes); + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat64m1_t() const + { + return vle64_v_f64m1(val, nlanes); + } + double get0() const + { + return val[0]; + } + + double val[2]; +}; +#endif +#else +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + *pval = v; + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + *pval = vle8_v_u8m1(v, nlanes); + } + operator vuint8m1_t() const + { + return *pval; + } + uchar get0() const + { + return vmv_x(*pval); + } + inline v_uint8x16& operator=(const v_uint8x16& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint8x16(const v_uint8x16& vec) { + *pval = *(vec.pval); + } + uchar val[16]; + vuint8m1_t* pval = (vuint8m1_t*)val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + *pval = v; + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + *pval = vle8_v_i8m1(v, nlanes); + } + operator vint8m1_t() const + { + return *pval; + } + schar get0() const + { + return vmv_x(*pval); + } + inline v_int8x16& operator=(const v_int8x16& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int8x16(const v_int8x16& vec) { + *pval = *(vec.pval); + } + schar val[16]; + vint8m1_t* pval = (vint8m1_t*)val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + *pval = v; + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + *pval = vle16_v_u16m1(v, nlanes); + } + operator vuint16m1_t() const + { + return *pval; + } + ushort get0() const + { + return vmv_x(*pval); + } + + inline v_uint16x8& operator=(const v_uint16x8& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint16x8(const v_uint16x8& vec) { + *pval = *(vec.pval); + } + ushort val[8]; + vuint16m1_t* pval = (vuint16m1_t*)val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + *pval = v; + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + *pval = vle16_v_i16m1(v, nlanes); + } + operator vint16m1_t() const + { + return *pval; + } + short get0() const + { + return vmv_x(*pval); + } + + inline v_int16x8& operator=(const v_int16x8& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int16x8(const v_int16x8& vec) { + *pval = *(vec.pval); + } + short val[8]; + vint16m1_t* pval = (vint16m1_t*)val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + 
v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + *pval = v; + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + *pval = vle32_v_u32m1(v, nlanes); + } + operator vuint32m1_t() const + { + return *pval; + } + unsigned get0() const + { + return vmv_x(*pval); + } + + inline v_uint32x4& operator=(const v_uint32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint32x4(const v_uint32x4& vec) { + *pval = *(vec.pval); + } + unsigned val[4]; + vuint32m1_t* pval = (vuint32m1_t*)val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + *pval = v; + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + *pval = vle32_v_i32m1(v, nlanes); + } + operator vint32m1_t() const + { + return *pval; + } + int get0() const + { + return vmv_x(*pval); + } + + inline v_int32x4& operator=(const v_int32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int32x4(const v_int32x4& vec) { + *pval = *(vec.pval); + } + int val[4]; + vint32m1_t* pval = (vint32m1_t*)val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + *pval = v; + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + *pval = vle32_v_f32m1(v, nlanes); + } + operator vfloat32m1_t() const + { + return *pval; + } + float get0() const + { + return vfmv_f(*pval); + } + inline v_float32x4& operator=(const v_float32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_float32x4(const v_float32x4& vec) { + *pval = *(vec.pval); + } + float val[4]; + vfloat32m1_t* pval = (vfloat32m1_t*)val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + *pval = v; + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + *pval = vle64_v_u64m1(v, nlanes); + } + operator vuint64m1_t() const + { + return *pval; + } + uint64 get0() const + { + return vmv_x(*pval); + } + + inline v_uint64x2& operator=(const v_uint64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint64x2(const v_uint64x2& vec) { + *pval = *(vec.pval); + } + uint64 val[2]; + vuint64m1_t* pval = (vuint64m1_t*)val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + *pval = v; + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + *pval = vle64_v_i64m1(v, nlanes); + } + operator vint64m1_t() const + { + return *pval; + } + int64 get0() const + { + return vmv_x(*pval); + } + + inline v_int64x2& operator=(const v_int64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int64x2(const v_int64x2& vec) { + *pval = *(vec.pval); + } + int64 val[2]; + vint64m1_t* pval = (vint64m1_t*)val; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + *pval = v; + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + *pval = vle64_v_f64m1(v, nlanes); + } + operator vfloat64m1_t() const + { + return *pval; + } + double get0() const + { + return vfmv_f(*pval); + } + + inline v_float64x2& operator=(const v_float64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_float64x2(const v_float64x2& vec) { + *pval = *(vec.pval); + } + double 
val[2]; + vfloat64m1_t* pval = (vfloat64m1_t*)val; +}; +#endif // CV_SIMD128_64F +#endif // __clang__ + +//////////// Initial //////////// + +#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, suffix1, suffix2, vl) \ +inline v_##_Tpvec v_setzero_##suffix1() \ +{ \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(0, vl)); \ +} \ +inline v_##_Tpvec v_setall_##suffix1(_Tp v) \ +{ \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(v, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, u8, u8, 16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, s8, i8, 16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, u16, u16, 8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, s16, i16, 8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, u32, u32, 4) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, s32, i32, 4) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, u64, u64, 2) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, s64, i64, 2) + +#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, suffix, vl) \ +inline v_##_Tpv v_setzero_##suffix() \ +{ \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(0, vl)); \ +} \ +inline v_##_Tpv v_setall_##suffix(_Tp v) \ +{ \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(v, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, f32, 4) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, f64, 2) +#endif + +//////////// Reinterpret //////////// + +#define OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(_Tpvec, suffix) \ +inline v_##_Tpvec v_reinterpret_as_##suffix(const v_##_Tpvec& v) { return v; } + +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint8x16, u8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int8x16, s8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint16x8, u16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int16x8, s16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint32x4, u32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int32x4, s32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float32x4, f32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint64x2, u64) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2) \ +inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \ +{ \ + return v_##_Tpvec1(vreinterpret_v_##nsuffix2##m1_##nsuffix1##m1(v));\ +} \ +inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \ +{ \ + return v_##_Tpvec2(vreinterpret_v_##nsuffix1##m1_##nsuffix2##m1(v));\ +} + +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8x16, int8x16, u8, s8, u8, i8) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16x8, int16x8, u16, s16, u16, i16) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32x4, int32x4, u32, s32, u32, i32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32x4, float32x4, u32, f32, u32, f32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32x4, float32x4, s32, f32, i32, f32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint64x2, int64x2, u64, s64, u64, i64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint64x2, float64x2, u64, f64, u64, f64) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int64x2, float64x2, s64, f64, i64, f64) +#endif +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8x16, uint16x8, u8, u16, u8, u16) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8x16, uint32x4, u8, u32, u8, u32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8x16, uint64x2, u8, u64, u8, u64) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16x8, uint32x4, u16, u32, u16, u32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16x8, uint64x2, u16, 
u64, u16, u64) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32x4, uint64x2, u32, u64, u32, u64) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8x16, int16x8, s8, s16, i8, i16) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8x16, int32x4, s8, s32, i8, i32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8x16, int64x2, s8, s64, i8, i64) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16x8, int32x4, s16, s32, i16, i32) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16x8, int64x2, s16, s64, i16, i64) +OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32x4, int64x2, s32, s64, i32, i64) + + +#define OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \ +inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \ +{ \ + return v_##_Tpvec1(vreinterpret_v_##nsuffix1##width2##m1_##nsuffix1##width1##m1(vreinterpret_v_##nsuffix2##width2##m1_##nsuffix1##width2##m1(v)));\ +} \ +inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \ +{ \ + return v_##_Tpvec2(vreinterpret_v_##nsuffix1##width2##m1_##nsuffix2##width2##m1(vreinterpret_v_##nsuffix1##width1##m1_##nsuffix1##width2##m1(v)));\ +} + +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8x16, int16x8, u8, s16, u, i, 8, 16) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8x16, int32x4, u8, s32, u, i, 8, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8x16, int64x2, u8, s64, u, i, 8, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16x8, int8x16, u16, s8, u, i, 16, 8) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16x8, int32x4, u16, s32, u, i, 16, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16x8, int64x2, u16, s64, u, i, 16, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32x4, int8x16, u32, s8, u, i, 32, 8) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32x4, int16x8, u32, s16, u, i, 32, 16) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32x4, int64x2, u32, s64, u, i, 32, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64x2, int8x16, u64, s8, u, i, 64, 8) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64x2, int16x8, u64, s16, u, i, 64, 16) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64x2, int32x4, u64, s32, u, i, 64, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8x16, float32x4, u8, f32, u, f, 8, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16x8, float32x4, u16, f32, u, f, 16, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64x2, float32x4, u64, f32, u, f, 64, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8x16, float32x4, s8, f32, i, f, 8, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16x8, float32x4, s16, f32, i, f, 16, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int64x2, float32x4, s64, f32, i, f, 64, 32) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8x16, float64x2, u8, f64, u, f, 8, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16x8, float64x2, u16, f64, u, f, 16, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32x4, float64x2, u32, f64, u, f, 32, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8x16, float64x2, s8, f64, i, f, 8, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16x8, float64x2, s16, f64, i, f, 16, 64) +OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int32x4, float64x2, s32, f64, i, f, 32, 64) + +// Three times reinterpret +inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& v) \ +{ \ + return v_float32x4(vreinterpret_v_u32m1_f32m1(vreinterpret_v_u64m1_u32m1(vreinterpret_v_f64m1_u64m1(v))));\ +} \ +inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& v) \ +{ \ + return 
v_float64x2(vreinterpret_v_u64m1_f64m1(vreinterpret_v_u32m1_u64m1(vreinterpret_v_f32m1_u32m1(v))));\ +} + +////////////// Extract ////////////// + +#define OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(_Tpvec, _Tp, suffix, vmv, vl) \ +template <int s> \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, s, vl), b, _Tpvec::nlanes - s, vl)); \ +} \ +template<int i> inline _Tp v_extract_n(_Tpvec v) \ +{ \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), v, i, vl))); \ +} + + +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint8x16, uchar, u8, vmv_x_s_u8m1_u8, 16) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int8x16, schar, i8, vmv_x_s_i8m1_i8, 16) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint16x8, ushort, u16, vmv_x_s_u16m1_u16, 8) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int16x8, short, i16, vmv_x_s_i16m1_i16, 8) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint32x4, uint, u32, vmv_x_s_u32m1_u32, 4) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int32x4, int, i32, vmv_x_s_i32m1_i32, 4) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint64x2, uint64, u64, vmv_x_s_u64m1_u64, 2) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int64x2, int64, i64, vmv_x_s_i64m1_i64, 2) + +#define OPENCV_HAL_IMPL_RVV_EXTRACT_FP(_Tpvec, _Tp, suffix, vmv, vl) \ +template <int s> \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, s, vl), b, _Tpvec::nlanes - s, vl)); \ +} \ +template<int i> inline _Tp v_extract_n(_Tpvec v) \ +{ \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), v, i, vl))); \ +} + +OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float32x4, float, f32, vfmv_f_s_f32m1_f32, 4) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float64x2, double, f64, vfmv_f_s_f64m1_f64, 2) +#endif + +////////////// Load/Store ////////////// + +#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, vl, width, suffix, vmv) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ \ + return _Tpvec(vle##width##_v_##suffix##m1(ptr, vl)); \ +} \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ \ + return _Tpvec(vle##width##_v_##suffix##m1(ptr, vl)); \ +} \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ + _Tpvec res = _Tpvec(vle##width##_v_##suffix##m1(ptr, hvl)); \ + return res; \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ +} \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ +} \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + vse##width##_v_##suffix##m1(ptr, a, hvl); \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + vse##width##_v_##suffix##m1(ptr, vslidedown_vx_##suffix##m1(vmv(0, vl), a, hvl, vl), hvl); \ +} + +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 16, 8, u8, vmv_v_x_u8m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 16, 8, i8, vmv_v_x_i8m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 8, 16, u16, vmv_v_x_u16m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 8, 16, i16, vmv_v_x_i16m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t,
unsigned, 2, 4, 32, u32, vmv_v_x_u32m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 4, 32, i32, vmv_v_x_i32m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 2, 64, u64, vmv_v_x_u64m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 2, 64, i64, vmv_v_x_i64m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 4, 32, f32, vfmv_v_f_f32m1) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 2, 64, f64, vfmv_v_f_f64m1) +#endif + +inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) +{ + schar elems[16] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7], + ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7] + }; + return v_int8x16(vle8_v_i8m1(elems, 16)); +} +inline v_uint8x16 v_load_halves(const uchar* ptr0, const uchar* ptr1) { return v_reinterpret_as_u8(v_load_halves((schar*)ptr0, (schar*)ptr1)); } + +inline v_int16x8 v_load_halves(const short* ptr0, const short* ptr1) +{ + short elems[8] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3] + }; + return v_int16x8(vle16_v_i16m1(elems, 8)); +} +inline v_uint16x8 v_load_halves(const ushort* ptr0, const ushort* ptr1) { return v_reinterpret_as_u16(v_load_halves((short*)ptr0, (short*)ptr1)); } + +inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) +{ + int elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + return v_int32x4(vle32_v_i32m1(elems, 4)); +} +inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) +{ + float elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + return v_float32x4(vle32_v_f32m1(elems, 4)); +} +inline v_uint32x4 v_load_halves(const unsigned* ptr0, const unsigned* ptr1) { return v_reinterpret_as_u32(v_load_halves((int*)ptr0, (int*)ptr1)); } + +inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1) +{ + int64 elems[2] = + { + ptr0[0], ptr1[0] + }; + return v_int64x2(vle64_v_i64m1(elems, 2)); +} +inline v_uint64x2 v_load_halves(const uint64* ptr0, const uint64* ptr1) { return v_reinterpret_as_u64(v_load_halves((int64*)ptr0, (int64*)ptr1)); } + +#if CV_SIMD128_64F +inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) +{ + double elems[2] = + { + ptr0[0], ptr1[0] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} +#endif + + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(vle8_v_i8m1(elems, 16)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(vle8_v_i8m1(elems, 16)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + 
tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(vle8_v_i8m1(elems, 16)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(vle16_v_i16m1(elems, 8)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(vle16_v_i16m1(elems, 8)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + short elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3] + }; + return v_int16x8(vle16_v_i16m1(elems, 8)); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(vle32_v_i32m1(elems, 4)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + int elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + return v_int32x4(vle32_v_i32m1(elems, 4)); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vle32_v_i32m1(tab + idx[0], 4)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + int64_t elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_int64x2(vle64_v_i64m1(elems, 2)); +} +inline v_int64x2 v_lut_pairs(const int64* tab, const int* idx) +{ + return v_int64x2(vle64_v_i64m1(tab + idx[0], 2)); +} +inline v_uint64x2 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_float32x4(vle32_v_f32m1(elems, 4)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + float elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + 
}; + return v_float32x4(vle32_v_f32m1(elems, 4)); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + return v_float32x4(vle32_v_f32m1(tab + idx[0], 4)); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + return v_int32x4(vle32_v_i32m1(elems, 4)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + return v_uint32x4(vle32_v_u32m1(elems, 4)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + return v_float32x4(vle32_v_f32m1(elems, 4)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(vle64_v_f64m1(tab + idx[0], 2)); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double elems[2] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int idx[4] = {0}; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////////////// Pack boolean //////////////////// + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + ushort ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 8, b); + return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr, 16), 0, 16)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + unsigned ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 4, b); + v_store(ptr + 8, c); + v_store(ptr + 12, d); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr, 16), 0, 16), 0, 16)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint64 ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 2, b); + v_store(ptr + 4, c); + v_store(ptr + 6, d); + v_store(ptr + 8, e); + v_store(ptr + 10, f); + v_store(ptr + 12, g); + v_store(ptr + 14, h); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr, 16), 0, 16), 0, 16), 0, 16)); +} + +////////////// Arithmetics ////////////// +#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, vl) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a, b, vl)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a = 
_Tpvec(intrin(a, b, vl)); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 2) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 2) +#endif + + +////////////// Bitwise logic ////////////// + +#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, vl) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, vl) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, vl) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(vnot_v_##suffix##m1(a, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 2) + +#define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b), 4))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), 
vreinterpret_v_f32m1_i32m1(b), 4))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(&, vand_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(|, vor_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(^, vxor_vv_i32m1) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a), 4))); +} + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b), 2))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b), 2))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(&, vand_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(|, vor_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(^, vxor_vv_i64m1) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a), 2))); +} +#endif + +////////////// Bitwise shifts ////////////// + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, vl) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} \ +template<int n> inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} \ +template<int n> inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} + +#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, vl) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} \ +template<int n> inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} \ +template<int n> inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n), vl)); \ +} + +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 2) + + +////////////// Comparison ////////////// + +#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, vl) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + uint64_t ones = -1; \ + return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b, vl), vmv_v_x_##suffix##m1(0, vl), ones, vl)); \ +} + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, vl) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + union { uint64 u; double d; } ones; ones.u = -1; \ + return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b, vl), vfmv_v_f_##suffix##m1(0, vl), ones.d, vl)); \ +} + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width, vl) \
+OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmsltu_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, vl) + +#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, vl) + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >=, vmfge_vv_##suffix##m1_b##width, suffix, vl) + + +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32, 4) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64, 2) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32, 4) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64, 2) +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32, 4) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64, 2) +#endif + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return a == a; } + +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return a == a; } +#endif + +////////////// Min/Max ////////////// + +#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, vl) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a, b, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 4) 
+OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 2) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 2) +#endif + +////////////// Arithmetics wrap ////////////// + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 8) + +////////////// Reduce ////////////// + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, vl, red) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + _nwTpvec zero = vmv_v_x_##wsuffix##m1(0, vl); \ + _nwTpvec res = vmv_v_x_##wsuffix##m1(0, vl); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero, vl); \ + return (scalartype)(_wTpvec(res).get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8x16, v_uint16x8, vuint16m1_t, unsigned, u8, u16, 16, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8x16, v_int16x8, vint16m1_t, int, i8, i16, 16, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 8, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 8, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 4, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 4, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 2, redsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, i64, i64, 2, redsum) + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, vl, red) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + _nwTpvec zero = vfmv_v_f_##wsuffix##m1(0, vl); \ + _nwTpvec res = vfmv_v_f_##wsuffix##m1(0, vl); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero, vl); \ + return (scalartype)(_wTpvec(res).get0()); \ +} + +// vfredsum for float has been renamed to fredosum; GNU has been updated accordingly.
+OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32x4, v_float32x4, vfloat32m1_t, float, f32, f32, 4, fredosum) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64x2, v_float64x2, vfloat64m1_t, double, f64, f64, 2, fredosum) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + _Tpvec res = _Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a, vl)); \ + return scalartype(res.get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, min, uchar, u8, 16, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 16, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 8, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 8, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 4, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 4, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 4, fredmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 16, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 16, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 8, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 8, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 4, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 4, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 4, fredmax) + + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + float elems[4] = + { + v_reduce_sum(a), + v_reduce_sum(b), + v_reduce_sum(c), + v_reduce_sum(d) + }; + return v_float32x4(vle32_v_f32m1(elems, 4)); +} + +////////////// Square-Root ////////////// + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(vfsqrt_v_f32m1(x, 4)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(vfsqrt_v_f64m1(x, 2)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0); + return one / v_sqrt(x); +} +#endif + +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a, 4), b, b, 4)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a, 4), b, b, 4)); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a, 2), b, b, 2)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a, 2), b, b, 2)); +} +#endif + +////////////// Multiply-Add ////////////// + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_float32x4(vfmacc_vv_f32m1(c, a, b, 4)); +} +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(vmacc_vv_i32m1(c, a, b, 4)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if
CV_SIMD128_64F +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(vfmacc_vv_f64m1(c, a, b, 2)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} +#endif + +////////////// Check all/any ////////////// + +// use overloaded vcpop in clang, no casting like (vuint64m1_t) is needed. +#ifndef __clang__ +#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl)); \ + return (v.val[0] | v.val[1]) == 0; \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift, vl)); \ + return (v.val[0] | v.val[1]) != 0; \ +} + +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2) + + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } + +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } + +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif +#else +#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + return vcpop(vmslt(a, 0, vl), vl) == vl; \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + return vcpop(vmslt(a, 0, vl), vl) != 0; \ +} + +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int8x16, 16) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int16x8, 8) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int32x4, 4) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int64x2, 2) + + +inline bool v_check_all(const v_uint8x16& a) +{ return v_check_all(v_reinterpret_as_s8(a)); } +inline bool v_check_any(const v_uint8x16& a) +{ return v_check_any(v_reinterpret_as_s8(a)); } + +inline bool v_check_all(const v_uint16x8& a) +{ return v_check_all(v_reinterpret_as_s16(a)); } +inline bool v_check_any(const v_uint16x8& a) +{ return v_check_any(v_reinterpret_as_s16(a)); } + +inline bool v_check_all(const v_uint32x4& a) +{ return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_uint32x4& a) +{ return v_check_any(v_reinterpret_as_s32(a)); } + +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_s32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_s32(a)); } + +inline bool 
v_check_all(const v_uint64x2& a) +{ return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_uint64x2& a) +{ return v_check_any(v_reinterpret_as_s64(a)); } + +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_s64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_s64(a)); } +#endif +#endif +////////////// abs ////////////// + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \ +inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_max(a, b) - v_min(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8x16, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16x8, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32x4, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32x4, absdiff) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff) +#endif +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs) + +// use reinterpret instead of c-style casting. +#ifndef __clang__ +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, vl) \ +inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b), vl), 0, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 8) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 4) +#else +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \ +inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _rTpvec(rshr(vreinterpret_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4) +#endif +#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \ +inline _Tprvec v_abs(const _Tpvec& a) \ +{ \ + return v_absdiff(a, v_setzero_##suffix()); \ +} + +OPENCV_HAL_IMPL_RVV_ABS(v_uint8x16, v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_ABS(v_uint16x8, v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_ABS(v_uint32x4, v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_ABS(v_float32x4, v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABS(v_float64x2, v_float64x2, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \ +inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_reduce_sum(v_absdiff(a, b)); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32x4, float) + +////////////// Select ////////////// + +#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, vl) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(merge(ne(mask, 0, vl), b, a, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 16) 
+OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 4) +OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 4) +OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 4) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 2) +#endif + +////////////// Rotate shift ////////////// + +#define OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(_Tpvec, suffix, vl) \ +template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + return _Tpvec(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, n, vl)); \ +} \ +template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, n, vl)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, n, vl), b, _Tpvec::nlanes - n, vl)); \ +} \ +template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), b, _Tpvec::nlanes - n, vl), a, n, vl)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint8x16, u8, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int8x16, i8, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint16x8, u16, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int64x2, i64, 2) + +#define OPENCV_HAL_IMPL_RVV_ROTATE_FP(_Tpvec, suffix, vl) \ +template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + return _Tpvec(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, n, vl)); \ +} \ +template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, n, vl)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, n, vl), b, _Tpvec::nlanes - n, vl)); \ +} \ +template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), b, _Tpvec::nlanes - n, vl), a, n, vl)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + +OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float32x4, f32, 4) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float64x2, f64, 2) +#endif + +////////////// Convert to float ////////////// + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(vfcvt_f_x_v_f32m1(a, 4)); +} + +#if CV_SIMD128_64F +#ifndef __clang__ +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vfloat64m2_t tmp =
vle64_v_f64m2(arr, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4)); +} +#else +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a); + return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(dst, 1, b), 4)); +} +#endif + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + double ptr[4] = {0}; + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a, 4), 4); + double elems[2] = + { + ptr[0], ptr[1] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + double ptr[4] = {0}; + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a, 4), 4); + double elems[2] = + { + ptr[2], ptr[3] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + double ptr[4] = {0}; + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a, 4), 4); + double elems[2] = + { + ptr[0], ptr[1] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + double ptr[4] = {0}; + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a, 4), 4); + double elems[2] = + { + ptr[2], ptr[3] + }; + return v_float64x2(vle64_v_f64m1(elems, 2)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + return v_float64x2(vfcvt_f_x_v_f64m1(a, 2)); +} +#endif + +////////////// Broadcast ////////////// + +#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \ +template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) \ +{ \ + return v_setall_##suffix(v_extract_n<i>(v)); \ +} + +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint8x16, u8) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint16x8, u16) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32x4, u32) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint64x2, u64) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int64x2, s64) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BROADCAST(v_float64x2, f64) +#endif + +////////////// Transpose4x4 ////////////// + +#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, _Tp, suffix) \ +inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ + const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ + v_##_Tpvec& b0, v_##_Tpvec& b1, \ + v_##_Tpvec& b2, v_##_Tpvec& b3) \ +{ \ + _Tp elems0[4] = \ + { \ + v_extract_n<0>(a0), \ + v_extract_n<0>(a1), \ + v_extract_n<0>(a2), \ + v_extract_n<0>(a3) \ + }; \ + b0 = v_load(elems0); \ + _Tp elems1[4] = \ + { \ + v_extract_n<1>(a0), \ + v_extract_n<1>(a1), \ + v_extract_n<1>(a2), \ + v_extract_n<1>(a3) \ + }; \ + b1 = v_load(elems1); \ + _Tp elems2[4] = \ + { \ + v_extract_n<2>(a0), \ + v_extract_n<2>(a1), \ + v_extract_n<2>(a2), \ + v_extract_n<2>(a3) \ + }; \ + b2 = v_load(elems2); \ + _Tp elems3[4] = \ + { \ + v_extract_n<3>(a0), \ + v_extract_n<3>(a1), \ + v_extract_n<3>(a2), \ + v_extract_n<3>(a3) \ + }; \ + b3 = v_load(elems3); \ +} + +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32)
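+ +// A minimal usage sketch for the 4x4 transpose above (hypothetical values; 'src' is an assumed, suitably sized float array): +// v_float32x4 r0 = v_load(src), r1 = v_load(src + 4), r2 = v_load(src + 8), r3 = v_load(src + 12); // load four rows +// v_float32x4 c0, c1, c2, c3; +// v_transpose4x4(r0, r1, r2, r3, c0, c1, c2, c3); // c0 = {r0[0], r1[0], r2[0], r3[0]}, and so on for c1..c3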
+ +////////////// Reverse ////////////// + +#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_reverse(const _Tpvec& a) \ +{ \ + _Tp ptr[_Tpvec::nlanes] = {0}; \ + _Tp ptra[_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + { \ + ptr[i] = ptra[_Tpvec::nlanes-i-1]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, i8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, i16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, f32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, i64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, f64) +#endif + +//////////// Value reordering //////////// + +#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt, vl) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + _Tp lptr[_Tpvec::nlanes/2] = {0}; \ + _Tp hptr[_Tpvec::nlanes/2] = {0}; \ + v_store_low(lptr, a); \ + v_store_high(hptr, a); \ + b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr, vl), vl)); \ + b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr, vl), vl)); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _Tp lptr[_Tpvec::nlanes/2] = {0}; \ + v_store_low(lptr, a); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr, vl), vl)); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _Tp hptr[_Tpvec::nlanes/2] = {0}; \ + v_store_high(hptr, a); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr, vl), vl)); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr, vl), vl)); \ +} + +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint16x8, uchar, v_uint8x16, 8, u8, vwcvtu_x_x_v_u16m1, 8) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int16x8, schar, v_int8x16, 8, i8, vwcvt_x_x_v_i16m1, 8) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint32x4, ushort, v_uint16x8, 16, u16, vwcvtu_x_x_v_u32m1, 4) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int32x4, short, v_int16x8, 16, i16, vwcvt_x_x_v_i32m1, 4) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint64x2, uint, v_uint32x4, 32, u32, vwcvtu_x_x_v_u64m1, 2) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int64x2, int, v_int32x4, 32, i32, vwcvt_x_x_v_i64m1, 2) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr, 4), 4), 4)); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr, 4), 4), 4)); +} + + +#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, rshr, shr, hvl, vl) \ +inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + return _Tpvec(shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl)); \ +} \ +inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + vse##hwidth##_v_##hsuffix##m1(ptr, shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl), hvl); \ +} \ +template<int n> inline \ +_Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr +
_wTpvec::nlanes, b); \ + return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl)); \ +} \ +template<int n> inline \ +void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + vse##hwidth##_v_##hsuffix##m1(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl)), hvl); \ +} + +OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 8, 16, u8, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 8, 16, i8, i16, vnclip_wx_i8m1, vnclip_wx_i8m1, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 16, 32, u16, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1, 4, 8) +OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 16, 32, i16, i32, vnclip_wx_i16m1, vnclip_wx_i16m1, 4, 8) +OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 32, 64, u32, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1, 2, 4) +OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 32, 64, i32, i64, vnclip_wx_i32m1, vnsra_wx_i32m1, 2, 4) + + +#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, rshr, cast, hvl, vl) \ +inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, vl)); \ +} \ +inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + vse##hwidth##_v_##hsuffix##m1(ptr, rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, vl), hvl); \ +} \ +template<int n> inline \ +_Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), n, vl)); \ +} \ +template<int n> inline \ +void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), n, vl))); \ +} + +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 8, 16, u8, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 16, 32, u16, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2, 4, 8) + + +#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + _Tp ptra0[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra1[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb0[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb1[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra0, a0); \ + v_store(ptra1, a1); \ + int i; \ + for( i = 0; i < v_##_Tpvec::nlanes/2; i++ ) \ + { \ + ptrb0[i*2] = ptra0[i]; \ + ptrb0[i*2+1] = ptra1[i]; \ + } \ + for( ; i < v_##_Tpvec::nlanes; i++ ) \ + { \ + ptrb1[i*2-v_##_Tpvec::nlanes] = ptra0[i]; \ + ptrb1[i*2-v_##_Tpvec::nlanes+1] = ptra1[i]; \ + } \ + b0 = v_load(ptrb0); \ + b1 = v_load(ptrb1); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp
ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_low(ptra, a); \ + v_store_low(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_high(ptra, a); \ + v_store_high(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c = v_combine_low(a, b); \ + d = v_combine_high(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, i8) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, i16) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, float, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + int i, i2; \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptra[i] = ptr[i2]; \ + ptrb[i] = ptr[i2+1]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ + int i, i3; \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptra[i] = ptr[i3]; \ + ptrb[i] = ptr[i3+1]; \ + ptrc[i] = ptr[i3+2]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrd[v_##_Tpvec::nlanes] = {0}; \ + int i, i4; \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptra[i] = ptr[i4]; \ + ptrb[i] = ptr[i4+1]; \ + ptrc[i] = ptr[i4+2]; \ + ptrd[i] = ptr[i4+3]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ + d = v_load(ptrd); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + int i, i2; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptr[i2] = ptra[i]; \ + ptr[i2+1] = ptrb[i]; \ + } \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + int i, i3; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + v_store(ptrc, c); \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptr[i3] = ptra[i]; \ + ptr[i3+1] = ptrb[i]; \ + ptr[i3+2] = ptrc[i]; \ + } \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode 
/*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + int i, i4; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrd[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + v_store(ptrc, c); \ + v_store(ptrd, d); \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptr[i4] = ptra[i]; \ + ptr[i4+1] = ptrb[i]; \ + ptr[i4+2] = ptrc[i]; \ + ptr[i4+3] = ptrd[i]; \ + } \ +} \ +inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \ +{ \ + _Tp ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrvec[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < v_##_Tpvec::nlanes/4; i++) \ + { \ + ptr[4*i ] = ptrvec[4*i ]; \ + ptr[4*i+1] = ptrvec[4*i+2]; \ + ptr[4*i+2] = ptrvec[4*i+1]; \ + ptr[4*i+3] = ptrvec[4*i+3]; \ + } \ + return v_load(ptr); \ +} \ +inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \ +{ \ + _Tp ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrvec[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < v_##_Tpvec::nlanes/8; i++) \ + { \ + ptr[8*i ] = ptrvec[4*i ]; \ + ptr[8*i+1] = ptrvec[4*i+4]; \ + ptr[8*i+2] = ptrvec[4*i+1]; \ + ptr[8*i+3] = ptrvec[4*i+5]; \ + ptr[8*i+4] = ptrvec[4*i+2]; \ + ptr[8*i+5] = ptrvec[4*i+6]; \ + ptr[8*i+6] = ptrvec[4*i+3]; \ + ptr[8*i+7] = ptrvec[4*i+7]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double) +#endif + +//////////// PopCount //////////// + +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +#define OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(_rTpvec, _Tpvec, _rTp, _Tp, suffix) \ +inline _rTpvec v_popcount(const _Tpvec& a) \ +{ \ + uchar ptra[16] = {0}; \ + v_store(ptra, v_reinterpret_as_u8(a)); \ + _rTp ptr[_Tpvec::nlanes] = {0}; \ + v_store(ptr, v_setzero_##suffix()); \ + for (int i = 0; i < _Tpvec::nlanes*(int)sizeof(_Tp); i++) \ + ptr[i/sizeof(_Tp)] += popCountTable[ptra[i]]; \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_uint8x16, uchar, uchar, u8) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_int8x16, uchar, schar, u8) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_uint16x8, ushort, ushort, u16) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_int16x8, ushort, short, u16) 
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_uint32x4, unsigned, unsigned, u32) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_int32x4, unsigned, int, u32) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_uint64x2, uint64, uint64, u64) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64) + +//////////// SignMask //////////// + +#ifndef __clang__ +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, vl, shift) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + int mask = 0; \ + _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift, vl)); \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + mask |= (int)(tmp.val[i]) << i; \ + return mask; \ +} + +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 16, 7) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 8, 15) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 4, 31) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 2, 63) + +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } +inline int v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#endif + +#else +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, width, vl) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + uint8_t ans[16] = {0};\ + vsm(ans, vmslt(a, 0, vl), vl);\ + return reinterpret_cast<int*>(ans)[0];\ +} + +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8x16, 8, 16) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16x8, 16, 8) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32x4, 32, 4) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64x2, 64, 2) + +inline int v_signmask(const v_uint8x16& a) +{ return v_signmask(v_reinterpret_as_s8(a)); } +inline int v_signmask(const v_uint16x8& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } +inline int v_signmask(const v_uint32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_uint64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +#endif + +#endif + +//////////// Scan forward //////////// + +#define OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(_Tpvec, _Tp, suffix) \ +inline int v_scan_forward(const _Tpvec& a) \ +{ \ + _Tp ptr[_Tpvec::nlanes] = {0}; \ + v_store(ptr, v_reinterpret_as_##suffix(a)); \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + if(int(ptr[i]) < 0) \ + return i; \ + return 0; \ +} + +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_float32x4, float, f32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_int64x2, int64, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SCAN_FORWARD_OP(v_float64x2, double, f64) +#endif +
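+// A minimal usage sketch for the two mask queries above (hypothetical values): +// v_int32x4 m = v_setall_s32(-1); // every lane negative, i.e. its mask bit is set +// int bits = v_signmask(m); // 0xF: bit i is set iff lane i is negative +// int idx = v_scan_forward(m); // 0: index of the first negative lane (also 0 when none is found) +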
+//////////// Pack triplets //////////// + +// use reinterpret instead of c-style casting. +#ifndef __clang__ +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + uint64 ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)vint8m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) +{ + return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); +} + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + uint64 ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vint16m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) +{ + return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); +} + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +#else + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + uint64 ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int8x16(vreinterpret_i8m1(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16))); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) +{ + return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); +} + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + uint64 ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int16x8(v_reinterpret_as_s16(v_uint8x16(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16)))); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) +{ + return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); +} + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +#endif + +////// FP16 support /////// + +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr, 4), 4)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v, 4), 4); +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + const int N = 4; + float buf[N]; + for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i]; + return v_load(buf); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + const int N = 4; + float buf[N]; + v_store(buf, v); + for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); +} +#endif + +////////////// Rounding ////////////// + +inline v_int32x4 v_round(const v_float32x4& a) +{ + return v_int32x4(vfcvt_x_f_v_i32m1(a, 4)); +} + +inline v_int32x4 v_floor(const v_float32x4& a) +{ + v_float32x4 ZP5 = v_setall_f32(0.5f); + v_float32x4 t = a - ZP5; + return v_int32x4(vfcvt_x_f_v_i32m1(t, 4)); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + v_float32x4 ZP5 = v_setall_f32(0.5f); + v_float32x4 t = a + ZP5; + return v_int32x4(vfcvt_x_f_v_i32m1(t, 4)); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a, 4)); +} +#if CV_SIMD128_64F +#ifndef __clang__ +inline v_int32x4 v_round(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vfloat64m2_t tmp = 
vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + double arr[4] = {a.val[0]-0.5f, a.val[1]-0.5f, 0, 0}; + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + double arr[4] = {a.val[0]+0.5f, a.val[1]+0.5f, 0, 0}; + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp, 4)); +} + +#else +inline v_int32x4 v_round(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a); + return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(dst, 1, b), 4)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4); + dst = vset_v_f64m1_f64m2(dst, 0, a); + dst = vfsub_vf_f64m2(dst, 0.5, 2); + return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4); + dst = vset_v_f64m1_f64m2(dst, 0, a); + dst = vfadd_vf_f64m2(dst, 0.5, 2); + return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} +#endif +#endif + + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + int ptr[8] = {0}; + v_int32x4 t1, t2; + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2; +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int ptr[8] = {0}; + v_int32x4 t1, t2; + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2 + c; +} + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + int64 ptr[4] = {0}; + v_int64x2 t1, t2; + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2; +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int64 ptr[4] = {0}; + v_int64x2 t1, t2; + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2 + c; +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + unsigned ptr[16] = {0}; + v_uint32x4 t1, t2, t3, t4; + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ + unsigned ptr[16] = {0}; + v_uint32x4 t1, t2, t3, t4; + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +inline 
v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + int ptr[16] = {0}; + v_int32x4 t1, t2, t3, t4; + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ + int ptr[16] = {0}; + v_int32x4 t1, t2, t3, t4; + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 ptr[8] = {0}; + v_uint64x2 t1, t2, t3, t4; + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 ptr[8] = {0}; + v_uint64x2 t1, t2, t3, t4; + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + int64 ptr[8] = {0}; + v_int64x2 t1, t2, t3, t4; + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ + int64 ptr[8] = {0}; + v_int64x2 t1, t2, t3, t4; + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } +#endif + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ + int ptr[8] = {0}; + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + return t1 + t2; +} +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int ptr[8] = {0}; + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + return t1 + t2 + c; +} + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ + int64 ptr[4] = {0}; + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + return t1 + t2; +} +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int64 ptr[4] = {0}; + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + return t1 + t2 + c; +} + + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ + unsigned ptr[16] = {0}; + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const 
v_uint32x4& c) +{ + unsigned ptr[16] = {0}; + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + int ptr[16] = {0}; + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ + int ptr[16] = {0}; + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 ptr[8] = {0}; + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 ptr[8] = {0}; + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int64 ptr[8] = {0}; + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ + int64 ptr[8] = {0}; + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } +#endif + + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v), 4); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1, 4); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2, 4); + res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3, 4); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v), 4); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1, 4); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2, 4); + return v_float32x4(res) + a; +} + +#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, 
_Tpwvec, _Tpw, suffix, wmul, width, vl, hvl) \ +inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \ +{ \ + _Tpw ptr[_Tpwvec::nlanes*2] = {0}; \ + vse##width##_v_##suffix##m2(ptr, wmul(a, b, vl), vl); \ + c = _Tpwvec(vle##width##_v_##suffix##m1(ptr, hvl)); \ + d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes, hvl)); \ +} + +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8x16, v_uint16x8, ushort, u16, vwmulu_vv_u16m2, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8x16, v_int16x8, short, i16, vwmul_vv_i16m2, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16x8, v_uint32x4, unsigned, u32, vwmulu_vv_u32m2, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16x8, v_int32x4, int, i32, vwmul_vv_i32m2, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32x4, v_uint64x2, uint64, u64, vwmulu_vv_u64m2, 64, 4, 2) + + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b, 8), 16, 8)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b, 8), 16, 8)); +} + + +//////// Saturating Multiply //////// + +#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _wTpvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _wTpvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a = a * b; \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16x8, v_int32x4) + + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + + +} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_rvv071.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_rvv071.hpp new file mode 100644 index 0000000..2bdc622 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_rvv071.hpp @@ -0,0 +1,2545 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// Copyright (C) 2015, PingTouGe Semiconductor Co., Ltd., all rights reserved. + +#ifndef OPENCV_HAL_INTRIN_RISCVV_HPP +#define OPENCV_HAL_INTRIN_RISCVV_HPP + +#include <float.h> +#include <algorithm> +#include "opencv2/core/utility.hpp" + +namespace cv +{ + +//!
@cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 +//////////// Types //////////// +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = (vuint8m1_t)vle_v_u8m1((unsigned char*)v, 16); + } + uchar get0() const + { + return vmv_x_s_u8m1_u8(val, 16); + } + + vuint8m1_t val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = (vint8m1_t)vle_v_i8m1((schar*)v, 16); + } + schar get0() const + { + return vmv_x_s_i8m1_i8(val, 16); + } + + vint8m1_t val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = (vuint16m1_t)vle_v_u16m1((unsigned short*)v, 8); + } + ushort get0() const + { + return vmv_x_s_u16m1_u16(val, 8); + } + + vuint16m1_t val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = (vint16m1_t)vle_v_i16m1((signed short*)v, 8); + } + short get0() const + { + return vmv_x_s_i16m1_i16(val, 8); + } + + vint16m1_t val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + val = (vuint32m1_t)vle_v_u32m1((unsigned int*)v, 4); + } + unsigned get0() const + { + return vmv_x_s_u32m1_u32(val, 4); + } + + vuint32m1_t val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = (vint32m1_t)vle_v_i32m1((signed int*)v, 4); + } + int get0() const + { + return vmv_x_s_i32m1_i32(val, 4); + } + vint32m1_t val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = (vfloat32m1_t)vle_v_f32m1((float*)v, 4); + } + float get0() const + { + return vfmv_f_s_f32m1_f32(val, 4); + } + vfloat32m1_t val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = (vuint64m1_t)vle_v_u64m1((unsigned long*)v, 2); + } + uint64 get0() const + 
{ + return vmv_x_s_u64m1_u64(val, 2); + } + vuint64m1_t val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = (vint64m1_t)vle_v_i64m1((long*)v, 2); + } + int64 get0() const + { + return vmv_x_s_i64m1_i64(val, 2); + } + vint64m1_t val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = (vfloat64m1_t)vle_v_f64m1((double*)v, 2); + } + double get0() const + { + return vfmv_f_s_f64m1_f64(val, 2); + } + vfloat64m1_t val; +}; + +#define OPENCV_HAL_IMPL_RISCVV_INIT(_Tpv, _Tp, suffix) \ +inline _Tp##m1_t vreinterpretq_##suffix##_##suffix(_Tp##m1_t v) { return v; } \ +inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16((vuint8m1_t)(v.val)); } \ +inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16((vint8m1_t)(v.val)); } \ +inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8((vuint16m1_t)(v.val)); } \ +inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8((vint16m1_t)(v.val)); } \ +inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4((vuint32m1_t)(v.val)); } \ +inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4((vint32m1_t)(v.val)); } \ +inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2((vuint64m1_t)(v.val)); } \ +inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2((vint64m1_t)(v.val)); } \ +inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4((vfloat32m1_t)(v.val)); }\ +inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2((vfloat64m1_t)(v.val)); } + + +OPENCV_HAL_IMPL_RISCVV_INIT(uint8x16, vuint8, u8) +OPENCV_HAL_IMPL_RISCVV_INIT(int8x16, vint8, s8) +OPENCV_HAL_IMPL_RISCVV_INIT(uint16x8, vuint16, u16) +OPENCV_HAL_IMPL_RISCVV_INIT(int16x8, vint16, s16) +OPENCV_HAL_IMPL_RISCVV_INIT(uint32x4, vuint32, u32) +OPENCV_HAL_IMPL_RISCVV_INIT(int32x4, vint32, s32) +OPENCV_HAL_IMPL_RISCVV_INIT(uint64x2, vuint64, u64) +OPENCV_HAL_IMPL_RISCVV_INIT(int64x2, vint64, s64) +OPENCV_HAL_IMPL_RISCVV_INIT(float64x2, vfloat64, f64) +OPENCV_HAL_IMPL_RISCVV_INIT(float32x4, vfloat32, f32) +#define OPENCV_HAL_IMPL_RISCVV_INIT_SET(__Tp, _Tp, suffix, len, num) \ +inline v_##_Tp##x##num v_setzero_##suffix() { return v_##_Tp##x##num((v##_Tp##m1_t){0}); } \ +inline v_##_Tp##x##num v_setall_##suffix(__Tp v) { return v_##_Tp##x##num(vmv_v_x_##len##m1(v, num)); } + +OPENCV_HAL_IMPL_RISCVV_INIT_SET(uchar, uint8, u8, u8, 16) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(char, int8, s8, i8, 16) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(ushort, uint16, u16, u16, 8) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(short, int16, s16, i16, 8) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(unsigned int, uint32, u32, u32, 4) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(int, int32, s32, i32, 4) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(unsigned long, uint64, u64, u64, 2) +OPENCV_HAL_IMPL_RISCVV_INIT_SET(long, int64, s64, i64, 2) +inline v_float32x4 v_setzero_f32() { return v_float32x4((vfloat32m1_t){0}); } +inline v_float32x4 v_setall_f32(float v) { return v_float32x4(vfmv_v_f_f32m1(v, 4)); } + +inline v_float64x2 v_setzero_f64() { return v_float64x2(vfmv_v_f_f64m1(0, 2)); } +inline v_float64x2 v_setall_f64(double v) { return 
v_float64x2(vfmv_v_f_f64m1(v, 2)); } + + +#define OPENCV_HAL_IMPL_RISCVV_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +#define OPENCV_HAL_IMPL_RISCVV_BIN_OPN(bin_op, _Tpvec, intrin, num) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val, num)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val, num); \ + return a; \ +} + +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint8x16, vsaddu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint8x16, vssubu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int8x16, vsadd_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int8x16, vssub_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint16x8, vsaddu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint16x8, vssubu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int16x8, vsadd_vv_i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int16x8, vssub_vv_i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int32x4, vsadd_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int32x4, vssub_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_int32x4, vmul_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint32x4, vadd_vv_u32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint32x4, vsub_vv_u32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_uint32x4, vmul_vv_u32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int64x2, vsadd_vv_i64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int64x2, vssub_vv_i64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint64x2, vadd_vv_u64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint64x2, vsub_vv_u64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_float32x4, vfadd_vv_f32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_float32x4, vfsub_vv_f32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_float32x4, vfmul_vv_f32m1, 4) +inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(vfdiv_vv_f32m1(a.val, b.val, 4)); +} +inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b) +{ + a.val = vfdiv_vv_f32m1(a.val, b.val, 4); + return a; +} + +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_float64x2, vfadd_vv_f64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_float64x2, vfsub_vv_f64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_float64x2, vfmul_vv_f64m1, 2) +inline v_float64x2 operator / (const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2(vfdiv_vv_f64m1(a.val, b.val, 2)); +} +inline v_float64x2& operator /= (v_float64x2& a, const v_float64x2& b) +{ + a.val = vfdiv_vv_f64m1(a.val, b.val, 2); + return a; +} +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_RISCVV_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +#define OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(_Tpvec, func, intrin, num) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val, num)); \ +} +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 8) 
+OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 2) + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(vfsqrt_v_f32m1(x.val, 4)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + return v_float32x4(vfrdiv_vf_f32m1(vfsqrt_v_f32m1(x.val, 4), 1, 4)); +} + +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a.val, a.val, 4), b.val, b.val, 4)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a.val, a.val, 4), b.val, b.val, 4)); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_float32x4(vfmacc_vv_f32m1(c.val, a.val, b.val, 4)); +} + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_int32x4(vmacc_vv_i32m1(c.val, a.val, b.val, 4)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + vfloat32m1_t res = vfmul_vf_f32m1(m0.val, v.val[0], 4);//vmuli_f32(m0.val, v.val, 0); + res = vfmacc_vf_f32m1(res, v.val[1], m1.val, 4);//vmulai_f32(res, m1.val, v.val, 1); + res = vfmacc_vf_f32m1(res, v.val[2], m2.val, 4);//vmulai_f32(res, m1.val, v.val, 1); + res = vfmacc_vf_f32m1(res, v.val[3], m3.val, 4);//vmulai_f32(res, m1.val, v.val, 1); + return v_float32x4(res); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + vfloat32m1_t res = vfmul_vf_f32m1(m0.val, v.val[0], 4);//vmuli_f32(m0.val, v.val, 0); + res = vfmacc_vf_f32m1(res, v.val[1], m1.val, 4);//vmulai_f32(res, m1.val, v.val, 1); + res = vfmacc_vf_f32m1(res, v.val[2], m2.val, 4);//vmulai_f32(res, m1.val, v.val, 1); + res = vfadd_vv_f32m1(res, a.val, 4);//vmulai_f32(res, m1.val, v.val, 1); + return v_float32x4(res); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(vfsqrt_v_f64m1(x.val, 2)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + return v_float64x2(vfrdiv_vf_f64m1(vfsqrt_v_f64m1(x.val, 2), 1, 2)); +} + +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a.val, a.val, 2), b.val, b.val, 2)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + return 
v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a.val, a.val, 2), b.val, b.val, 2)); +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_float64x2(vfmacc_vv_f64m1(c.val, a.val, b.val, 2)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} + +#define OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(_Tpvec, suffix, num) \ + OPENCV_HAL_IMPL_RISCVV_BIN_OPN(&, _Tpvec, vand_vv_##suffix, num) \ + OPENCV_HAL_IMPL_RISCVV_BIN_OPN(|, _Tpvec, vor_vv_##suffix, num) \ + OPENCV_HAL_IMPL_RISCVV_BIN_OPN(^, _Tpvec, vxor_vv_##suffix, num) \ + inline _Tpvec operator ~ (const _Tpvec & a) \ + { \ + return _Tpvec(vnot_v_##suffix(a.val, num)); \ + } + +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint8x16, u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint16x8, u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint32x4, u32m1, 4) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_uint64x2, u64m1, 2) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int8x16, i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int16x8, i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int32x4, i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_LOGIC_OPN(v_int64x2, i64m1, 2) + +#define OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + return v_float32x4(vfloat32m1_t(intrin(vint32m1_t(a.val), vint32m1_t(b.val), 4))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + a.val = vfloat32m1_t(intrin(vint32m1_t(a.val), vint32m1_t(b.val), 4)); \ + return a; \ +} + +OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(&, vand_vv_i32m1) +OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(|, vor_vv_i32m1) +OPENCV_HAL_IMPL_RISCVV_FLT_BIT_OP(^, vxor_vv_i32m1) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + return v_float32x4((vfloat32m1_t)(vnot_v_i32m1((vint32m1_t)(a.val), 4))); +} + +#define OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + return v_float64x2(vfloat64m1_t(intrin(vint64m1_t(a.val), vint64m1_t(b.val), 2))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + a.val = vfloat64m1_t(intrin(vint64m1_t(a.val), vint64m1_t(b.val), 2)); \ + return a; \ +} + +OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(&, vand_vv_i64m1) +OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(|, vor_vv_i64m1) +OPENCV_HAL_IMPL_RISCVV_FLT_64BIT_OP(^, vxor_vv_i64m1) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + return v_float64x2((vfloat64m1_t)(vnot_v_i64m1((vint64m1_t)(a.val), 2))); +} +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + return v_int16x8(vmulh_vv_i16m1(a.val, b.val, 8)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + return v_uint16x8(vmulhu_vv_u16m1(a.val, b.val, 8)); +} + +//#define OPENCV_HAL_IMPL_RISCVV_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \ +//inline _Tpuvec v_abs(const _Tpsvec& a) { \ +// E##xm1_t mask=vmflt_vf_e32xm1_f32m1(x.val, 0.0, 4); + +//OPENCV_HAL_IMPL_RISCVV_ABS(v_uint8x16, v_int8x16, u8, s8) +//OPENCV_HAL_IMPL_RISCVV_ABS(v_uint16x8, v_int16x8, u16, s16) +//OPENCV_HAL_IMPL_RISCVV_ABS(v_uint32x4, v_int32x4, u32, s32) + +inline v_uint32x4 v_abs(v_int32x4 x) +{ + vbool32_t mask=vmslt_vx_i32m1_b32(x.val, 0, 4); + return v_uint32x4((vuint32m1_t)vrsub_vx_i32m1_m(mask, x.val, x.val, 0, 4)); +} + +inline v_uint16x8 v_abs(v_int16x8 x) +{ + vbool16_t mask=vmslt_vx_i16m1_b16(x.val, 0, 8); + return 
v_uint16x8((vuint16m1_t)vrsub_vx_i16m1_m(mask, x.val, x.val, 0, 8)); +} + +inline v_uint8x16 v_abs(v_int8x16 x) +{ + vbool8_t mask=vmslt_vx_i8m1_b8(x.val, 0, 16); + return v_uint8x16((vuint8m1_t)vrsub_vx_i8m1_m(mask, x.val, x.val, 0, 16)); +} + +inline v_float32x4 v_abs(v_float32x4 x) +{ + return (v_float32x4)vfsgnjx_vv_f32m1(x.val, x.val, 4); +} + +inline v_float64x2 v_abs(v_float64x2 x) +{ + return (v_float64x2)vfsgnjx_vv_f64m1(x.val, x.val, 2); +} + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + vfloat32m1_t ret = vfsub_vv_f32m1(a.val, b.val, 4); + return (v_float32x4)vfsgnjx_vv_f32m1(ret, ret, 4); +} + +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m1_t ret = vfsub_vv_f64m1(a.val, b.val, 2); + return (v_float64x2)vfsgnjx_vv_f64m1(ret, ret, 2); +} + +#define OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(bit, num) \ +inline v_uint##bit##x##num v_absdiff(v_uint##bit##x##num a, v_uint##bit##x##num b){ \ + vuint##bit##m1_t vmax = vmaxu_vv_u##bit##m1(a.val, b.val, num); \ + vuint##bit##m1_t vmin = vminu_vv_u##bit##m1(a.val, b.val, num); \ + return v_uint##bit##x##num(vsub_vv_u##bit##m1(vmax, vmin, num));\ +} + +OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(8, 16) +OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(16, 8) +OPENCV_HAL_IMPL_RISCVV_ABSDIFF_U(32, 4) + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(v_int8x16 a, v_int8x16 b){ + vint8m1_t vmax = vmax_vv_i8m1(a.val, b.val, 16); + vint8m1_t vmin = vmin_vv_i8m1(a.val, b.val, 16); + return v_int8x16(vssub_vv_i8m1(vmax, vmin, 16)); +} +inline v_int16x8 v_absdiffs(v_int16x8 a, v_int16x8 b){ + vint16m1_t vmax = vmax_vv_i16m1(a.val, b.val, 8); + vint16m1_t vmin = vmin_vv_i16m1(a.val, b.val, 8); + return v_int16x8(vssub_vv_i16m1(vmax, vmin, 8)); +} + +#define OPENCV_HAL_IMPL_RISCVV_ABSDIFF(_Tpvec, _Tpv, num) \ +inline v_uint##_Tpvec v_absdiff(v_int##_Tpvec a, v_int##_Tpvec b){ \ + vint##_Tpv##_t max = vmax_vv_i##_Tpv(a.val, b.val, num);\ + vint##_Tpv##_t min = vmin_vv_i##_Tpv(a.val, b.val, num);\ + return v_uint##_Tpvec((vuint##_Tpv##_t)vsub_vv_i##_Tpv(max, min, num)); \ +} + +OPENCV_HAL_IMPL_RISCVV_ABSDIFF(8x16, 8m1, 16) +OPENCV_HAL_IMPL_RISCVV_ABSDIFF(16x8, 16m1, 8) +OPENCV_HAL_IMPL_RISCVV_ABSDIFF(32x4, 32m1, 4) + +// Multiply and expand +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + vint16m2_t res = vundefined_i16m2(); + res = vwmul_vv_i16m2(a.val, b.val, 16); + c.val = vget_i16m2_i16m1(res, 0); + d.val = vget_i16m2_i16m1(res, 1); +} + +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + vuint16m2_t res = vundefined_u16m2(); + res = vwmulu_vv_u16m2(a.val, b.val, 16); + c.val = vget_u16m2_u16m1(res, 0); + d.val = vget_u16m2_u16m1(res, 1); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + vint32m2_t res = vundefined_i32m2(); + res = vwmul_vv_i32m2(a.val, b.val, 8); + c.val = vget_i32m2_i32m1(res, 0); + d.val = vget_i32m2_i32m1(res, 1); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + vuint32m2_t res = vundefined_u32m2(); + res = vwmulu_vv_u32m2(a.val, b.val, 8); + c.val = vget_u32m2_u32m1(res, 0); + d.val = vget_u32m2_u32m1(res, 1); +} + +inline void v_mul_expand(const v_int32x4& a, const v_int32x4& b, + v_int64x2& c, v_int64x2& d) +{ + vint64m2_t res = vundefined_i64m2(); + res = vwmul_vv_i64m2(a.val, b.val, 4); + c.val = vget_i64m2_i64m1(res, 0); + d.val = 
vget_i64m2_i64m1(res, 1); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + vuint64m2_t res = vundefined_u64m2(); + res = vwmulu_vv_u64m2(a.val, b.val, 4); + c.val = vget_u64m2_u64m1(res, 0); + d.val = vget_u64m2_u64m1(res, 1); +} + +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 16) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 8) +OPENCV_HAL_IMPL_RISCVV_BINN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 8) +//////// Dot Product //////// +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + vint32m2_t res = vundefined_i32m2(); + res = vwmul_vv_i32m2(a.val, b.val, 8); + res = vrgather_vv_i32m2(res, (vuint32m2_t){0, 2, 4, 6, 1, 3, 5, 7}, 8); + return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(res, 0), vget_i32m2_i32m1(res, 1), 4)); +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + vint32m2_t res = vundefined_i32m2(); + res = vwmul_vv_i32m2(a.val, b.val, 8); + res = vrgather_vv_i32m2(res, (vuint32m2_t){0, 2, 4, 6, 1, 3, 5, 7}, 8); + return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(res, 0),vget_i32m2_i32m1(res, 1), 4), c.val, 4)); +} + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + vint64m2_t res = vundefined_i64m2(); + res = vwmul_vv_i64m2(a.val, b.val, 4); + res = vrgather_vv_i64m2(res, (vuint64m2_t){0, 2, 1, 3}, 4); + return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(res, 0), vget_i64m2_i64m1(res, 1), 2)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + vint64m2_t res = vundefined_i64m2(); + res = vwmul_vv_i64m2(a.val, b.val, 4); + res = vrgather_vv_i64m2(res, (vuint64m2_t){0, 2, 1, 3}, 4); + return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(res, 0), vget_i64m2_i64m1(res, 1), 2), c.val, 2)); +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + vuint16m2_t v1 = vundefined_u16m2(); + vuint32m2_t v2 = vundefined_u32m2(); + v1 = vwmulu_vv_u16m2(a.val, b.val, 16); + v1 = vrgather_vv_u16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16); + v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8); + return v_uint32x4(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4)); +} + +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ + vuint16m2_t v1 = vundefined_u16m2(); + vuint32m2_t v2 = vundefined_u32m2(); + v1 = vwmulu_vv_u16m2(a.val, b.val, 16); + v1 = vrgather_vv_u16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16); + v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8); + return 
v_uint32x4(vadd_vv_u32m1(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4), c.val, 4)); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + vint16m2_t v1 = vundefined_i16m2(); + vint32m2_t v2 = vundefined_i32m2(); + v1 = vwmul_vv_i16m2(a.val, b.val, 16); + v1 = vrgather_vv_i16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16); + v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8); + return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4)); +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ + vint16m2_t v1 = vundefined_i16m2(); + vint32m2_t v2 = vundefined_i32m2(); + v1 = vwmul_vv_i16m2(a.val, b.val, 16); + v1 = vrgather_vv_i16m2(v1, (vuint16m2_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16); + v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8); + return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4), c.val, 4)); +} + +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + vuint32m2_t v1 = vundefined_u32m2(); + vuint64m2_t v2 = vundefined_u64m2(); + v1 = vwmulu_vv_u32m2(a.val, b.val, 8); + v1 = vrgather_vv_u32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8); + v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4); + return v_uint64x2(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2)); +} + +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, + const v_uint64x2& c) +{ + vuint32m2_t v1 = vundefined_u32m2(); + vuint64m2_t v2 = vundefined_u64m2(); + v1 = vwmulu_vv_u32m2(a.val, b.val, 8); + v1 = vrgather_vv_u32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8); + v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4); + return v_uint64x2(vadd_vv_u64m1(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2), c.val, 2)); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + vint32m2_t v1 = vundefined_i32m2(); + vint64m2_t v2 = vundefined_i64m2(); + v1 = vwmul_vv_i32m2(a.val, b.val, 8); + v1 = vrgather_vv_i32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8); + v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4); + return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2)); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ + vint32m2_t v1 = vundefined_i32m2(); + vint64m2_t v2 = vundefined_i64m2(); + v1 = vwmul_vv_i32m2(a.val, b.val, 8); + v1 = vrgather_vv_i32m2(v1, (vuint32m2_t){0, 4, 1, 5, 2, 6, 3, 7}, 8); + v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4); + return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2), c.val, 2)); +} + +//////// Fast Dot Product //////// +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ + vint32m2_t v1 = vundefined_i32m2(); + v1 = vwmul_vv_i32m2(a.val, b.val, 8); + return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4)); +} + +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + vint32m2_t v1 = vundefined_i32m2(); + v1 = vwmul_vv_i32m2(a.val, b.val, 8); + return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4), c.val, 4)); +} + +// 32 >> 64 +inline 
v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{
+    vint64m2_t v1 = vundefined_i64m2();
+    v1 = vwmul_vv_i64m2(a.val, b.val, 4);
+    return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(v1, 0), vget_i64m2_i64m1(v1, 1), 2));
+}
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    vint64m2_t v1 = vundefined_i64m2();
+    v1 = vwmul_vv_i64m2(a.val, b.val, 4);
+    return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v1, 0), vget_i64m2_i64m1(v1, 1), 2), c.val, 2));
+}
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{
+    vuint16m2_t v1 = vundefined_u16m2();
+    vuint32m2_t v2 = vundefined_u32m2();
+    v1 = vwmulu_vv_u16m2(a.val, b.val, 16);
+    v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8);
+    return v_uint32x4(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4));
+}
+
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{
+    vuint16m2_t v1 = vundefined_u16m2();
+    vuint32m2_t v2 = vundefined_u32m2();
+    v1 = vwmulu_vv_u16m2(a.val, b.val, 16);
+    v2 = vwaddu_vv_u32m2(vget_u16m2_u16m1(v1, 0), vget_u16m2_u16m1(v1, 1), 8);
+    return v_uint32x4(vadd_vv_u32m1(vadd_vv_u32m1(vget_u32m2_u32m1(v2, 0), vget_u32m2_u32m1(v2, 1), 4), c.val, 4));
+}
+
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
+{
+    vint16m2_t v1 = vundefined_i16m2();
+    vint32m2_t v2 = vundefined_i32m2();
+    v1 = vwmul_vv_i16m2(a.val, b.val, 16);
+    v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8);
+    return v_int32x4(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4));
+}
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{
+    vint16m2_t v1 = vundefined_i16m2();
+    vint32m2_t v2 = vundefined_i32m2();
+    v1 = vwmul_vv_i16m2(a.val, b.val, 16);
+    v2 = vwadd_vv_i32m2(vget_i16m2_i16m1(v1, 0), vget_i16m2_i16m1(v1, 1), 8);
+    return v_int32x4(vadd_vv_i32m1(vadd_vv_i32m1(vget_i32m2_i32m1(v2, 0), vget_i32m2_i32m1(v2, 1), 4), c.val, 4));
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vuint32m2_t v1 = vundefined_u32m2();
+    vuint64m2_t v2 = vundefined_u64m2();
+    v1 = vwmulu_vv_u32m2(a.val, b.val, 8);
+    v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4);
+    return v_uint64x2(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2));
+}
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{
+    vuint32m2_t v1 = vundefined_u32m2();
+    vuint64m2_t v2 = vundefined_u64m2();
+    v1 = vwmulu_vv_u32m2(a.val, b.val, 8);
+    v2 = vwaddu_vv_u64m2(vget_u32m2_u32m1(v1, 0), vget_u32m2_u32m1(v1, 1), 4);
+    return v_uint64x2(vadd_vv_u64m1(vadd_vv_u64m1(vget_u64m2_u64m1(v2, 0), vget_u64m2_u64m1(v2, 1), 2), c.val, 2));
+}
+
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    vint64m2_t v2 = vundefined_i64m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    v2 = vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4);
+    return v_int64x2(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2));
+}
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{
+    vint32m2_t v1 = vundefined_i32m2();
+    vint64m2_t v2 = vundefined_i64m2();
+    v1 = vwmul_vv_i32m2(a.val, b.val, 8);
+    v2 =
vwadd_vv_i64m2(vget_i32m2_i32m1(v1, 0), vget_i32m2_i32m1(v1, 1), 4); + return v_int64x2(vadd_vv_i64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v2, 0), vget_i64m2_i64m1(v2, 1), 2), c.val, 2)); +} + + +#define OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(_Tpvec, _Tpvec2, len, scalartype, func, intrin, num) \ +inline scalartype v_reduce_##func(const v_##_Tpvec##x##num& a) \ +{\ + v##_Tpvec2##m1_t val = vmv_v_x_##len##m1(0, num); \ + val = intrin(val, a.val, val, num); \ + return vmv_x_s_##len##m1_##len(val, num); \ +} + + +#define OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(_Tpvec, _Tpvec2, scalartype, func, funcu, num) \ +inline scalartype v_reduce_##func(const v_##_Tpvec##x##num& a) \ +{\ + v##_Tpvec##m1_t val = (v##_Tpvec##m1_t)vmv_v_x_i8m1(0, num); \ + val = v##funcu##_vs_##_Tpvec2##m1_##_Tpvec2##m1(val, a.val, a.val, num); \ + return val[0]; \ +} +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(int8, int16, i16, int, sum, vwredsum_vs_i8m1_i16m1, 16) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(int16, int32, i32, int, sum, vwredsum_vs_i16m1_i32m1, 8) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(int32, int64, i64, int, sum, vwredsum_vs_i32m1_i64m1, 4) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(uint8, uint16, u16, unsigned, sum, vwredsumu_vs_u8m1_u16m1, 16) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(uint16, uint32, u32, unsigned, sum, vwredsumu_vs_u16m1_u32m1, 8) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_W(uint32, uint64, u64, unsigned, sum, vwredsumu_vs_u32m1_u64m1, 4) +inline float v_reduce_sum(const v_float32x4& a) \ +{\ + vfloat32m1_t val = vfmv_v_f_f32m1(0.0, 4); \ + val = vfredsum_vs_f32m1_f32m1(val, a.val, val, 4); \ + return vfmv_f_s_f32m1_f32(val, 4); \ +} +inline double v_reduce_sum(const v_float64x2& a) \ +{\ + vfloat64m1_t val = vfmv_v_f_f64m1(0.0, 2); \ + val = vfredsum_vs_f64m1_f64m1(val, a.val, val, 2); \ + return vfmv_f_s_f64m1_f64(val, 2); \ +} +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return vext_x_v_u64m1_u64((vuint64m1_t)a.val, 0, 2)+vext_x_v_u64m1_u64((vuint64m1_t)a.val, 1, 2); } + +inline int64 v_reduce_sum(const v_int64x2& a) +{ return vext_x_v_i64m1_i64((vint64m1_t)a.val, 0, 2)+vext_x_v_i64m1_i64((vint64m1_t)a.val, 1, 2); } + +#define OPENCV_HAL_IMPL_RISCVV_REDUCE_OP(func) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int8, i8, int, func, red##func, 16) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int16, i16, int, func, red##func, 8) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int32, i32, int, func, red##func, 4) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(int64, i64, int, func, red##func, 2) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(uint8, u8, unsigned, func, red##func##u, 16) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(uint16, u16, unsigned, func, red##func##u, 8) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(uint32, u32, unsigned, func, red##func##u, 4) \ +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP_(float32, f32, float, func, fred##func, 4) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP(max) +OPENCV_HAL_IMPL_RISCVV_REDUCE_OP(min) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + vfloat32m1_t a0 = vfmv_v_f_f32m1(0.0, 4); + vfloat32m1_t b0 = vfmv_v_f_f32m1(0.0, 4); + vfloat32m1_t c0 = vfmv_v_f_f32m1(0.0, 4); + vfloat32m1_t d0 = vfmv_v_f_f32m1(0.0, 4); + a0 = vfredsum_vs_f32m1_f32m1(a0, a.val, a0, 4); + b0 = vfredsum_vs_f32m1_f32m1(b0, b.val, b0, 4); + c0 = vfredsum_vs_f32m1_f32m1(c0, c.val, c0, 4); + d0 = vfredsum_vs_f32m1_f32m1(d0, d.val, d0, 4); + return v_float32x4(a0[0], b0[0], c0[0], d0[0]); +} + +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + vfloat32m1_t a0 = vfmv_v_f_f32m1(0.0, 4); + vfloat32m1_t 
x = vfsub_vv_f32m1(a.val, b.val, 4); + vbool32_t mask=vmflt_vf_f32m1_b32(x, 0, 4); + vfloat32m1_t val = vfrsub_vf_f32m1_m(mask, x, x, 0, 4); + a0 = vfredsum_vs_f32m1_f32m1(a0, val, a0, 4); + return a0[0]; +} + +#define OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(_Tpvec, _Tpvec2) \ +inline unsigned v_reduce_sad(const _Tpvec& a, const _Tpvec&b){ \ + _Tpvec2 x = v_absdiff(a, b); \ + return v_reduce_sum(x); \ +} + +OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_int8x16, v_uint8x16) +OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_uint8x16, v_uint8x16) +OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_int16x8, v_uint16x8) +OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_uint16x8, v_uint16x8) +OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_int32x4, v_uint32x4) +OPENCV_HAL_IMPL_RISCVV_REDUCE_SAD(v_uint32x4, v_uint32x4) + +#define OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(_Tpvec, _Tp, _T, num, uv) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vbool##_T##_t mask = vmseq_vv_##_Tp##_b##_T(a.val, b.val, num); \ + return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \ +} \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vbool##_T##_t mask = vmsne_vv_##_Tp##_b##_T(a.val, b.val, num); \ + return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \ +} \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vbool##_T##_t mask = vmslt##uv##_Tp##_b##_T(a.val, b.val, num); \ + return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \ +} \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vbool##_T##_t mask = vmslt##uv##_Tp##_b##_T(b.val, a.val, num); \ + return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \ +} \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vbool##_T##_t mask = vmsle##uv##_Tp##_b##_T(a.val, b.val, num); \ + return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \ +} \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vbool##_T##_t mask = vmsle##uv##_Tp##_b##_T(b.val, a.val, num); \ + return _Tpvec(vmerge_vxm_##_Tp(mask, vmv_v_x_##_Tp(0, num), -1, num)); \ +} \ + +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int8x16, i8m1, 8, 16, _vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int16x8, i16m1, 16, 8, _vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int32x4, i32m1, 32, 4, _vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_int64x2, i64m1, 64, 2, _vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint8x16, u8m1, 8, 16, u_vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint16x8, u16m1, 16, 8, u_vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint32x4, u32m1, 32, 4, u_vv_) +OPENCV_HAL_IMPL_RISCVV_INT_CMP_OP(v_uint64x2, u64m1, 64, 2, u_vv_) + +//TODO: == +inline v_float32x4 operator == (const v_float32x4& a, const v_float32x4& b) +{ + vbool32_t mask = vmfeq_vv_f32m1_b32(a.val, b.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} +inline v_float32x4 operator != (const v_float32x4& a, const v_float32x4& b) +{ + vbool32_t mask = vmfne_vv_f32m1_b32(a.val, b.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} +inline v_float32x4 operator < (const v_float32x4& a, const v_float32x4& b) +{ + vbool32_t mask = vmflt_vv_f32m1_b32(a.val, b.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} +inline v_float32x4 operator <= (const v_float32x4& a, const v_float32x4& b) +{ + vbool32_t mask = 
vmfle_vv_f32m1_b32(a.val, b.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} +inline v_float32x4 operator > (const v_float32x4& a, const v_float32x4& b) +{ + vbool32_t mask = vmfgt_vv_f32m1_b32(a.val, b.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} +inline v_float32x4 operator >= (const v_float32x4& a, const v_float32x4& b) +{ + vbool32_t mask = vmfge_vv_f32m1_b32(a.val, b.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ + vbool32_t mask = vmford_vv_f32m1_b32(a.val, a.val, 4); + vint32m1_t res = vmerge_vxm_i32m1(mask, vmv_v_x_i32m1(0.0, 4), -1, 4); + return v_float32x4((vfloat32m1_t)res); +} + +//TODO: == +inline v_float64x2 operator == (const v_float64x2& a, const v_float64x2& b) +{ + vbool64_t mask = vmfeq_vv_f64m1_b64(a.val, b.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +inline v_float64x2 operator != (const v_float64x2& a, const v_float64x2& b) +{ + vbool64_t mask = vmfne_vv_f64m1_b64(a.val, b.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +inline v_float64x2 operator < (const v_float64x2& a, const v_float64x2& b) +{ + vbool64_t mask = vmflt_vv_f64m1_b64(a.val, b.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +inline v_float64x2 operator <= (const v_float64x2& a, const v_float64x2& b) +{ + vbool64_t mask = vmfle_vv_f64m1_b64(a.val, b.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +inline v_float64x2 operator > (const v_float64x2& a, const v_float64x2& b) +{ + vbool64_t mask = vmfgt_vv_f64m1_b64(a.val, b.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +inline v_float64x2 operator >= (const v_float64x2& a, const v_float64x2& b) +{ + vbool64_t mask = vmfge_vv_f64m1_b64(a.val, b.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ + vbool64_t mask = vmford_vv_f64m1_b64(a.val, a.val, 2); + vint64m1_t res = vmerge_vxm_i64m1(mask, vmv_v_x_i64m1(0.0, 2), -1, 2); + return v_float64x2((vfloat64m1_t)res); +} +#define OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(_Tp, _T) \ +inline void v_transpose4x4(const v_##_Tp##32x4& a0, const v_##_Tp##32x4& a1, \ + const v_##_Tp##32x4& a2, const v_##_Tp##32x4& a3, \ + v_##_Tp##32x4& b0, v_##_Tp##32x4& b1, \ + v_##_Tp##32x4& b2, v_##_Tp##32x4& b3) \ +{ \ + v##_Tp##32m4_t val = vundefined_##_T##m4(); \ + val = vset_##_T##m4(val, 0, a0.val); \ + val = vset_##_T##m4(val, 1, a1.val); \ + val = vset_##_T##m4(val, 2, a2.val); \ + val = vset_##_T##m4(val, 3, a3.val); \ + val = vrgather_vv_##_T##m4(val, (vuint32m4_t){0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}, 16); \ + b0.val = vget_##_T##m4_##_T##m1(val, 0); \ + b1.val = vget_##_T##m4_##_T##m1(val, 1); \ + b2.val = vget_##_T##m4_##_T##m1(val, 2); \ + b3.val = vget_##_T##m4_##_T##m1(val, 3); \ +} +OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(uint, u32) +OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(int, i32) 
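The v_transpose4x4 macro above fuses the four input rows into a single m4 register and performs the whole 4x4 transpose with one vrgather: output element k reads input element (k % 4) * 4 + k / 4, which is exactly the {0, 4, 8, 12, 1, 5, 9, 13, ...} index table it passes; the float32 instantiation follows below. As a sanity check of that index mapping, here is a minimal scalar sketch (illustrative only; transpose4x4_ref is a hypothetical name, not part of this patch):

#include <cstdio>

// Hypothetical scalar reference for the vrgather-based 4x4 transpose:
// out[k] = in[idx[k]] with idx[k] = (k % 4) * 4 + k / 4.
static void transpose4x4_ref(const int in[16], int out[16])
{
    static const int idx[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15};
    for (int k = 0; k < 16; k++)
        out[k] = in[idx[k]];                     // same permutation the vrgather applies
}

int main()
{
    int in[16], out[16];
    for (int k = 0; k < 16; k++) in[k] = k;      // input rows: {0..3}, {4..7}, ...
    transpose4x4_ref(in, out);
    for (int r = 0; r < 4; r++)                  // prints the transposed rows
        printf("%d %d %d %d\n", out[4*r], out[4*r+1], out[4*r+2], out[4*r+3]);
    return 0;                                    // 0 4 8 12 / 1 5 9 13 / 2 6 10 14 / 3 7 11 15
}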
+OPENCV_HAL_IMPL_RISCVV_TRANSPOSE4x4(float, f32)
+
+
+#define OPENCV_HAL_IMPL_RISCVV_SHIFT_LEFT(_Tpvec, suffix, _T, num) \
+inline _Tpvec operator << (const _Tpvec& a, int n) \
+{ return _Tpvec((vsll_vx_##_T##m1(a.val, n, num))); } \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec((vsll_vx_##_T##m1(a.val, n, num))); }
+
+#define OPENCV_HAL_IMPL_RISCVV_SHIFT_RIGHT(_Tpvec, suffix, _T, num, intric) \
+inline _Tpvec operator >> (const _Tpvec& a, int n) \
+{ return _Tpvec((v##intric##_vx_##_T##m1(a.val, n, num))); } \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec((v##intric##_vx_##_T##m1(a.val, n, num))); }\
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ return _Tpvec((v##intric##_vx_##_T##m1(vadd_vx_##_T##m1(a.val, 1<<(n-1), num), n, num))); }
+
+// trade efficiency for convenience
+#define OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(suffix, _T, num, intrin) \
+OPENCV_HAL_IMPL_RISCVV_SHIFT_LEFT(v_##suffix##x##num, suffix, _T, num) \
+OPENCV_HAL_IMPL_RISCVV_SHIFT_RIGHT(v_##suffix##x##num, suffix, _T, num, intrin)
+
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint8, u8, 16, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint16, u16, 8, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint32, u32, 4, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(uint64, u64, 2, srl)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int8, i8, 16, sra)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int16, i16, 8, sra)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int32, i32, 4, sra)
+OPENCV_HAL_IMPL_RISCVV_SHIFT_OP(int64, i64, 2, sra)
+
+#if 0
+#define VUP4(n) {0, 1, 2, 3}
+#define VUP8(n) {0, 1, 2, 3, 4, 5, 6, 7}
+#define VUP16(n) {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+#define VUP2(n) {0, 1}
+#endif
+#define OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(_Tpvec, suffix, _T, num, num2, vmv, len) \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{ \
+    suffix##m1_t tmp = vmv##_##_T##m1(0, num);\
+    tmp = vslideup_vx_##_T##m1_m(vmset_m_##len(num), tmp, a.val, n, num);\
+    return _Tpvec(tmp);\
+} \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{ \
+    return _Tpvec(vslidedown_vx_##_T##m1(a.val, n, num));\
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
+{ return a; } \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    suffix##m2_t tmp = vundefined_##_T##m2(); \
+    tmp = vset_##_T##m2(tmp, 0, a.val); \
+    tmp = vset_##_T##m2(tmp, 1, b.val); \
+    tmp = vslidedown_vx_##_T##m2(tmp, n, num2);\
+    return _Tpvec(vget_##_T##m2_##_T##m1(tmp, 0));\
+} \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    suffix##m2_t tmp = vundefined_##_T##m2(); \
+    tmp = vset_##_T##m2(tmp, 0, b.val); \
+    tmp = vset_##_T##m2(tmp, 1, a.val); \
+    tmp = vslideup_vx_##_T##m2(tmp, n, num2);\
+    return _Tpvec(vget_##_T##m2_##_T##m1(tmp, 1));\
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    CV_UNUSED(b); return a; \
+}
+
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint8x16, vuint8, u8, 16, 32, vmv_v_x, b8)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int8x16, vint8, i8, 16, 32, vmv_v_x, b8)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint16x8, vuint16, u16, 8, 16, vmv_v_x, b16)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int16x8, vint16, i16, 8, 16, vmv_v_x, b16)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint32x4, vuint32, u32, 4, 8, vmv_v_x, b32)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int32x4, vint32, i32, 4, 8, vmv_v_x, b32)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_uint64x2, vuint64, u64, 2, 4, vmv_v_x, b64)
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_int64x2, vint64, i64, 2, 4, vmv_v_x, b64)
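The floating-point rotate instantiations continue below. Note that the SHIFT_RIGHT macro above generates both v_shr, which simply shifts (truncating toward zero for the unsigned/arithmetic cases), and v_rshr, which adds 1 << (n-1) before shifting so the quotient is rounded to nearest. A minimal scalar sketch of the difference (shr_ref and rshr_ref are hypothetical reference helpers, not part of this patch):

#include <cstdio>

// Hypothetical scalar equivalents of the generated v_shr / v_rshr.
static unsigned shr_ref(unsigned a, int n)  { return a >> n; }
static unsigned rshr_ref(unsigned a, int n) { return (a + (1u << (n - 1))) >> n; }

int main()
{
    // 5 / 4 = 1.25: both forms give 1; 7 / 4 = 1.75: plain shift
    // truncates to 1, the rounding shift gives 2.
    printf("%u %u\n", shr_ref(5, 2), rshr_ref(5, 2)); // 1 1
    printf("%u %u\n", shr_ref(7, 2), rshr_ref(7, 2)); // 1 2
    return 0;
}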
+OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_float32x4, vfloat32, f32, 4, 8, vfmv_v_f, b32) +OPENCV_HAL_IMPL_RISCVV_ROTATE_OP(v_float64x2, vfloat64, f64, 2, 4, vfmv_v_f, b64) + +#define OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(_Tpvec, _Tp, _Tp2, len, hnum, num) \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ \ + typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \ + vuint64m1_t tmp = {*(unaligned_uint64*)ptr0, *(unaligned_uint64*)ptr1};\ + return _Tpvec(_Tp2##_t(tmp)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(vle_v_##len(ptr, hnum)); }\ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(vle_v_##len(ptr, num)); } \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec((_Tp2##_t)vle_v_##len((const _Tp *)ptr, num)); } \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ vse_v_##len(ptr, a.val, hnum);}\ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + _Tp2##_t a0 = vslidedown_vx_##len(a.val, hnum, num); \ + vse_v_##len(ptr, a0, hnum);}\ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ vse_v_##len(ptr, a.val, num); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ vse_v_##len(ptr, a.val, num); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ vse_v_##len(ptr, a.val, num); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ vse_v_##len(ptr, a.val, num); } + +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint8x16, uchar, vuint8m1, u8m1, 8, 16) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int8x16, schar, vint8m1, i8m1, 8, 16) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint16x8, ushort, vuint16m1, u16m1, 4, 8) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int16x8, short, vint16m1, i16m1, 4, 8) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint32x4, unsigned, vuint32m1, u32m1, 2, 4) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int32x4, int, vint32m1, i32m1, 2, 4) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_uint64x2, unsigned long, vuint64m1, u64m1, 1, 2) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_int64x2, long, vint64m1, i64m1, 1, 2) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_float32x4, float, vfloat32m1, f32m1, 2, 4) +OPENCV_HAL_IMPL_RISCVV_LOADSTORE_OP(v_float64x2, double, vfloat64m1, f64m1, 1, 2) + + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ +#if 1 + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + return v_int8x16(vle_v_i8m1(elems, 16)); +#else + int32xm4_t index32 = vlev_int32xm4(idx, 16); + vint16m2_t index16 = vnsra_vx_i16m2_int32xm4(index32, 0, 16); + vint8m1_t index = vnsra_vx_i8m1_i16m2(index16, 0, 16); + return v_int8x16(vlxbv_i8m1(tab, index, 16)); +#endif +} + +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx){ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + return v_int8x16(vle_v_i8m1(elems, 16)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + 
tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + return v_int8x16(vle_v_i8m1(elems, 16)); +} + +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + return v_int16x8(vle_v_i16m1(elems, 8)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + return v_int16x8(vle_v_i16m1(elems, 8)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3] + }; + return v_int16x8(vle_v_i16m1(elems, 8)); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return v_int32x4(vle_v_i32m1(elems, 4)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + return v_int32x4(vle_v_i32m1(elems, 4)); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vle_v_i32m1(tab+idx[0], 4)); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + vint64m1_t res = {tab[idx[0]], tab[idx[1]]}; + return v_int64x2(res); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vle_v_i64m1(tab+idx[0], 2)); +} + +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) +{ + vuint64m1_t res = {tab[idx[0]], tab[idx[1]]}; + return v_uint64x2(res); +} +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) +{ + return v_uint64x2(vle_v_u64m1(tab+idx[0], 2)); +} + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + return 
v_float32x4(vle_v_f32m1(elems, 4));
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[0]+1],
+        tab[idx[1]],
+        tab[idx[1]+1]
+    };
+    return v_float32x4(vle_v_f32m1(elems, 4));
+}
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx)
+{
+    return v_float32x4(vle_v_f32m1(tab + idx[0], 4));
+}
+inline v_float64x2 v_lut(const double* tab, const int* idx)
+{
+    vfloat64m1_t res = {tab[idx[0]], tab[idx[1]]};
+    return v_float64x2(res);
+}
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
+{
+    return v_float64x2(vle_v_f64m1(tab+idx[0], 2));
+}
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idxvec.val[0]],
+        tab[idxvec.val[1]],
+        tab[idxvec.val[2]],
+        tab[idxvec.val[3]]
+    };
+    return v_int32x4(vle_v_i32m1(elems, 4));
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
+{
+    unsigned CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idxvec.val[0]],
+        tab[idxvec.val[1]],
+        tab[idxvec.val[2]],
+        tab[idxvec.val[3]]
+    };
+    return v_uint32x4(vle_v_u32m1(elems, 4));
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idxvec.val[0]],
+        tab[idxvec.val[1]],
+        tab[idxvec.val[2]],
+        tab[idxvec.val[3]]
+    };
+    return v_float32x4(vle_v_f32m1(elems, 4));
+}
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    vfloat64m1_t res = {tab[idxvec.val[0]], tab[idxvec.val[1]]};
+    return v_float64x2(res);
+}
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
+{
+    vint32m1_t index_x = vmul_vx_i32m1(idxvec.val, 4, 4);
+    vint32m1_t index_y = vadd_vx_i32m1(index_x, 4, 4);
+
+    x.val = vlxe_v_f32m1(tab, index_x, 4);
+    y.val = vlxe_v_f32m1(tab, index_y, 4);
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    x = v_float64x2(tab[idx[0]], tab[idx[1]]);
+    y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]);
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_PACKS(_Tp, _Tp2, _T2, num2, _T1, num, intrin, shr, _Type) \
+inline v_##_Tp##x##num v_pack(const v_##_Tp2##x##num2& a, const v_##_Tp2##x##num2& b) \
+{ \
+    v##_Tp2##m2_t tmp = vundefined_##_T2##m2(); \
+    tmp = vset_##_T2##m2(tmp, 0, a.val); \
+    tmp = vset_##_T2##m2(tmp, 1, b.val); \
+    return v_##_Tp##x##num(shr##_##_T1##m1(tmp, 0, num)); \
+}\
+template<int n> inline \
+v_##_Tp##x##num v_rshr_pack(const v_##_Tp2##x##num2& a, const v_##_Tp2##x##num2& b) \
+{ \
+    v##_Tp2##m2_t tmp = vundefined_##_T2##m2(); \
+    tmp = vset_##_T2##m2(tmp, 0, a.val); \
+    tmp = vset_##_T2##m2(tmp, 1, b.val); \
+    return v_##_Tp##x##num(intrin##_##_T1##m1(tmp, n, num)); \
+}\
+inline void v_pack_store(_Type* ptr, const v_##_Tp2##x##num2& a) \
+{ \
+    v##_Tp2##m2_t tmp = vundefined_##_T2##m2(); \
+    tmp = vset_##_T2##m2(tmp, 0, a.val); \
+    tmp = vset_##_T2##m2(tmp, 1, vmv_v_x_##_T2##m1(0, num2)); \
+    asm("" ::: "memory"); \
+    vse_v_##_T1##m1(ptr, shr##_##_T1##m1(tmp, 0, num), num2); \
+}\
+template<int n> inline \
+void v_rshr_pack_store(_Type* ptr, const v_##_Tp2##x##num2& a) \
+{ \
+    v##_Tp2##m2_t tmp = vundefined_##_T2##m2(); \
+    tmp = vset_##_T2##m2(tmp, 0, a.val); \
+    tmp = vset_##_T2##m2(tmp, 1, vmv_v_x_##_T2##m1(0, num2)); \
+    vse_v_##_T1##m1(ptr, intrin##_##_T1##m1(tmp, n, num), num2); \
+}
+OPENCV_HAL_IMPL_RISCVV_PACKS(int8, int16, i16, 8, i8, 16, vnclip_vx,
vnclip_vx, signed char)
+OPENCV_HAL_IMPL_RISCVV_PACKS(int16, int32, i32, 4, i16, 8, vnclip_vx, vnclip_vx, signed short)
+OPENCV_HAL_IMPL_RISCVV_PACKS(int32, int64, i64, 2, i32, 4, vnclip_vx, vnsra_vx, int)
+OPENCV_HAL_IMPL_RISCVV_PACKS(uint8, uint16, u16, 8, u8, 16, vnclipu_vx, vnclipu_vx, unsigned char)
+OPENCV_HAL_IMPL_RISCVV_PACKS(uint16, uint32, u32, 4, u16, 8, vnclipu_vx, vnclipu_vx, unsigned short)
+OPENCV_HAL_IMPL_RISCVV_PACKS(uint32, uint64, u64, 2, u32, 4, vnclipu_vx, vnsrl_vx, unsigned int)
+
+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vuint16m2_t tmp = vundefined_u16m2(); \
+    tmp = vset_u16m2(tmp, 0, a.val); \
+    tmp = vset_u16m2(tmp, 1, b.val); \
+    return v_uint8x16(vnsrl_vx_u8m1(tmp, 0, 16));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    vuint32m4_t vabcd = vundefined_u32m4(); \
+    vuint16m2_t v16 = vundefined_u16m2(); \
+    vabcd = vset_u32m4(vabcd, 0, a.val); \
+    vabcd = vset_u32m4(vabcd, 1, b.val); \
+    vabcd = vset_u32m4(vabcd, 2, c.val); \
+    vabcd = vset_u32m4(vabcd, 3, d.val); \
+    v16 = vnsrl_vx_u16m2(vabcd, 0, 16);
+    return v_uint8x16(vnsrl_vx_u8m1(v16, 0, 16));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    vuint64m8_t v64 = vundefined_u64m8(); \
+    vuint32m4_t v32 = vundefined_u32m4(); \
+    vuint16m2_t v16 = vundefined_u16m2(); \
+    v64 = vset_u64m8(v64, 0, a.val); \
+    v64 = vset_u64m8(v64, 1, b.val); \
+    v64 = vset_u64m8(v64, 2, c.val); \
+    v64 = vset_u64m8(v64, 3, d.val); \
+    v64 = vset_u64m8(v64, 4, e.val); \
+    v64 = vset_u64m8(v64, 5, f.val); \
+    v64 = vset_u64m8(v64, 6, g.val); \
+    v64 = vset_u64m8(v64, 7, h.val); \
+    v32 = vnsrl_vx_u32m4(v64, 0, 16);
+    v16 = vnsrl_vx_u16m2(v32, 0, 16);
+    return v_uint8x16(vnsrl_vx_u8m1(v16, 0, 16));
+}
+
+//inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) \
+//{ \
+//    int16xm2_u tmp; \
+//    tmp.m1[0] = (vint16m1_t)a.val; \
+//    tmp.m1[1] = (vint16m1_t)b.val; \
+//    e8xm1_t mask = (e8xm1_t)vmsge_vx_e16xm2_i16m2(tmp.v, 0, 16);\
+//    return v_uint8x16(vnclipuvi_mask_u8m1_u16m2(vmv_v_x_u8m1(0, 16), (vuint16m2_t)tmp.v, 0, mask, 16));
+//}
+
+#define OPENCV_HAL_IMPL_RISCVV_PACK_U(tp1, num1, tp2, num2, _Tp) \
+inline v_uint##tp1##x##num1 v_pack_u(const v_int##tp2##x##num2& a, const v_int##tp2##x##num2& b) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2(); \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val); \
+    tmp = vset_##i##tp2##m2(tmp, 1, b.val); \
+    vint##tp2##m2_t val = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    return v_uint##tp1##x##num1(vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val, 0, num1)); \
+} \
+inline void v_pack_u_store(_Tp* ptr, const v_int##tp2##x##num2& a) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2(); \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val); \
+    vint##tp2##m2_t val = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    return vse_v_u##tp1##m1(ptr, vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val, 0, num1), num2); \
+} \
+template<int n> inline \
+v_uint##tp1##x##num1 v_rshr_pack_u(const v_int##tp2##x##num2& a, const v_int##tp2##x##num2& b) \
+{ \
+    vint##tp2##m2_t tmp = vundefined_##i##tp2##m2(); \
+    tmp = vset_##i##tp2##m2(tmp, 0, a.val); \
+    tmp = vset_##i##tp2##m2(tmp, 1, b.val); \
+    vint##tp2##m2_t val = vmax_vx_i##tp2##m2(tmp, 0, num1);\
+    return v_uint##tp1##x##num1(vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val, n, num1)); \
+} \
+template<int n>
inline \ +void v_rshr_pack_u_store(_Tp* ptr, const v_int##tp2##x##num2& a) \ +{ \ + vint##tp2##m2_t tmp = vundefined_##i##tp2##m2(); \ + tmp = vset_##i##tp2##m2(tmp, 0, a.val); \ + vint##tp2##m2_t val_ = vmax_vx_i##tp2##m2(tmp, 0, num1);\ + vuint##tp1##m1_t val = vnclipu_vx_u##tp1##m1((vuint##tp2##m2_t)val_, n, num1); \ + return vse_v_u##tp1##m1(ptr, val, num2);\ +} +OPENCV_HAL_IMPL_RISCVV_PACK_U(8, 16, 16, 8, unsigned char ) +OPENCV_HAL_IMPL_RISCVV_PACK_U(16, 8, 32, 4, unsigned short) + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#endif + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_RISCVV_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_int16x8, v_int32x4) +OPENCV_HAL_IMPL_RISCVV_MUL_SAT(v_uint16x8, v_uint32x4) + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +static const signed char popCountTable[256] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; + +inline vuint8m1_t vcnt_u8(vuint8m1_t val){ + vuint8m1_t v0 = val & 1; + return vlxe_v_u8m1((unsigned char*)popCountTable, val >> 1, 16)+v0; +} + +inline v_uint8x16 +v_popcount(const v_uint8x16& a) +{ + return v_uint8x16(vcnt_u8(a.val)); +} + +inline v_uint8x16 +v_popcount(const v_int8x16& a) +{ + return v_uint8x16(vcnt_u8((vuint8m1_t)a.val)); +} + +inline v_uint16x8 +v_popcount(const v_uint16x8& a) +{ + vuint8m2_t tmp = vundefined_u8m2(); + tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val)); + vuint64m2_t mask = (vuint64m2_t){0x0E0C0A0806040200, 0, 0x0F0D0B0907050301, 0}; + tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32); \ + vuint16m2_t res = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 8); + return v_uint16x8(vget_u16m2_u16m1(res, 0)); +} + +inline v_uint16x8 +v_popcount(const v_int16x8& a) +{ + vuint8m2_t tmp = vundefined_u8m2(); + tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val)); + vuint64m2_t mask = (vuint64m2_t){0x0E0C0A0806040200, 0, 0x0F0D0B0907050301, 0}; + tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32); \ + vuint16m2_t res = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 8); + return v_uint16x8(vget_u16m2_u16m1(res, 0)); +} + +inline v_uint32x4 +v_popcount(const v_uint32x4& a) +{ + vuint8m2_t tmp = vundefined_u8m2(); + tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val)); + vuint64m2_t mask = (vuint64m2_t){0xFFFFFFFF0C080400, 0xFFFFFFFF0D090501, + 0xFFFFFFFF0E0A0602, 0xFFFFFFFF0F0B0703}; + tmp = 
vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32); \ + vuint16m2_t res_ = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 16); + vuint32m2_t res = vwaddu_vv_u32m2(vget_u16m2_u16m1(res_, 0), vget_u16m2_u16m1(res_, 1), 8); + return v_uint32x4(vget_u32m2_u32m1(res, 0)); +} + +inline v_uint32x4 +v_popcount(const v_int32x4& a) +{ + vuint8m2_t tmp = vundefined_u8m2(); + tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val)); + vuint64m2_t mask = (vuint64m2_t){0xFFFFFFFF0C080400, 0xFFFFFFFF0D090501, + 0xFFFFFFFF0E0A0602, 0xFFFFFFFF0F0B0703}; + tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32); \ + vuint16m2_t res_ = vwaddu_vv_u16m2(vget_u8m2_u8m1(tmp, 0), vget_u8m2_u8m1(tmp, 1), 16); + vuint32m2_t res = vwaddu_vv_u32m2(vget_u16m2_u16m1(res_, 0), vget_u16m2_u16m1(res_, 1), 8); + return v_uint32x4(vget_u32m2_u32m1(res, 0)); +} + +inline v_uint64x2 +v_popcount(const v_uint64x2& a) +{ + vuint8m2_t tmp = vundefined_u8m2(); + tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val)); + vuint64m2_t mask = (vuint64m2_t){0x0706050403020100, 0x0000000000000000, + 0x0F0E0D0C0B0A0908, 0x0000000000000000}; + tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32); \ + vuint8m1_t zero = vmv_v_x_u8m1(0, 16); + vuint8m1_t res1 = zero; + vuint8m1_t res2 = zero; + res1 = vredsum_vs_u8m1_u8m1(res1, vget_u8m2_u8m1(tmp, 0), zero, 8); + res2 = vredsum_vs_u8m1_u8m1(res2, vget_u8m2_u8m1(tmp, 1), zero, 8); + + return v_uint64x2((unsigned long)vmv_x_s_u8m1_u8(res1, 8), (unsigned long)vmv_x_s_u8m1_u8(res2, 8)); +} + +inline v_uint64x2 +v_popcount(const v_int64x2& a) +{ + vuint8m2_t tmp = vundefined_u8m2(); + tmp = vset_u8m2(tmp, 0, vcnt_u8((vuint8m1_t)a.val)); + vuint64m2_t mask = (vuint64m2_t){0x0706050403020100, 0x0000000000000000, + 0x0F0E0D0C0B0A0908, 0x0000000000000000}; + tmp = vrgather_vv_u8m2(tmp, (vuint8m2_t)mask, 32); \ + vuint8m1_t zero = vmv_v_x_u8m1(0, 16); + vuint8m1_t res1 = zero; + vuint8m1_t res2 = zero; + res1 = vredsum_vs_u8m1_u8m1(res1, vget_u8m2_u8m1(tmp, 0), zero, 8); + res2 = vredsum_vs_u8m1_u8m1(res2, vget_u8m2_u8m1(tmp, 1), zero, 8); + + return v_uint64x2((unsigned long)vmv_x_s_u8m1_u8(res1, 8), (unsigned long)vmv_x_s_u8m1_u8(res2, 8)); +} + +#define SMASK 1, 2, 4, 8, 16, 32, 64, 128 +inline int v_signmask(const v_uint8x16& a) +{ + vuint8m1_t t0 = vsrl_vx_u8m1(a.val, 7, 16); + vuint8m1_t m1 = (vuint8m1_t){SMASK, SMASK}; + vuint16m2_t t1 = vwmulu_vv_u16m2(t0, m1, 16); + vuint32m1_t res = vmv_v_x_u32m1(0, 4); + vuint32m2_t t2 = vwmulu_vx_u32m2(vget_u16m2_u16m1(t1, 1), 256, 8); + res = vredsum_vs_u32m2_u32m1(res, t2, res, 8); + res = vwredsumu_vs_u16m1_u32m1(res, vget_u16m2_u16m1(t1, 0), res, 8); + return vmv_x_s_u32m1_u32(res, 8); +} +inline int v_signmask(const v_int8x16& a) +{ + vuint8m1_t t0 = vsrl_vx_u8m1((vuint8m1_t)a.val, 7, 16); + vuint8m1_t m1 = (vuint8m1_t){SMASK, SMASK}; + vint16m2_t t1 = (vint16m2_t)vwmulu_vv_u16m2(t0, m1, 16); + vint32m1_t res = vmv_v_x_i32m1(0, 4); + vint32m2_t t2 = vwmul_vx_i32m2(vget_i16m2_i16m1(t1, 1), 256, 8); + res = vredsum_vs_i32m2_i32m1(res, t2, res, 8); + res = vwredsum_vs_i16m1_i32m1(res, vget_i16m2_i16m1(t1, 0), res, 8); + return vmv_x_s_i32m1_i32(res, 8); +} + +inline int v_signmask(const v_int16x8& a) +{ + vint16m1_t t0 = (vint16m1_t)vsrl_vx_u16m1((vuint16m1_t)a.val, 15, 8); + vint16m1_t m1 = (vint16m1_t){SMASK}; + vint16m1_t t1 = vmul_vv_i16m1(t0, m1, 8); + vint16m1_t res = vmv_v_x_i16m1(0, 8); + res = vredsum_vs_i16m1_i16m1(res, t1, res, 8); + return vmv_x_s_i16m1_i16(res, 8); +} +inline int v_signmask(const v_uint16x8& a) +{ + vint16m1_t t0 = 
(vint16m1_t)vsrl_vx_u16m1((vuint16m1_t)a.val, 15, 8); + vint16m1_t m1 = (vint16m1_t){SMASK}; + vint16m1_t t1 = vmul_vv_i16m1(t0, m1, 8); + vint16m1_t res = vmv_v_x_i16m1(0, 8); + res = vredsum_vs_i16m1_i16m1(res, t1, res, 8); + return vmv_x_s_i16m1_i16(res, 8); +} +inline int v_signmask(const v_int32x4& a) +{ + vint32m1_t t0 = (vint32m1_t)vsrl_vx_u32m1((vuint32m1_t)a.val, 31, 4); + vint32m1_t m1 = (vint32m1_t){1, 2, 4, 8}; + vint32m1_t res = vmv_v_x_i32m1(0, 4); + vint32m1_t t1 = vmul_vv_i32m1(t0, m1, 4); + res = vredsum_vs_i32m1_i32m1(res, t1, res, 4); + return vmv_x_s_i32m1_i32(res, 4); +} +inline int v_signmask(const v_uint32x4& a) +{ + vint32m1_t t0 = (vint32m1_t)vsrl_vx_u32m1(a.val, 31, 4); + vint32m1_t m1 = (vint32m1_t){1, 2, 4, 8}; + vint32m1_t res = vmv_v_x_i32m1(0, 4); + vint32m1_t t1 = vmul_vv_i32m1(t0, m1, 4); + res = vredsum_vs_i32m1_i32m1(res, t1, res, 4); + return vmv_x_s_i32m1_i32(res, 4); +} +inline int v_signmask(const v_uint64x2& a) +{ + vuint64m1_t v0 = vsrl_vx_u64m1(a.val, 63, 2); + int res = (int)vext_x_v_u64m1_u64(v0, 0, 2) + ((int)vext_x_v_u64m1_u64(v0, 1, 2) << 1); + return res; +} +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +inline int v_signmask(const v_float32x4& a) +{ + vint32m1_t t0 = (vint32m1_t)vsrl_vx_u32m1((vuint32m1_t)a.val, 31, 4); + vint32m1_t m1 = (vint32m1_t){1, 2, 4, 8}; + vint32m1_t res = vmv_v_x_i32m1(0, 4); + vint32m1_t t1 = vmul_vv_i32m1(t0, m1, 4); + res = vredsum_vs_i32m1_i32m1(res, t1, res, 4); + return vmv_x_s_i32m1_i32(res, 4); +} + +inline int v_scan_forward(const v_int8x16& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_uint8x16& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_int16x8& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_uint16x8& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_int32x4& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_uint32x4& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_float32x4& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_int64x2& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } +inline int v_scan_forward(const v_uint64x2& a) { +int val = v_signmask(a); +if(val==0) return 0; +else return trailingZeros32(val); } + +#define OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(_Tpvec, suffix, _T, shift, num) \ +inline bool v_check_all(const v_##_Tpvec& a) \ +{ \ + suffix##m1_t v0 = vsrl_vx_##_T(vnot_v_##_T(a.val, num), shift, num); \ + vuint64m1_t v1 = vuint64m1_t(v0); \ + return (v1[0] | v1[1]) == 0; \ +} \ +inline bool v_check_any(const v_##_Tpvec& a) \ +{ \ + suffix##m1_t v0 = vsrl_vx_##_T(a.val, shift, num); \ + vuint64m1_t v1 = vuint64m1_t(v0); \ + return (v1[0] | v1[1]) != 0; \ +} + +OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint8x16, vuint8, u8m1, 7, 16) +OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint16x8, vuint16, u16m1, 15, 8) +OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint32x4, vuint32, u32m1, 31, 4) 
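The CHECK_ALLANY macro reduces a comparison-result vector to its sign bits: v_check_all inverts the vector first, so every lane whose MSB was 0 leaves a 1 behind and the OR of the two 64-bit halves becomes nonzero, while v_check_any skips the inversion and tests the MSBs directly; the uint64x2 instantiation follows below. A minimal scalar sketch of that logic over a 4-lane mask (check_all_ref and check_any_ref are hypothetical names, not part of this patch):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of the generated v_check_all / v_check_any
// for a 4 x 32-bit mask vector (each lane is all-ones or all-zeros).
static bool check_all_ref(const uint32_t m[4])
{
    uint32_t acc = 0;
    for (int i = 0; i < 4; i++)
        acc |= (~m[i]) >> 31;      // a 1 survives for every lane whose MSB is 0
    return acc == 0;
}
static bool check_any_ref(const uint32_t m[4])
{
    uint32_t acc = 0;
    for (int i = 0; i < 4; i++)
        acc |= m[i] >> 31;         // a 1 survives for every lane whose MSB is 1
    return acc != 0;
}

int main()
{
    uint32_t some[4] = {0xFFFFFFFFu, 0, 0xFFFFFFFFu, 0};
    printf("%d %d\n", check_all_ref(some), check_any_ref(some)); // 0 1
    return 0;
}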
+OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint64x2, vuint64, u64m1, 63, 2) + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } + +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +#define OPENCV_HAL_IMPL_RISCVV_SELECT(_Tpvec, suffix, _Tpvec2, num) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vmerge_vvm_##suffix(_Tpvec2(mask.val), b.val, a.val, num)); \ +} + +OPENCV_HAL_IMPL_RISCVV_SELECT(v_int8x16, i8m1, vbool8_t, 16) +OPENCV_HAL_IMPL_RISCVV_SELECT(v_int16x8, i16m1, vbool16_t, 8) +OPENCV_HAL_IMPL_RISCVV_SELECT(v_int32x4, i32m1, vbool32_t, 4) +OPENCV_HAL_IMPL_RISCVV_SELECT(v_uint8x16, u8m1, vbool8_t, 16) +OPENCV_HAL_IMPL_RISCVV_SELECT(v_uint16x8, u16m1, vbool16_t, 8) +OPENCV_HAL_IMPL_RISCVV_SELECT(v_uint32x4, u32m1, vbool32_t, 4) +inline v_float32x4 v_select(const v_float32x4& mask, const v_float32x4& a, const v_float32x4& b) +{ + return v_float32x4((vfloat32m1_t)vmerge_vvm_u32m1((vbool32_t)mask.val, (vuint32m1_t)b.val, (vuint32m1_t)a.val, 4)); +} +inline v_float64x2 v_select(const v_float64x2& mask, const v_float64x2& a, const v_float64x2& b) +{ + return v_float64x2((vfloat64m1_t)vmerge_vvm_u64m1((vbool64_t)mask.val, (vuint64m1_t)b.val, (vuint64m1_t)a.val, 2)); +} + +#define OPENCV_HAL_IMPL_RISCVV_EXPAND(add, _Tpvec, _Tpwvec, _Tp, _Tp1, num1, _Tp2, num2, _T1, _T2) \ +inline void v_expand(const _Tpvec& a, v_##_Tpwvec& b0, v_##_Tpwvec& b1) \ +{ \ + _T1##_t b = vw##add##_vv_##_Tp2##m2(a.val, vmv_v_x_##_Tp1(0, num1), num1); \ + b0.val = vget_##_Tp2##m2_##_Tp2##m1(b, 0); \ + b1.val = vget_##_Tp2##m2_##_Tp2##m1(b, 1); \ +} \ +inline v_##_Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _T1##_t b = vw##add##_vv_##_Tp2##m2(a.val, vmv_v_x_##_Tp1(0, num2), num2); \ + return v_##_Tpwvec(vget_##_Tp2##m2_##_Tp2##m1(b, 0)); \ +} \ +inline v_##_Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _T1##_t b = vw##add##_vv_##_Tp2##m2(a.val, vmv_v_x_##_Tp1(0, num1), num1); \ + return v_##_Tpwvec(vget_##_Tp2##m2_##_Tp2##m1(b, 1)); \ +} \ +inline v_##_Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + _T2##_t val = vle##_v_##_Tp1(ptr, num2); \ + _T1##_t b = vw##add##_vv_##_Tp2##m2(val, vmv_v_x_##_Tp1(0, num2), num2); \ + return v_##_Tpwvec(vget_##_Tp2##m2_##_Tp2##m1(b, 0)); \ +} + +OPENCV_HAL_IMPL_RISCVV_EXPAND(addu, v_uint8x16, uint16x8, uchar, u8m1, 16, u16, 8, vuint16m2, vuint8m1) +OPENCV_HAL_IMPL_RISCVV_EXPAND(addu, v_uint16x8, uint32x4, ushort, u16m1, 8, u32, 4, vuint32m2, vuint16m1) +OPENCV_HAL_IMPL_RISCVV_EXPAND(addu, v_uint32x4, 
uint64x2, uint, u32m1, 4, u64, 2, vuint64m2, vuint32m1) +OPENCV_HAL_IMPL_RISCVV_EXPAND(add, v_int8x16, int16x8, schar, i8m1, 16, i16, 8, vint16m2, vint8m1) +OPENCV_HAL_IMPL_RISCVV_EXPAND(add, v_int16x8, int32x4, short, i16m1, 8, i32, 4, vint32m2, vint16m1) +OPENCV_HAL_IMPL_RISCVV_EXPAND(add, v_int32x4, int64x2, int, i32m1, 4, i64, 2, vint64m2, vint32m1) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + vuint16m2_t b = vundefined_u16m2(); + vuint32m2_t c = vundefined_u32m2(); + vuint8m1_t val = vle_v_u8m1(ptr, 4); \ + b = vwaddu_vv_u16m2(val, vmv_v_x_u8m1(0, 4), 4); \ + c = vwaddu_vv_u32m2(vget_u16m2_u16m1(b, 0), vmv_v_x_u16m1(0, 4), 4); \ + return v_uint32x4(vget_u32m2_u32m1(c, 0)); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + vint16m2_t b = vundefined_i16m2(); + vint32m2_t c = vundefined_i32m2(); + vint8m1_t val = vle_v_i8m1(ptr, 4); \ + b = vwadd_vv_i16m2(val, vmv_v_x_i8m1(0, 4), 4); \ + c = vwadd_vv_i32m2(vget_i16m2_i16m1(b, 0), vmv_v_x_i16m1(0, 4), 4); \ + return v_int32x4(vget_i32m2_i32m1(c, 0)); +} +#define VITL_16 (vuint64m2_t){0x1303120211011000, 0x1707160615051404, 0x1B0B1A0A19091808, 0x1F0F1E0E1D0D1C0C} +#define VITL_8 (vuint64m2_t){0x0009000100080000, 0x000B0003000A0002, 0x000D0005000C0004, 0x000F0007000E0006} +#define VITL_4 (vuint64m2_t){0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003} +#define VITL_2 (vuint64m2_t){0, 2, 1, 3} +#define LOW_4 0x0000000100000000, 0x0000000500000004 +#define LOW_8 0x0003000200010000, 0x000B000A00090008 +#define LOW_16 0x0706050403020100, 0x1716151413121110 +#define HIGH_4 0x0000000300000002, 0x0000000700000006 +#define HIGH_8 0x0007000600050004, 0x000F000E000D000C +#define HIGH_16 0x0F0E0D0C0B0A0908, 0x1F1E1D1C1B1A1918 +#define OPENCV_HAL_IMPL_RISCVV_UNPACKS(_Tpvec, _Tp, _T, _UTp, _UT, num, num2, len, numh) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + v##_Tp##m2_t tmp = vundefined_##_T##m2();\ + tmp = vset_##_T##m2(tmp, 0, a0.val); \ + tmp = vset_##_T##m2(tmp, 1, a1.val); \ + vuint64m2_t mask = VITL_##num; \ + tmp = (v##_Tp##m2_t)vrgather_vv_##_T##m2((v##_Tp##m2_t)tmp, (v##_UTp##m2_t)mask, num2); \ + b0.val = vget_##_T##m2_##_T##m1(tmp, 0); \ + b1.val = vget_##_T##m2_##_T##m1(tmp, 1); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + v##_Tp##m1_t b0 = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a.val, b.val, numh, num); \ + return v_##_Tpvec(b0);\ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + v##_Tp##m1_t b0 = vslidedown_vx_##_T##m1(b.val, numh, num); \ + v##_Tp##m1_t a0 = vslidedown_vx_##_T##m1(a.val, numh, num); \ + v##_Tp##m1_t b1 = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a0, b0, numh, num); \ + return v_##_Tpvec(b1);\ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c.val = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a.val, b.val, numh, num); \ + v##_Tp##m1_t b0 = vslidedown_vx_##_T##m1(b.val, numh, num); \ + v##_Tp##m1_t a0 = vslidedown_vx_##_T##m1(a.val, numh, num); \ + d.val = vslideup_vx_##_T##m1_m(vmset_m_##len(num), a0, b0, numh, num); \ +} + +OPENCV_HAL_IMPL_RISCVV_UNPACKS(uint8x16, uint8, u8, uint8, u8, 16, 32, b8, 8) +OPENCV_HAL_IMPL_RISCVV_UNPACKS(int8x16, int8, i8, uint8, u8, 16, 32, b8, 8) +OPENCV_HAL_IMPL_RISCVV_UNPACKS(uint16x8, uint16, u16, uint16, u16, 8, 16, b16, 4) +OPENCV_HAL_IMPL_RISCVV_UNPACKS(int16x8, int16, i16, uint16, u16, 8, 16, b16, 4) 
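The UNPACKS macro implements v_zip by concatenating the two inputs into an m2 register and interleaving with a single vrgather; the VITL_* tables above are the gather indices packed little-endian into 64-bit literals (VITL_8, for instance, unpacks to {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}), and the remaining instantiations follow below. A scalar sketch that decodes the literal and applies the interleave (illustrative only, hypothetical names):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of v_zip for 8 x 16-bit lanes: decode the packed
// VITL_8 index table, then gather from the virtual concatenation {a0, a1}.
int main()
{
    const uint64_t vitl8[4] = {0x0009000100080000ull, 0x000B0003000A0002ull,
                               0x000D0005000C0004ull, 0x000F0007000E0006ull};
    uint16_t idx[16];
    for (int i = 0; i < 16; i++)                     // little-endian 16-bit fields
        idx[i] = (uint16_t)(vitl8[i / 4] >> (16 * (i % 4)));

    uint16_t a0[8] = {0, 1, 2, 3, 4, 5, 6, 7}, a1[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    uint16_t cat[16], out[16];
    for (int i = 0; i < 8; i++) { cat[i] = a0[i]; cat[8 + i] = a1[i]; }
    for (int i = 0; i < 16; i++) out[i] = cat[idx[i]];

    for (int i = 0; i < 16; i++) printf("%u ", out[i]); // 0 10 1 11 2 12 3 13 ...
    printf("\n");                                       // b0 = first 8, b1 = last 8
    return 0;
}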
+
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    vuint64m1_t mask = (vuint64m1_t){0x08090A0B0C0D0E0F, 0x0001020304050607};
+    return v_uint8x16(vrgather_vv_u8m1(a.val, (vuint8m1_t)mask, 16));
+}
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{
+    vint64m1_t mask = (vint64m1_t){0x08090A0B0C0D0E0F, 0x0001020304050607};
+    return v_int8x16(vrgather_vv_i8m1(a.val, (vuint8m1_t)mask, 16));
+}
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    vuint64m1_t mask = (vuint64m1_t){0x0004000500060007, 0x0000000100020003};
+    return v_uint16x8(vrgather_vv_u16m1(a.val, (vuint16m1_t)mask, 8));
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{
+    vint64m1_t mask = (vint64m1_t){0x0004000500060007, 0x0000000100020003};
+    return v_int16x8(vrgather_vv_i16m1(a.val, (vuint16m1_t)mask, 8));
+}
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    return v_uint32x4(vrgather_vv_u32m1(a.val, (vuint32m1_t){3, 2, 1, 0}, 4));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{
+    return v_int32x4(vrgather_vv_i32m1(a.val, (vuint32m1_t){3, 2, 1, 0}, 4));
+}
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    return v_uint64x2(a.val[1], a.val[0]);
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{
+    return v_int64x2(a.val[1], a.val[0]);
+}
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{
+    return v_float64x2(a.val[1], a.val[0]);
+}
+
+#define OPENCV_HAL_IMPL_RISCVV_EXTRACT(_Tpvec, suffix, size) \
+template <int n> \
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
+{ return v_rotate_right<n>(a, b);}
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint8x16, u8, 0)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int8x16, s8, 0)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint16x8, u16, 1)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int16x8, s16, 1)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint32x4, u32, 2)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int32x4, s32, 2)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_uint64x2, u64, 3)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_int64x2, s64, 3)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_float32x4, f32, 2)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT(v_float64x2, f64, 3)
+
+
+#define OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(_Tpvec, _Tp, suffix) \
+template<int i> inline _Tp v_extract_n(_Tpvec v) { return v.val[i]; }
+
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint32x4, uint, u32)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_RISCVV_EXTRACT_N(v_float64x2, double, f64)
+
+#define OPENCV_HAL_IMPL_RISCVV_BROADCAST(_Tpvec, _Tp, num) \
+template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) { return _Tpvec(vrgather_vx_##_Tp##m1(v.val, i, num)); }
+
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint8x16, u8, 16)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int8x16, i8, 16)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint16x8, u16, 8)
+OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_int64x2, i64, 2) +OPENCV_HAL_IMPL_RISCVV_BROADCAST(v_float32x4, f32, 4) +inline v_int32x4 v_round(const v_float32x4& a) +{ + __builtin_riscv_fsrm(0); + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} +inline v_int32x4 v_floor(const v_float32x4& a) +{ + __builtin_riscv_fsrm(2); + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ + __builtin_riscv_fsrm(3); + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + __builtin_riscv_fsrm(1); + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)a.val, 0x7f800000, 4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), a.val, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} + +inline v_int32x4 v_round(const v_float64x2& a) +{ + __builtin_riscv_fsrm(0); + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + //_val = vset_f64m2(_val, 1, a.val); + _val = vset_f64m2(_val, 1, vfmv_v_f_f64m1(0, 2)); + vint32m1_t val = vfncvt_x_f_v_i32m1(_val, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + __builtin_riscv_fsrm(0); + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + _val = vset_f64m2(_val, 1, b.val); + vint32m1_t val = vfncvt_x_f_v_i32m1(_val, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} +inline v_int32x4 v_floor(const v_float64x2& a) +{ + __builtin_riscv_fsrm(2); + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2); + + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)aval, 0x7f800000, 4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), aval, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + __builtin_riscv_fsrm(3); + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2); + + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)aval, 0x7f800000, 4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), aval, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + __builtin_riscv_fsrm(1); + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2); + + vint32m1_t nan = vand_vx_i32m1((vint32m1_t)aval, 0x7f800000, 
4); + vbool32_t mask = vmsne_vx_i32m1_b32(nan, 0x7f800000, 4); + vint32m1_t val = vfcvt_x_f_v_i32m1_m(mask, vmv_v_x_i32m1(0, 4), aval, 4); + __builtin_riscv_fsrm(0); + return v_int32x4(val); +} + +#define OPENCV_HAL_IMPL_RISCVV_LOAD_DEINTERLEAVED(intrin, _Tpvec, num, _Tp, _T) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b) \ +{ \ + v##_Tpvec##m1x2_t ret = intrin##2e_v_##_T##m1x2(ptr, num);\ + a.val = vget_##_T##m1x2_##_T##m1(ret, 0); \ + b.val = vget_##_T##m1x2_##_T##m1(ret, 1); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, v_##_Tpvec##x##num& c) \ +{ \ + v##_Tpvec##m1x3_t ret = intrin##3e_v_##_T##m1x3(ptr, num);\ + a.val = vget_##_T##m1x3_##_T##m1(ret, 0); \ + b.val = vget_##_T##m1x3_##_T##m1(ret, 1); \ + c.val = vget_##_T##m1x3_##_T##m1(ret, 2); \ +}\ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, \ + v_##_Tpvec##x##num& c, v_##_Tpvec##x##num& d) \ +{ \ + v##_Tpvec##m1x4_t ret = intrin##4e_v_##_T##m1x4(ptr, num);\ + a.val = vget_##_T##m1x4_##_T##m1(ret, 0); \ + b.val = vget_##_T##m1x4_##_T##m1(ret, 1); \ + c.val = vget_##_T##m1x4_##_T##m1(ret, 2); \ + d.val = vget_##_T##m1x4_##_T##m1(ret, 3); \ +} \ + +#define OPENCV_HAL_IMPL_RISCVV_STORE_INTERLEAVED(intrin, _Tpvec, num, _Tp, _T) \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + v##_Tpvec##m1x2_t ret = vundefined_##_T##m1x2(); \ + ret = vset_##_T##m1x2(ret, 0, a.val); \ + ret = vset_##_T##m1x2(ret, 1, b.val); \ + intrin##2e_v_##_T##m1x2(ptr, ret, num); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \ + const v_##_Tpvec##x##num& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + v##_Tpvec##m1x3_t ret = vundefined_##_T##m1x3(); \ + ret = vset_##_T##m1x3(ret, 0, a.val); \ + ret = vset_##_T##m1x3(ret, 1, b.val); \ + ret = vset_##_T##m1x3(ret, 2, c.val); \ + intrin##3e_v_##_T##m1x3(ptr, ret, num); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \ + const v_##_Tpvec##x##num& c, const v_##_Tpvec##x##num& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + v##_Tpvec##m1x4_t ret = vundefined_##_T##m1x4(); \ + ret = vset_##_T##m1x4(ret, 0, a.val); \ + ret = vset_##_T##m1x4(ret, 1, b.val); \ + ret = vset_##_T##m1x4(ret, 2, c.val); \ + ret = vset_##_T##m1x4(ret, 3, d.val); \ + intrin##4e_v_##_T##m1x4(ptr, ret, num); \ +} + +#define OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(_Tpvec, _Tp, num, ld, st, _T) \ +OPENCV_HAL_IMPL_RISCVV_LOAD_DEINTERLEAVED(ld, _Tpvec, num, _Tp, _T) \ +OPENCV_HAL_IMPL_RISCVV_STORE_INTERLEAVED(st, _Tpvec, num, _Tp, _T) + +//OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint8, uchar, ) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(int8, schar, 16, vlseg, vsseg, i8) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(int16, short, 8, vlseg, vsseg, i16) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(int32, int, 4, vlseg, vsseg, i32) + +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint8, unsigned char, 16, vlseg, vsseg, u8) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint16, unsigned short, 8, vlseg, vsseg, u16) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED(uint32, unsigned int, 4, vlseg, vsseg, u32) + +#define OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(_Tpvec, _Tp, num, _T) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b) \ +{ \ + v##_Tpvec##m1x2_t ret = 
vlseg2e_v_##_T##m1x2(ptr, num); \ + a.val = vget_##_T##m1x2_##_T##m1(ret, 0); \ + b.val = vget_##_T##m1x2_##_T##m1(ret, 1); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, v_##_Tpvec##x##num& c) \ +{ \ + v##_Tpvec##m1x3_t ret = vlseg3e_v_##_T##m1x3(ptr, num); \ + a.val = vget_##_T##m1x3_##_T##m1(ret, 0); \ + b.val = vget_##_T##m1x3_##_T##m1(ret, 1); \ + c.val = vget_##_T##m1x3_##_T##m1(ret, 2); \ +}\ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec##x##num& a, v_##_Tpvec##x##num& b, \ + v_##_Tpvec##x##num& c, v_##_Tpvec##x##num& d) \ +{ \ + v##_Tpvec##m1x4_t ret = vlseg4e_v_##_T##m1x4(ptr, num); \ + a.val = vget_##_T##m1x4_##_T##m1(ret, 0); \ + b.val = vget_##_T##m1x4_##_T##m1(ret, 1); \ + c.val = vget_##_T##m1x4_##_T##m1(ret, 2); \ + d.val = vget_##_T##m1x4_##_T##m1(ret, 3); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + v##_Tpvec##m1x2_t ret = vundefined_##_T##m1x2(); \ + ret = vset_##_T##m1x2(ret, 0, a.val); \ + ret = vset_##_T##m1x2(ret, 1, b.val); \ + vsseg2e_v_##_T##m1x2(ptr, ret, num); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \ + const v_##_Tpvec##x##num& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + v##_Tpvec##m1x3_t ret = vundefined_##_T##m1x3(); \ + ret = vset_##_T##m1x3(ret, 0, a.val); \ + ret = vset_##_T##m1x3(ret, 1, b.val); \ + ret = vset_##_T##m1x3(ret, 2, c.val); \ + vsseg3e_v_##_T##m1x3(ptr, ret, num); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec##x##num& a, const v_##_Tpvec##x##num& b, \ + const v_##_Tpvec##x##num& c, const v_##_Tpvec##x##num& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + v##_Tpvec##m1x4_t ret = vundefined_##_T##m1x4(); \ + ret = vset_##_T##m1x4(ret, 0, a.val); \ + ret = vset_##_T##m1x4(ret, 1, b.val); \ + ret = vset_##_T##m1x4(ret, 2, c.val); \ + ret = vset_##_T##m1x4(ret, 3, d.val); \ + vsseg4e_v_##_T##m1x4(ptr, ret, num); \ +} +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(float32, float, 4, f32) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(float64, double, 2, f64) + +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(uint64, unsigned long, 2, u64) +OPENCV_HAL_IMPL_RISCVV_INTERLEAVED_(int64, long, 2, i64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(vfcvt_f_x_v_f32m1(a.val, 4)); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 2); + return v_float32x4(aval); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m2_t _val = vundefined_f64m2(); + _val = vset_f64m2(_val, 0, a.val); + _val = vset_f64m2(_val, 1, b.val); + vfloat32m1_t aval = vfncvt_f_f_v_f32m1(_val, 4); + return v_float32x4(aval); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + vfloat32m1_t val = vfcvt_f_x_v_f32m1(a.val, 4); + vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(val, 4); + return v_float64x2(vget_f64m2_f64m1(_val, 0)); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + vfloat32m1_t val = vfcvt_f_x_v_f32m1(a.val, 4); + vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(val, 4); + return v_float64x2(vget_f64m2_f64m1(_val, 1)); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(a.val, 4); + return v_float64x2(vget_f64m2_f64m1(_val, 0)); +} + 
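+// Editor's note (illustrative sketch, not part of the upstream header):
+// the f64 conversions widen into an m2 register group and then pick one m1
+// half, so v_cvt_f64 yields lanes 0-1 and v_cvt_f64_high (below) lanes 2-3:
+//
+//   v_int32x4   v  = v_int32x4(10, 20, 30, 40);
+//   v_float64x2 lo = v_cvt_f64(v);       // {10.0, 20.0}
+//   v_float64x2 hi = v_cvt_f64_high(v);  // {30.0, 40.0}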
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + vfloat64m2_t _val = vfwcvt_f_f_v_f64m2(a.val, 4); + return v_float64x2(vget_f64m2_f64m1(_val, 1)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + return v_float64x2(vfcvt_f_x_v_f64m1(a.val, 2)); +} + +#endif +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + vuint64m1_t m0 = {0x0705060403010200, 0x0F0D0E0C0B090A08}; + return v_int8x16(vrgather_vv_i8m1(vec.val, (vuint8m1_t)m0, 16)); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ + return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); +} + +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + vuint64m1_t m0 = {0x0703060205010400, 0x0F0B0E0A0D090C08}; + return v_int8x16(vrgather_vv_i8m1(vec.val, (vuint8m1_t)m0, 16)); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) +{ + return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); +} + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + vuint64m1_t m0 = {0x0706030205040100, 0x0F0E0B0A0D0C0908}; + return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vec.val, (vuint8m1_t)m0, 16)); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + vuint64m1_t m0 = {0x0B0A030209080100, 0x0F0E07060D0C0504}; + return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16)); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + vuint64m1_t m0 = {0x0B0A090803020100, 0x0F0E0D0C07060504}; + return v_int32x4((vint32m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16)); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + vuint64m1_t m0 = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + vuint64m1_t m0 = {0x0908050403020100, 0xFFFFFFFF0D0C0B0A}; + return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)(vec.val), (vuint8m1_t)m0, 16)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } + +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ + vint64m2_t v1 = vwmul_vv_i64m2(a.val, b.val, 4); + vfloat64m1_t res = 
vfcvt_f_x_v_f64m1(vadd_vv_i64m1(vget_i64m2_i64m1(v1, 0), vget_i64m2_i64m1(v1, 1), 2), 2); + return v_float64x2(res); +} +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ v_float64x2 res = v_dotprod_expand_fast(a, b); + return res + c; } +#endif +////// FP16 support /////// +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + vfloat16m1_t v = vle_v_f16m1((__fp16*)ptr, 4); + vfloat32m2_t v32 = vfwcvt_f_f_v_f32m2(v, 4); + return v_float32x4(vget_f32m2_f32m1(v32, 0)); +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ + vfloat32m2_t v32 = vundefined_f32m2(); + v32 = vset_f32m2(v32, 0, v.val); + vfloat16m1_t hv = vfncvt_f_f_v_f16m1(v32, 4); + vse_v_f16m1((__fp16*)ptr, hv, 4); +} + + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_sse.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_sse.hpp new file mode 100644 index 0000000..443ee16 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_sse.hpp @@ -0,0 +1,3467 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#ifndef OPENCV_HAL_SSE_HPP
+#define OPENCV_HAL_SSE_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+#define CV_SIMD128_FP16 0  // no native operations with FP16 type.
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+//
+// Compilation troubleshooting:
+// - MSVC: error C2719: 'a': formal parameter with requested alignment of 16 won't be aligned
+//   Replace parameter declaration to const reference:
+//     -v_int32x4 a
+//     +const v_int32x4& a
+//
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Types ////////////
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 16 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_uint8x16() {}
+    explicit v_uint8x16(__m128i v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+                            (char)v4, (char)v5, (char)v6, (char)v7,
+                            (char)v8, (char)v9, (char)v10, (char)v11,
+                            (char)v12, (char)v13, (char)v14, (char)v15);
+    }
+
+    uchar get0() const
+    {
+        return (uchar)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 16 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_int8x16() {}
+    explicit v_int8x16(__m128i v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+    {
+        val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+                            (char)v4, (char)v5, (char)v6, (char)v7,
+                            (char)v8, (char)v9, (char)v10, (char)v11,
+                            (char)v12, (char)v13, (char)v14, (char)v15);
+    }
+
+    schar get0() const
+    {
+        return (schar)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 8 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_uint16x8() {}
+    explicit v_uint16x8(__m128i v) : val(v) {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+    {
+        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+                             (short)v4, (short)v5, (short)v6, (short)v7);
+    }
+
+    ushort get0() const
+    {
+        return (ushort)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 8 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_int16x8() {}
+    explicit v_int16x8(__m128i v) : val(v) {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+    {
+        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+                             (short)v4, (short)v5, (short)v6, (short)v7);
+    }
+
+    short get0() const
+    {
+        return (short)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 4 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_uint32x4() {}
+    explicit v_uint32x4(__m128i v) : val(v) {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+    {
+        val = _mm_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3);
+    }
+
+    unsigned get0() const
+    {
+        return (unsigned)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
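+// Editor's note (illustrative sketch, not part of the upstream header):
+// each v_xxx struct here follows one pattern: a raw __m128/__m128i/__m128d
+// payload in `val`, plus `lane_type`, `nlanes` and `get0()` for lane 0.
+// Typical use through the universal-intrinsics API:
+//
+//   v_uint8x16 v(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
+//   uchar first = v.get0();              // 0
+//   int   lanes = v_uint8x16::nlanes;    // 16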
+struct v_int32x4
+{
+    typedef int lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 4 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_int32x4() {}
+    explicit v_int32x4(__m128i v) : val(v) {}
+    v_int32x4(int v0, int v1, int v2, int v3)
+    {
+        val = _mm_setr_epi32(v0, v1, v2, v3);
+    }
+
+    int get0() const
+    {
+        return _mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    typedef __m128 vector_type;
+    enum { nlanes = 4 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_float32x4() {}
+    explicit v_float32x4(__m128 v) : val(v) {}
+    v_float32x4(float v0, float v1, float v2, float v3)
+    {
+        val = _mm_setr_ps(v0, v1, v2, v3);
+    }
+
+    float get0() const
+    {
+        return _mm_cvtss_f32(val);
+    }
+
+    __m128 val;
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 2 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_uint64x2() {}
+    explicit v_uint64x2(__m128i v) : val(v) {}
+    v_uint64x2(uint64 v0, uint64 v1)
+    {
+#if defined(_MSC_VER) && _MSC_VER >= 1920/*MSVS 2019*/ && defined(_M_X64) && !defined(__clang__)
+        val = _mm_setr_epi64x((int64_t)v0, (int64_t)v1);
+#elif defined(__GNUC__)
+        val = _mm_setr_epi64((__m64)v0, (__m64)v1);
+#else
+        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
+#endif
+    }
+
+    uint64 get0() const
+    {
+    #if !defined(__x86_64__) && !defined(_M_X64)
+        int a = _mm_cvtsi128_si32(val);
+        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
+        return (unsigned)a | ((uint64)(unsigned)b << 32);
+    #else
+        return (uint64)_mm_cvtsi128_si64(val);
+    #endif
+    }
+
+    __m128i val;
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 2 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_int64x2() {}
+    explicit v_int64x2(__m128i v) : val(v) {}
+    v_int64x2(int64 v0, int64 v1)
+    {
+#if defined(_MSC_VER) && _MSC_VER >= 1920/*MSVS 2019*/ && defined(_M_X64) && !defined(__clang__)
+        val = _mm_setr_epi64x((int64_t)v0, (int64_t)v1);
+#elif defined(__GNUC__)
+        val = _mm_setr_epi64((__m64)v0, (__m64)v1);
+#else
+        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
+#endif
+    }
+
+    int64 get0() const
+    {
+    #if !defined(__x86_64__) && !defined(_M_X64)
+        int a = _mm_cvtsi128_si32(val);
+        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
+        return (int64)((unsigned)a | ((uint64)(unsigned)b << 32));
+    #else
+        return _mm_cvtsi128_si64(val);
+    #endif
+    }
+
+    __m128i val;
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    typedef __m128d vector_type;
+    enum { nlanes = 2 };
+
+    /* coverity[uninit_ctor]: suppress warning */
+    v_float64x2() {}
+    explicit v_float64x2(__m128d v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        val = _mm_setr_pd(v0, v1);
+    }
+
+    double get0() const
+    {
+        return _mm_cvtsd_f64(val);
+    }
+
+    __m128d val;
+};
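+// Editor's note (illustrative sketch, not part of the upstream header):
+// the reinterpret helpers below are bit-pattern casts, not value
+// conversions -- the same 128 bits are just relabelled, as with a scalar
+// memcpy between uint32_t and float:
+//
+//   v_uint32x4  u = v_setall_u32(0x3f800000u);
+//   v_float32x4 f = v_reinterpret_as_f32(u);   // every lane reads 1.0f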
+namespace hal_sse_internal
+{
+    template <typename to_sse_type, typename from_sse_type>
+    to_sse_type v_sse_reinterpret_as(const from_sse_type& val);
+
+#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin) \
+    template<> inline \
+    to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \
+    { return sse_cast_intrin(a); }
+
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP)
+}
+
+#define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
+{ return _Tpvec(cast(a.val)); }
+
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, schar, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, schar, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int32x4, int, s32, si128, epi32, int, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_float32x4, float, f32, ps, ps, float, _mm_castsi128_ps)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_float64x2, double, f64, pd, pd, double, _mm_castsi128_pd)
+
+inline v_uint64x2 v_setzero_u64() { return v_uint64x2(_mm_setzero_si128()); }
+inline v_int64x2 v_setzero_s64() { return v_int64x2(_mm_setzero_si128()); }
+inline v_uint64x2 v_setall_u64(uint64 val) { return v_uint64x2(val, val); }
+inline v_int64x2 v_setall_s64(int64 val) { return v_int64x2(val, val); }
+
+template<typename _Tpvec> inline
+v_uint64x2 v_reinterpret_as_u64(const _Tpvec& a) { return v_uint64x2(a.val); }
+template<typename _Tpvec> inline
+v_int64x2 v_reinterpret_as_s64(const _Tpvec& a) { return v_int64x2(a.val); }
+inline v_float32x4 v_reinterpret_as_f32(const v_uint64x2& a)
+{ return v_float32x4(_mm_castsi128_ps(a.val)); }
+inline v_float32x4 v_reinterpret_as_f32(const v_int64x2& a)
+{ return v_float32x4(_mm_castsi128_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_uint64x2& a)
+{ return v_float64x2(_mm_castsi128_pd(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_int64x2& a)
+{ return v_float64x2(_mm_castsi128_pd(a.val)); }
+
+#define OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(_Tpvec, suffix) \
+inline _Tpvec v_reinterpret_as_##suffix(const v_float32x4& a) \
+{ return _Tpvec(_mm_castps_si128(a.val)); } \
+inline _Tpvec v_reinterpret_as_##suffix(const v_float64x2& a) \
+{ return _Tpvec(_mm_castpd_si128(a.val)); }
+
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint8x16, u8)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int8x16, s8)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint16x8, u16)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int16x8, s16)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint32x4, u32)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int32x4, s32)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint64x2, u64)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int64x2, s64)
+
+inline v_float32x4 v_reinterpret_as_f32(const v_float32x4& a) {return a; }
+inline v_float64x2 v_reinterpret_as_f64(const v_float64x2& a) {return a; }
+inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x4(_mm_castpd_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); }
+
+//////////////// PACK ///////////////
+inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    __m128i delta = _mm_set1_epi16(255);
+    return v_uint8x16(_mm_packus_epi16(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta)),
+                                       _mm_subs_epu16(b.val, _mm_subs_epu16(b.val, delta))));
+}
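+// Editor's note (illustrative sketch, not part of the upstream header):
+// SSE2 has no unsigned 16-bit min, so v_pack above clamps with saturating
+// subtraction before the unsigned-saturating pack:
+//
+//   subs_epu16(a, subs_epu16(a, 255)) == a - max(a - 255, 0) == min(a, 255)
+//
+// e.g. a = 300 gives 300 - 45 = 255, while a = 100 gives 100 - 0 = 100.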
+
+inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    __m128i delta = _mm_set1_epi16(255);
+    __m128i a1 = _mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta));
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
+{ return v_uint8x16(_mm_packus_epi16(a.val, b.val)); }
+
+inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
+{ _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a.val, a.val)); }
+
+template<int n> inline
+v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_uint8x16(_mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(a.val, delta), n),
+                                       _mm_srli_epi16(_mm_adds_epu16(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srli_epi16(_mm_adds_epu16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+template<int n> inline
+v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_uint8x16(_mm_packus_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n),
+                                       _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
+{ return v_int8x16(_mm_packs_epi16(a.val, b.val)); }
+
+inline void v_pack_store(schar* ptr, const v_int16x8& a)
+{ _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); }
+
+template<int n> inline
+v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_int8x16(_mm_packs_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n),
+                                     _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n)));
+}
+template<int n> inline
+void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a1, a1));
+}
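+// Editor's note (illustrative sketch, not part of the upstream header):
+// every v_rshr_pack* variant above computes a rounding right shift,
+//   (x + (1 << (n-1))) >> n
+// i.e. round-to-nearest instead of plain truncation: for n = 1, x = 5
+// yields (5 + 1) >> 1 = 3, where a bare shift would give 2.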
a : b" +inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b) +{ +#if CV_SSE4_1 + return _mm_blendv_epi8(b, a, mask); +#else + return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask)); +#endif +} + +inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) +{ return v_uint16x8(_v128_packs_epu32(a.val, b.val)); } + +inline void v_pack_store(ushort* ptr, const v_uint32x4& a) +{ + __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32); + __m128i r = _mm_packs_epi32(a1, a1); + _mm_storel_epi64((__m128i*)ptr, _mm_sub_epi16(r, _mm_set1_epi16(-32768))); +} + +template inline +v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i b1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(b.val, delta), n), delta32); + return v_uint16x8(_mm_sub_epi16(_mm_packs_epi32(a1, b1), _mm_set1_epi16(-32768))); +} + +template inline +void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) +{ + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + _mm_storel_epi64((__m128i*)ptr, a2); +} + +inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_uint16x8(_mm_packus_epi32(a.val, b.val)); +#else + __m128i delta32 = _mm_set1_epi32(32768); + + // preliminary saturate negative values to zero + __m128i a1 = _mm_and_si128(a.val, _mm_cmpgt_epi32(a.val, _mm_set1_epi32(0))); + __m128i b1 = _mm_and_si128(b.val, _mm_cmpgt_epi32(b.val, _mm_set1_epi32(0))); + + __m128i r = _mm_packs_epi32(_mm_sub_epi32(a1, delta32), _mm_sub_epi32(b1, delta32)); + return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768))); +#endif +} + +inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) +{ +#if CV_SSE4_1 + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a.val, a.val)); +#else + __m128i delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(a.val, delta32); + __m128i r = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + _mm_storel_epi64((__m128i*)ptr, r); +#endif +} + +template inline +v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + __m128i delta = _mm_set1_epi32(1 << (n - 1)); + return v_uint16x8(_mm_packus_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), + _mm_srai_epi32(_mm_add_epi32(b.val, delta), n))); +#else + __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768); + __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32); + __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768)); + __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32); + __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768)); + return v_uint16x8(_mm_unpacklo_epi64(a2, b2)); +#endif +} + +template inline +void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) +{ +#if CV_SSE4_1 + __m128i delta = _mm_set1_epi32(1 << (n - 1)); + __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n); + _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a1, a1)); +#else + __m128i delta = _mm_set1_epi32(1 << (n-1)), 
+
+inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
+{ return v_int16x8(_mm_packs_epi32(a.val, b.val)); }
+
+inline void v_pack_store(short* ptr, const v_int32x4& a)
+{
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a.val, a.val));
+}
+
+template<int n> inline
+v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1));
+    return v_int16x8(_mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n),
+                                     _mm_srai_epi32(_mm_add_epi32(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(short* ptr, const v_int32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1));
+    __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a1, a1));
+}
+
+
+// [a0 0 | b0 0] [a1 0 | b1 0]
+inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a1);
+}
+
+// [a0 0 | b0 0] [a1 0 | b1 0]
+inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+inline void v_pack_store(int* ptr, const v_int64x2& a)
+{
+    __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a1);
+}
+
+template<int n> inline
+v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    uint64 delta = (uint64)1 << (n-1);
+    v_uint64x2 delta2(delta, delta);
+    __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n);
+    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+template<int n> inline
+void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    uint64 delta = (uint64)1 << (n-1);
+    v_uint64x2 delta2(delta, delta);
+    __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline __m128i v_sign_epi64(__m128i a)
+{
+    return _mm_shuffle_epi32(_mm_srai_epi32(a, 31), _MM_SHUFFLE(3, 3, 1, 1)); // x m0 | x m1
+}
+
+inline __m128i v_srai_epi64(__m128i a, int imm)
+{
+    __m128i smask = v_sign_epi64(a);
+    return _mm_xor_si128(_mm_srli_epi64(_mm_xor_si128(a, smask), imm), smask);
+}
+
+template<int n> inline
+v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    int64 delta = (int64)1 << (n-1);
+    v_int64x2 delta2(delta, delta);
+    __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i b1 = v_srai_epi64(_mm_add_epi64(b.val, delta2.val), n);
+    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+template<int n> inline
+void v_rshr_pack_store(int* ptr, const v_int64x2& a)
+{
+    int64
delta = (int64)1 << (n-1); + v_int64x2 delta2(delta, delta); + __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n); + __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0)); + _mm_storel_epi64((__m128i*)ptr, a2); +} + +// pack boolean +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + __m128i ab = _mm_packs_epi16(a.val, b.val); + return v_uint8x16(ab); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + __m128i ab = _mm_packs_epi32(a.val, b.val); + __m128i cd = _mm_packs_epi32(c.val, d.val); + return v_uint8x16(_mm_packs_epi16(ab, cd)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + __m128i ab = _mm_packs_epi32(a.val, b.val); + __m128i cd = _mm_packs_epi32(c.val, d.val); + __m128i ef = _mm_packs_epi32(e.val, f.val); + __m128i gh = _mm_packs_epi32(g.val, h.val); + + __m128i abcd = _mm_packs_epi32(ab, cd); + __m128i efgh = _mm_packs_epi32(ef, gh); + return v_uint8x16(_mm_packs_epi16(abcd, efgh)); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val); + __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val); + __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val); + __m128 v3 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(3, 3, 3, 3)), m3.val); + + return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, v3))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val); + __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val); + __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val); + + return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, a.val))); +} + +#define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin) \ + inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ + { \ + return _Tpvec(intrin(a.val, b.val)); \ + } \ + inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ + { \ + a.val = intrin(a.val, b.val); \ + return a; \ + } + +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint8x16, _mm_adds_epu8) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint8x16, _mm_subs_epu8) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint32x4, _v128_mullo_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int32x4, _v128_mullo_epi32) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, 
v_float32x4, _mm_mul_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float32x4, _mm_div_ps) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float64x2, _mm_add_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float64x2, _mm_sub_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float64x2, _mm_mul_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float64x2, _mm_div_pd) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint64x2, _mm_add_epi64) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64) +OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64) +OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec) \ + inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ + { \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ + } \ + inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ + { a = a * b; return a; } + +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8, v_int32x4) + +// Multiply and expand +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v_uint16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + __m128i v0 = _mm_mullo_epi16(a.val, b.val); + __m128i v1 = _mm_mulhi_epi16(a.val, b.val); + c.val = _mm_unpacklo_epi16(v0, v1); + d.val = _mm_unpackhi_epi16(v0, v1); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + __m128i v0 = _mm_mullo_epi16(a.val, b.val); + __m128i v1 = _mm_mulhi_epu16(a.val, b.val); + c.val = _mm_unpacklo_epi16(v0, v1); + d.val = _mm_unpackhi_epi16(v0, v1); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + __m128i c0 = _mm_mul_epu32(a.val, b.val); + __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + c.val = _mm_unpacklo_epi64(c0, c1); + d.val = _mm_unpackhi_epi64(c0, c1); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); } +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); } + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ return v_int32x4(_mm_madd_epi16(a.val, b.val)); } +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + __m128i even = _mm_mul_epi32(a.val, b.val); + __m128i odd = _mm_mul_epi32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + return v_int64x2(_mm_add_epi64(even, odd)); +#else + __m128i even_u = _mm_mul_epu32(a.val, b.val); + __m128i odd_u = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); + // convert unsigned to signed high multiplication (from: Agner Fog(veclib) and H S Warren: Hacker's delight, 2003, p. 
132) + __m128i a_sign = _mm_srai_epi32(a.val, 31); + __m128i b_sign = _mm_srai_epi32(b.val, 31); + // |x * sign of x + __m128i axb = _mm_and_si128(a.val, b_sign); + __m128i bxa = _mm_and_si128(b.val, a_sign); + // sum of sign corrections + __m128i ssum = _mm_add_epi32(bxa, axb); + __m128i even_ssum = _mm_slli_epi64(ssum, 32); + __m128i odd_ssum = _mm_and_si128(ssum, _mm_set_epi32(-1, 0, -1, 0)); + // convert to signed and prod + return v_int64x2(_mm_add_epi64(_mm_sub_epi64(even_u, even_ssum), _mm_sub_epi64(odd_u, odd_ssum))); +#endif +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i a0 = _mm_srli_epi16(_mm_slli_si128(a.val, 1), 8); // even + __m128i a1 = _mm_srli_epi16(a.val, 8); // odd + __m128i b0 = _mm_srli_epi16(_mm_slli_si128(b.val, 1), 8); + __m128i b1 = _mm_srli_epi16(b.val, 8); + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_uint32x4(_mm_add_epi32(p0, p1)); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + __m128i a0 = _mm_srai_epi16(_mm_slli_si128(a.val, 1), 8); // even + __m128i a1 = _mm_srai_epi16(a.val, 8); // odd + __m128i b0 = _mm_srai_epi16(_mm_slli_si128(b.val, 1), 8); + __m128i b1 = _mm_srai_epi16(b.val, 8); + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_int32x4(_mm_add_epi32(p0, p1)); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 c, d; + v_mul_expand(a, b, c, d); + + v_uint64x2 c0, c1, d0, d1; + v_expand(c, c0, c1); + v_expand(d, d0, d1); + + c0 += c1; d0 += d1; + return v_uint64x2(_mm_add_epi64( + _mm_unpacklo_epi64(c0.val, d0.val), + _mm_unpackhi_epi64(c0.val, d0.val) + )); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return v_int64x2(_mm_add_epi64( + _mm_unpacklo_epi64(c.val, d.val), + _mm_unpackhi_epi64(c.val, d.val) + )); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_cvt_f64(v_dotprod(a, b)); +#else + v_float64x2 c = v_cvt_f64(a) * v_cvt_f64(b); + v_float64x2 d = v_cvt_f64_high(a) * v_cvt_f64_high(b); + + return v_float64x2(_mm_add_pd( + _mm_unpacklo_pd(c.val, d.val), + _mm_unpackhi_pd(c.val, d.val) + )); +#endif +} +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const 
v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod_fast(a, b) + c; } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i a0 = v_expand_low(a).val; + __m128i a1 = v_expand_high(a).val; + __m128i b0 = v_expand_low(b).val; + __m128i b1 = v_expand_high(b).val; + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_uint32x4(_mm_add_epi32(p0, p1)); +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + __m128i a0 = _mm_cvtepi8_epi16(a.val); + __m128i a1 = v_expand_high(a).val; + __m128i b0 = _mm_cvtepi8_epi16(b.val); + __m128i b1 = v_expand_high(b).val; + __m128i p0 = _mm_madd_epi16(a0, b0); + __m128i p1 = _mm_madd_epi16(a1, b1); + return v_int32x4(_mm_add_epi32(p0, p1)); +#else + return v_dotprod_expand(a, b); +#endif +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 c, d; + v_mul_expand(a, b, c, d); + + v_uint64x2 c0, c1, d0, d1; + v_expand(c, c0, c1); + v_expand(d, d0, d1); + + c0 += c1; d0 += d1; + return c0 + d0; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return c + d; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c); +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } + +#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \ + OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \ + OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \ + OPENCV_HAL_IMPL_SSE_BIN_OP(^, _Tpvec, _mm_xor_##suffix) \ + inline _Tpvec operator ~ (const _Tpvec& a) \ + { \ + return _Tpvec(_mm_xor_##suffix(a.val, not_const)); \ + } + +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint8x16, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int8x16, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint16x8, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int16x8, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint32x4, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int32x4, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint64x2, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int64x2, si128, _mm_set1_epi32(-1)) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float32x4, ps, _mm_castsi128_ps(_mm_set1_epi32(-1))) +OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float64x2, pd, 
_mm_castsi128_pd(_mm_set1_epi32(-1))) + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ return v_float32x4(_mm_sqrt_ps(x.val)); } + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f); + __m128 t = x.val; + __m128 h = _mm_mul_ps(t, _0_5); + t = _mm_rsqrt_ps(t); + t = _mm_mul_ps(t, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t, t), h))); + return v_float32x4(t); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ return v_float64x2(_mm_sqrt_pd(x.val)); } + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + const __m128d v_1 = _mm_set1_pd(1.); + return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val))); +} + +#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \ +inline _Tpuvec v_abs(const _Tpsvec& x) \ +{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); } + +OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8) +OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16) +inline v_uint32x4 v_abs(const v_int32x4& x) +{ + __m128i s = _mm_srli_epi32(x.val, 31); + __m128i f = _mm_srai_epi32(x.val, 31); + return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, f), s)); +} +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); } +inline v_float64x2 v_abs(const v_float64x2& x) +{ + return v_float64x2(_mm_and_pd(x.val, + _mm_castsi128_pd(_mm_srli_epi64(_mm_set1_epi32(-1), 1)))); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_SSE_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_min, _mm_min_epu8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_max, _mm_max_epu8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_min, _mm_min_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_max, _mm_max_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_min, _mm_min_ps) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_max, _mm_max_ps) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_min, _mm_min_pd) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_max, _mm_max_pd) + +inline v_int8x16 v_min(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + return v_int8x16(_mm_min_epi8(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi8((char)-128); + return v_int8x16(_mm_xor_si128(delta, _mm_min_epu8(_mm_xor_si128(a.val, delta), + _mm_xor_si128(b.val, delta)))); +#endif +} +inline v_int8x16 v_max(const v_int8x16& a, const v_int8x16& b) +{ +#if CV_SSE4_1 + return v_int8x16(_mm_max_epi8(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi8((char)-128); + return v_int8x16(_mm_xor_si128(delta, _mm_max_epu8(_mm_xor_si128(a.val, delta), + _mm_xor_si128(b.val, delta)))); +#endif +} +inline v_uint16x8 v_min(const v_uint16x8& a, const v_uint16x8& b) +{ +#if CV_SSE4_1 + return v_uint16x8(_mm_min_epu16(a.val, b.val)); +#else + return v_uint16x8(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, b.val))); +#endif +} +inline v_uint16x8 v_max(const v_uint16x8& a, const v_uint16x8& b) +{ +#if CV_SSE4_1 + return v_uint16x8(_mm_max_epu16(a.val, b.val)); +#else + return v_uint16x8(_mm_adds_epu16(_mm_subs_epu16(a.val, b.val), b.val)); +#endif +} +inline v_uint32x4 v_min(const v_uint32x4& a, const v_uint32x4& b) +{ +#if CV_SSE4_1 + return v_uint32x4(_mm_min_epu32(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi32((int)0x80000000); + __m128i mask = 
_mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)); + return v_uint32x4(v_select_si128(mask, b.val, a.val)); +#endif +} +inline v_uint32x4 v_max(const v_uint32x4& a, const v_uint32x4& b) +{ +#if CV_SSE4_1 + return v_uint32x4(_mm_max_epu32(a.val, b.val)); +#else + __m128i delta = _mm_set1_epi32((int)0x80000000); + __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta)); + return v_uint32x4(v_select_si128(mask, a.val, b.val)); +#endif +} +inline v_int32x4 v_min(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_int32x4(_mm_min_epi32(a.val, b.val)); +#else + return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), b.val, a.val)); +#endif +} +inline v_int32x4 v_max(const v_int32x4& a, const v_int32x4& b) +{ +#if CV_SSE4_1 + return v_int32x4(_mm_max_epi32(a.val, b.val)); +#else + return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), a.val, b.val)); +#endif +} + +#define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit) \ +inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \ +{ return _Tpuvec(_mm_cmpeq_##suffix(a.val, b.val)); } \ +inline _Tpuvec operator != (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpuvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \ +} \ +inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \ +{ return _Tpsvec(_mm_cmpeq_##suffix(a.val, b.val)); } \ +inline _Tpsvec operator != (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpsvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \ +} \ +inline _Tpuvec operator < (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask))); \ +} \ +inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask))); \ +} \ +inline _Tpuvec operator <= (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + __m128i not_mask = _mm_set1_epi32(-1); \ + __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask)); \ + return _Tpuvec(_mm_xor_si128(res, not_mask)); \ +} \ +inline _Tpuvec operator >= (const _Tpuvec& a, const _Tpuvec& b) \ +{ \ + __m128i smask = _mm_set1_##suffix(sbit); \ + __m128i not_mask = _mm_set1_epi32(-1); \ + __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask)); \ + return _Tpuvec(_mm_xor_si128(res, not_mask)); \ +} \ +inline _Tpsvec operator < (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + return _Tpsvec(_mm_cmpgt_##suffix(b.val, a.val)); \ +} \ +inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + return _Tpsvec(_mm_cmpgt_##suffix(a.val, b.val)); \ +} \ +inline _Tpsvec operator <= (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(a.val, b.val), not_mask)); \ +} \ +inline _Tpsvec operator >= (const _Tpsvec& a, const _Tpsvec& b) \ +{ \ + __m128i not_mask = _mm_set1_epi32(-1); \ + return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(b.val, a.val), not_mask)); \ +} + +OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint8x16, v_int8x16, epi8, (char)-128) +OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint16x8, v_int16x8, epi16, (short)-32768) 
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint32x4, v_int32x4, epi32, (int)0x80000000) + +#define OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(_Tpvec, suffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpeq_##suffix(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpneq_##suffix(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmplt_##suffix(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpgt_##suffix(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmple_##suffix(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpge_##suffix(a.val, b.val)); } + +OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps) +OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd) + +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#else +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \ + return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#endif + +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(_mm_cmpord_pd(a.val, a.val)); } + +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_add_wrap, _mm_add_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_mul_wrap, _mm_mullo_epi16) +OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_mul_wrap, _mm_mullo_epi16) + +inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i ad = _mm_srai_epi16(a.val, 8); + __m128i bd = _mm_srai_epi16(b.val, 8); + __m128i p0 = _mm_mullo_epi16(a.val, b.val); // even + __m128i p1 = _mm_slli_epi16(_mm_mullo_epi16(ad, bd), 8); // odd + const __m128i b01 = _mm_set1_epi32(0xFF00FF00); + return v_uint8x16(_v128_blendv_epi8(p0, p1, b01)); +} +inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) +{ + return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); +} + +/** Absolute difference **/ + +inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const 
v_int8x16& b)
+{
+    v_int8x16 d = v_sub_wrap(a, b);
+    v_int8x16 m = a < b;
+    return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m));
+}
+inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
+{
+    return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)));
+}
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
+{
+    v_int32x4 d = a - b;
+    v_int32x4 m = a < b;
+    return v_reinterpret_as_u32((d ^ m) - m);
+}
+
+/** Saturating absolute difference **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{
+    v_int8x16 d = a - b;
+    v_int8x16 m = a < b;
+    return (d ^ m) - m;
+}
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ return v_max(a, b) - v_min(a, b); }
+
+
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return a * b + c;
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_fma(a, b, c);
+}
+
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+#if CV_FMA3
+    return v_float32x4(_mm_fmadd_ps(a.val, b.val, c.val));
+#else
+    return v_float32x4(_mm_add_ps(_mm_mul_ps(a.val, b.val), c.val));
+#endif
+}
+
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+#if CV_FMA3
+    return v_float64x2(_mm_fmadd_pd(a.val, b.val, c.val));
+#else
+    return v_float64x2(_mm_add_pd(_mm_mul_pd(a.val, b.val), c.val));
+#endif
+}
+
+#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
+inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg absmask = _mm_castsi128_##suffix(absmask_vec); \
+    return _Tpvec(_mm_and_##suffix(_mm_sub_##suffix(a.val, b.val), absmask)); \
+} \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpvec res = v_fma(a, a, b*b); \
+    return _Tpvec(_mm_sqrt_##suffix(res.val)); \
+} \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return v_fma(a, a, b*b); \
+} \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ \
+    return v_fma(a, b, c); \
+}
+
+OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float32x4, float, __m128, ps, _mm_set1_epi32((int)0x7fffffff))
+OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float64x2, double, __m128d, pd, _mm_srli_epi64(_mm_set1_epi32(-1), 1))
+
+#define OPENCV_HAL_IMPL_SSE_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \
+inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \
+} \
+inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(srai(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shl(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shl(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shr(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shr(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(srai(a.val, imm)); \
+}
+
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint16x8, v_int16x8, epi16, _mm_srai_epi16)
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint32x4, v_int32x4, epi32, _mm_srai_epi32)
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint64x2, v_int64x2, epi64, v_srai_epi64)
+
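+// Usage sketch (illustrative comment, not part of the original file): v_shl and
+// v_shr take the shift amount as a compile-time template argument, which lets
+// the compiler emit the immediate-operand form of the shift, while the
+// operator<< / operator>> overloads above accept a runtime amount.
+//   v_uint16x8 y = v_shl<4>(v_setall_u16(0x00ff)); // each lane becomes 0x0ff0
+//   v_int32x4  z = v_shr<2>(v_setall_s32(-8));     // arithmetic shift: -8 >> 2 == -2
+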
+namespace hal_sse_internal
+{
+    template <int imm,
+        bool is_invalid = ((imm < 0) || (imm > 16)),
+        bool is_first = (imm == 0),
+        bool is_half = (imm == 8),
+        bool is_second = (imm == 16),
+        bool is_other = (((imm > 0) && (imm < 8)) || ((imm > 8) && (imm < 16)))>
+    class v_sse_palignr_u8_class;
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, true, false, false, false, false>;
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, true, false, false, false>
+    {
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i&) const
+        {
+            return a;
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, true, false, false>
+    {
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            return _mm_unpacklo_epi64(_mm_unpackhi_epi64(a, a), b);
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, false, true, false>
+    {
+    public:
+        inline __m128i operator()(const __m128i&, const __m128i& b) const
+        {
+            return b;
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, false, false, true>
+    {
+#if CV_SSSE3
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            return _mm_alignr_epi8(b, a, imm);
+        }
+#else
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            enum { imm2 = (sizeof(__m128i) - imm) };
+            return _mm_or_si128(_mm_srli_si128(a, imm), _mm_slli_si128(b, imm2));
+        }
+#endif
+    };
+
+    template <int imm>
+    inline __m128i v_sse_palignr_u8(const __m128i& a, const __m128i& b)
+    {
+        CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_sse_palignr_u8.");
+        return v_sse_palignr_u8_class<imm>()(a, b);
+    }
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        _mm_srli_si128(
+            v_sse_reinterpret_as<__m128i>(a.val), imm2)));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        _mm_slli_si128(
+            v_sse_reinterpret_as<__m128i>(a.val), imm2)));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        v_sse_palignr_u8<imm2>(
+            v_sse_reinterpret_as<__m128i>(a.val),
+            v_sse_reinterpret_as<__m128i>(b.val))));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        v_sse_palignr_u8<imm2>(
+            v_sse_reinterpret_as<__m128i>(b.val),
+            v_sse_reinterpret_as<__m128i>(a.val))));
+}
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
+                                     _mm_loadl_epi64((const __m128i*)ptr1))); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storeu_si128((__m128i*)ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ _mm_store_si128((__m128i*)ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ _mm_stream_si128((__m128i*)ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, 
hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_si128((__m128i*)ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_si128((__m128i*)ptr, a.val); \ + else \ + _mm_store_si128((__m128i*)ptr, a.val); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storel_epi64((__m128i*)ptr, a.val); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a.val, a.val)); } + +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint8x16, uchar) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int8x16, schar) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint16x8, ushort) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int16x8, short) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int32x4, int) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint64x2, uint64) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int64x2, int64) + +#define OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(_Tpvec, _Tp, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ return _Tpvec(_mm_loadu_##suffix(ptr)); } \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ return _Tpvec(_mm_load_##suffix(ptr)); } \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ return _Tpvec(_mm_castsi128_##suffix(_mm_loadl_epi64((const __m128i*)ptr))); } \ +inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ +{ \ + return _Tpvec(_mm_castsi128_##suffix( \ + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \ + _mm_loadl_epi64((const __m128i*)ptr1)))); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storeu_##suffix(ptr, a.val); } \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ _mm_store_##suffix(ptr, a.val); } \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ _mm_stream_##suffix(ptr, a.val); } \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \ +{ \ + if( mode == hal::STORE_UNALIGNED ) \ + _mm_storeu_##suffix(ptr, a.val); \ + else if( mode == hal::STORE_ALIGNED_NOCACHE ) \ + _mm_stream_##suffix(ptr, a.val); \ + else \ + _mm_store_##suffix(ptr, a.val); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ +{ \ + __m128i a1 = _mm_cast##suffix##_si128(a.val); \ + _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a1, a1)); \ +} + +OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) +OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) + +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + __m128i half = _mm_sad_epu8(a.val, _mm_setzero_si128()); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + __m128i half = _mm_set1_epi8((schar)-128); + half = _mm_sad_epu8(_mm_xor_si128(a.val, half), _mm_setzero_si128()); + return _mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))) - 2048; +} +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(func) \ +inline schar v_reduce_##func(const v_int8x16& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi8((schar)-128); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (schar)_mm_cvtsi128_si32(val) ^ (schar)-128; \ +} \ +inline uchar 
v_reduce_##func(const v_uint8x16& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (uchar)_mm_cvtsi128_si32(val); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(min) + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \ +inline scalartype v_reduce_##func(const v_##_Tpvec& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ + return (scalartype)_mm_cvtsi128_si32(val); \ +} \ +inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi16(sbit); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ + return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768) + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 8))); \ + val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 4))); \ + return (scalartype)_mm_cvt##extract(val); \ +} + +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + scalartype CV_DECL_ALIGNED(16) buf[4]; \ + v_store_aligned(buf, a); \ + scalartype s0 = scalar_func(buf[0], buf[1]); \ + scalartype s1 = scalar_func(buf[2], buf[3]); \ + return scalar_func(s0, s1); \ +} + +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32) + +inline int v_reduce_sum(const v_int16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline double v_reduce_sum(const v_float64x2& a) +{ + double CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ +#if CV_SSE3 + __m128 ab = _mm_hadd_ps(a.val, b.val); + __m128 cd = _mm_hadd_ps(c.val, d.val); + return v_float32x4(_mm_hadd_ps(ab, cd)); +#else + __m128 ac = _mm_add_ps(_mm_unpacklo_ps(a.val, c.val), _mm_unpackhi_ps(a.val, c.val)); + __m128 bd = 
_mm_add_ps(_mm_unpacklo_ps(b.val, d.val), _mm_unpackhi_ps(b.val, d.val)); + return v_float32x4(_mm_add_ps(_mm_unpacklo_ps(ac, bd), _mm_unpackhi_ps(ac, bd))); +#endif +} + +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min) + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + __m128i half = _mm_sad_epu8(a.val, b.val); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + __m128i half = _mm_set1_epi8(0x7f); + half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + v_uint32x4 l, h; + v_expand(v_absdiff(a, b), l, h); + return v_reduce_sum(l + h); +} +inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + return v_reduce_sum(v_absdiff(a, b)); +} + +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ + __m128i m1 = _mm_set1_epi32(0x55555555); + __m128i m2 = _mm_set1_epi32(0x33333333); + __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); + __m128i p = a.val; + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); + return v_uint8x16(p); +} +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff); +} +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff); +} +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ + return v_uint64x2(_mm_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm_setzero_si128())); +} +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } + +#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \ +inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \ +inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \ +inline bool v_check_any(const _Tpvec& a) { return 
_mm_movemask_##suffix(cast_op(a.val)) != 0; } +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3) + +#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \ +inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \ +inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \ +inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; } +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8) +OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8) + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \ +} + +OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps) +// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd) +// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd) + +#else // CV_SSE4_1 + +#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(_mm_xor_##suffix(b.val, _mm_and_##suffix(_mm_xor_##suffix(b.val, a.val), mask.val))); \ +} + +OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, si128) 
+OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128) +// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128) +// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128) +OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps) +OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd) +#endif + +/* Expand */ +#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ + inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ + { \ + b0.val = intrin(a.val); \ + b1.val = __CV_CAT(intrin, _high)(a.val); \ + } \ + inline _Tpwvec v_expand_low(const _Tpvec& a) \ + { return _Tpwvec(intrin(a.val)); } \ + inline _Tpwvec v_expand_high(const _Tpvec& a) \ + { return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \ + inline _Tpwvec v_load_expand(const _Tp* ptr) \ + { \ + __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ + return _Tpwvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, _v128_cvtepu8_epi16) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int8x16, v_int16x8, schar, _v128_cvtepi8_epi16) +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, _v128_cvtepu16_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int16x8, v_int32x4, short, _v128_cvtepi16_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND(v_uint32x4, v_uint64x2, unsigned, _v128_cvtepu32_epi64) +OPENCV_HAL_IMPL_SSE_EXPAND(v_int32x4, v_int64x2, int, _v128_cvtepi32_epi64) + +#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v_load_expand_q(const _Tp* ptr) \ + { \ + __m128i a = _mm_cvtsi32_si128(*(const int*)ptr); \ + return _Tpvec(intrin(a)); \ + } + +OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_uint32x4, uchar, _v128_cvtepu8_epi32) +OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_int32x4, schar, _v128_cvtepi8_epi32) + +#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \ +inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \ +{ \ + b0.val = _mm_unpacklo_##suffix(a0.val, a1.val); \ + b1.val = _mm_unpackhi_##suffix(a0.val, a1.val); \ +} \ +inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \ +{ \ + __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \ + return _Tpvec(cast_to(_mm_unpacklo_epi64(a1, b1))); \ +} \ +inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \ +{ \ + __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \ + return _Tpvec(cast_to(_mm_unpackhi_epi64(a1, b1))); \ +} \ +inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \ +{ \ + __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \ + c.val = cast_to(_mm_unpacklo_epi64(a1, b1)); \ + d.val = cast_to(_mm_unpackhi_epi64(a1, b1)); \ +} + +OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_int16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) +OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd) + +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return 
v_uint8x16(_mm_shuffle_epi8(a.val, perm));
+#else
+    uchar CV_DECL_ALIGNED(32) d[16];
+    v_store_aligned(d, a);
+    return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]);
+#endif
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+#if CV_SSSE3
+    static const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
+    return v_uint16x8(_mm_shuffle_epi8(a.val, perm));
+#else
+    __m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3));
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1));
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1));
+    return v_uint16x8(r);
+#endif
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
+
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{
+    return v_rotate_right<s>(a, b);
+}
+
+inline v_int32x4 v_round(const v_float32x4& a)
+{ return v_int32x4(_mm_cvtps_epi32(a.val)); }
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    __m128i a1 = _mm_cvtps_epi32(a.val);
+    __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(_mm_cvtepi32_ps(a1), a.val));
+    return v_int32x4(_mm_add_epi32(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    __m128i a1 = _mm_cvtps_epi32(a.val);
+    __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(a.val, _mm_cvtepi32_ps(a1)));
+    return v_int32x4(_mm_sub_epi32(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(_mm_cvttps_epi32(a.val)); }
+
+inline v_int32x4 v_round(const v_float64x2& a)
+{ return v_int32x4(_mm_cvtpd_epi32(a.val)); }
+
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{
+    __m128i ai = _mm_cvtpd_epi32(a.val), bi = _mm_cvtpd_epi32(b.val);
+    return v_int32x4(_mm_unpacklo_epi64(ai, bi));
+}
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    __m128i a1 = _mm_cvtpd_epi32(a.val);
+    __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(_mm_cvtepi32_pd(a1), a.val));
+    mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0
+    return v_int32x4(_mm_add_epi32(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    __m128i a1 = _mm_cvtpd_epi32(a.val);
+    __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(a.val, _mm_cvtepi32_pd(a1)));
+    mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0
+    return v_int32x4(_mm_sub_epi32(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{ return v_int32x4(_mm_cvttpd_epi32(a.val)); }
+
+#define OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, 
const _Tpvec& a3, \ + _Tpvec& b0, _Tpvec& b1, \ + _Tpvec& b2, _Tpvec& b3) \ +{ \ + __m128i t0 = cast_from(_mm_unpacklo_##suffix(a0.val, a1.val)); \ + __m128i t1 = cast_from(_mm_unpacklo_##suffix(a2.val, a3.val)); \ + __m128i t2 = cast_from(_mm_unpackhi_##suffix(a0.val, a1.val)); \ + __m128i t3 = cast_from(_mm_unpackhi_##suffix(a2.val, a3.val)); \ +\ + b0.val = cast_to(_mm_unpacklo_epi64(t0, t1)); \ + b1.val = cast_to(_mm_unpackhi_epi64(t0, t1)); \ + b2.val = cast_to(_mm_unpacklo_epi64(t2, t3)); \ + b3.val = cast_to(_mm_unpackhi_epi64(t2, t3)); \ +} + +OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) + +// load deinterleave +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) +{ + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + + __m128i t10 = _mm_unpacklo_epi8(t00, t01); + __m128i t11 = _mm_unpackhi_epi8(t00, t01); + + __m128i t20 = _mm_unpacklo_epi8(t10, t11); + __m128i t21 = _mm_unpackhi_epi8(t10, t11); + + __m128i t30 = _mm_unpacklo_epi8(t20, t21); + __m128i t31 = _mm_unpackhi_epi8(t20, t21); + + a.val = _mm_unpacklo_epi8(t30, t31); + b.val = _mm_unpackhi_epi8(t30, t31); +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) +{ +#if CV_SSE4_1 + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + __m128i s0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + __m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1); + __m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1); + __m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1); + const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13); + const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14); + const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15); + a0 = _mm_shuffle_epi8(a0, sh_b); + b0 = _mm_shuffle_epi8(b0, sh_g); + c0 = _mm_shuffle_epi8(c0, sh_r); + a.val = a0; + b.val = b0; + c.val = c0; +#elif CV_SSSE3 + const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14); + const __m128i m1 = _mm_alignr_epi8(m0, m0, 11); + const __m128i m2 = _mm_alignr_epi8(m0, m0, 6); + + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + + __m128i s0 = _mm_shuffle_epi8(t0, m0); + __m128i s1 = _mm_shuffle_epi8(t1, m1); + __m128i s2 = _mm_shuffle_epi8(t2, m2); + + t0 = _mm_alignr_epi8(s1, _mm_slli_si128(s0, 10), 5); + a.val = _mm_alignr_epi8(s2, t0, 5); + + t1 = _mm_alignr_epi8(_mm_srli_si128(s1, 5), _mm_slli_si128(s0, 5), 6); + b.val = _mm_alignr_epi8(_mm_srli_si128(s2, 5), t1, 5); + + t2 = _mm_alignr_epi8(_mm_srli_si128(s2, 10), s1, 11); + c.val = _mm_alignr_epi8(t2, s0, 11); +#else + __m128i t00 = _mm_loadu_si128((const __m128i*)ptr); + __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16)); + __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32)); + + __m128i t10 = _mm_unpacklo_epi8(t00, 
_mm_unpackhi_epi64(t01, t01)); + __m128i t11 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t00, t00), t02); + __m128i t12 = _mm_unpacklo_epi8(t01, _mm_unpackhi_epi64(t02, t02)); + + __m128i t20 = _mm_unpacklo_epi8(t10, _mm_unpackhi_epi64(t11, t11)); + __m128i t21 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t10, t10), t12); + __m128i t22 = _mm_unpacklo_epi8(t11, _mm_unpackhi_epi64(t12, t12)); + + __m128i t30 = _mm_unpacklo_epi8(t20, _mm_unpackhi_epi64(t21, t21)); + __m128i t31 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t20, t20), t22); + __m128i t32 = _mm_unpacklo_epi8(t21, _mm_unpackhi_epi64(t22, t22)); + + a.val = _mm_unpacklo_epi8(t30, _mm_unpackhi_epi64(t31, t31)); + b.val = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t30, t30), t32); + c.val = _mm_unpacklo_epi8(t31, _mm_unpackhi_epi64(t32, t32)); +#endif +} + +inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d) +{ + __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ... + __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ... + __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); // a8 b8 c8 d8 ... + __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 48)); // a12 b12 c12 d12 ... + + __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 a8 b0 b8 ... + __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a2 a10 b2 b10 ... + __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a4 a12 b4 b12 ... + __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a6 a14 b6 b14 ... + + u0 = _mm_unpacklo_epi8(v0, v2); // a0 a4 a8 a12 ... + u1 = _mm_unpacklo_epi8(v1, v3); // a2 a6 a10 a14 ... + u2 = _mm_unpackhi_epi8(v0, v2); // a1 a5 a9 a13 ... + u3 = _mm_unpackhi_epi8(v1, v3); // a3 a7 a11 a15 ... + + v0 = _mm_unpacklo_epi8(u0, u1); // a0 a2 a4 a6 ... + v1 = _mm_unpacklo_epi8(u2, u3); // a1 a3 a5 a7 ... + v2 = _mm_unpackhi_epi8(u0, u1); // c0 c2 c4 c6 ... + v3 = _mm_unpackhi_epi8(u2, u3); // c1 c3 c5 c7 ... 
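+    // One more unpack round below merges the even/odd element streams, so each
+    // destination register ends up holding a single, fully deinterleaved channel.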
+
+    a.val = _mm_unpacklo_epi8(v0, v1);
+    b.val = _mm_unpackhi_epi8(v0, v1);
+    c.val = _mm_unpacklo_epi8(v2, v3);
+    d.val = _mm_unpackhi_epi8(v2, v3);
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
+{
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1 a2 b2 a3 b3
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
+    __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
+    __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
+    __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
+
+    a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
+{
+#if CV_SSE4_1
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8));
+    __m128i v2 = _mm_loadu_si128((__m128i*)(ptr + 16));
+    __m128i a0 = _mm_blend_epi16(_mm_blend_epi16(v0, v1, 0x92), v2, 0x24);
+    __m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24);
+    __m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24);
+
+    const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
+    const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    a0 = _mm_shuffle_epi8(a0, sh_a);
+    b0 = _mm_shuffle_epi8(b0, sh_b);
+    c0 = _mm_shuffle_epi8(c0, sh_c);
+
+    a.val = a0;
+    b.val = b0;
+    c.val = c0;
+#else
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+
+    __m128i t10 = _mm_unpacklo_epi16(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi16(t01, _mm_unpackhi_epi64(t02, t02));
+
+    __m128i t20 = _mm_unpacklo_epi16(t10, _mm_unpackhi_epi64(t11, t11));
+    __m128i t21 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t10, t10), t12);
+    __m128i t22 = _mm_unpacklo_epi16(t11, _mm_unpackhi_epi64(t12, t12));
+
+    a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21));
+    b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22);
+    c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22));
+#endif
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
+{
+    __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1
+    __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 8)); // a2 b2 c2 d2 ...
+    __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ...
+    __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 24)); // a6 b6 c6 d6 ...
+
+    __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 a4 b0 b4 ...
+    __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a1 a5 b1 b5 ...
+    __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a2 a6 b2 b6 ...
+    __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a3 a7 b3 b7 ...
+
+    u0 = _mm_unpacklo_epi16(v0, v2); // a0 a2 a4 a6 ...
+    u1 = _mm_unpacklo_epi16(v1, v3); // a1 a3 a5 a7 ...
+    u2 = _mm_unpackhi_epi16(v0, v2); // c0 c2 c4 c6 ...
+    u3 = _mm_unpackhi_epi16(v1, v3); // c1 c3 c5 c7 ... 
+
+    a.val = _mm_unpacklo_epi16(u0, u1);
+    b.val = _mm_unpackhi_epi16(u0, u1);
+    c.val = _mm_unpacklo_epi16(u2, u3);
+    d.val = _mm_unpackhi_epi16(u2, u3);
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
+{
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 4)); // a2 b2 a3 b3
+
+    __m128i v2 = _mm_unpacklo_epi32(v0, v1); // a0 a2 b0 b2
+    __m128i v3 = _mm_unpackhi_epi32(v0, v1); // a1 a3 b1 b3
+
+    a.val = _mm_unpacklo_epi32(v2, v3); // a0 a1 a2 a3
+    b.val = _mm_unpackhi_epi32(v2, v3); // b0 b1 b2 b3
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 4));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 8));
+
+    __m128i t10 = _mm_unpacklo_epi32(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi32(t01, _mm_unpackhi_epi64(t02, t02));
+
+    a.val = _mm_unpacklo_epi32(t10, _mm_unpackhi_epi64(t11, t11));
+    b.val = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t10, t10), t12);
+    c.val = _mm_unpacklo_epi32(t11, _mm_unpackhi_epi64(t12, t12));
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
+{
+    v_uint32x4 s0(_mm_loadu_si128((const __m128i*)ptr));        // a0 b0 c0 d0
+    v_uint32x4 s1(_mm_loadu_si128((const __m128i*)(ptr + 4)));  // a1 b1 c1 d1
+    v_uint32x4 s2(_mm_loadu_si128((const __m128i*)(ptr + 8)));  // a2 b2 c2 d2
+    v_uint32x4 s3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
+
+    v_transpose4x4(s0, s1, s2, s3, a, b, c, d);
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
+{
+    __m128 u0 = _mm_loadu_ps(ptr);       // a0 b0 a1 b1
+    __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
+
+    a.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a1 a2 a3
+    b.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(3, 1, 3, 1)); // b0 b1 b2 b3
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
+{
+    __m128 t0 = _mm_loadu_ps(ptr + 0);
+    __m128 t1 = _mm_loadu_ps(ptr + 4);
+    __m128 t2 = _mm_loadu_ps(ptr + 8);
+
+    __m128 at12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 1, 0, 2));
+    a.val = _mm_shuffle_ps(t0, at12, _MM_SHUFFLE(2, 0, 3, 0));
+
+    __m128 bt01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 0, 0, 1));
+    __m128 bt12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 2, 0, 3));
+    b.val = _mm_shuffle_ps(bt01, bt12, _MM_SHUFFLE(2, 0, 2, 0));
+
+    __m128 ct01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 1, 0, 2));
+    c.val = _mm_shuffle_ps(ct01, t2, _MM_SHUFFLE(3, 0, 2, 0));
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
+{
+    __m128 t0 = _mm_loadu_ps(ptr + 0);
+    __m128 t1 = _mm_loadu_ps(ptr + 4);
+    __m128 t2 = _mm_loadu_ps(ptr + 8);
+    __m128 t3 = _mm_loadu_ps(ptr + 12);
+    __m128 t02lo = _mm_unpacklo_ps(t0, t2);
+    __m128 t13lo = _mm_unpacklo_ps(t1, t3);
+    __m128 t02hi = _mm_unpackhi_ps(t0, t2);
+    __m128 t13hi = _mm_unpackhi_ps(t1, t3);
+    a.val = _mm_unpacklo_ps(t02lo, t13lo);
+    b.val = _mm_unpackhi_ps(t02lo, t13lo);
+    c.val = _mm_unpacklo_ps(t02hi, t13hi);
+    d.val = _mm_unpackhi_ps(t02hi, t13hi);
+}
+
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b)
+{
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t1 = 
_mm_loadu_si128((const __m128i*)(ptr + 2)); + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); + b = v_uint64x2(_mm_unpackhi_epi64(t0, t1)); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0, b0 + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0, a1 + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // b1, c1 + + t1 = _mm_shuffle_epi32(t1, 0x4e); // a1, c0 + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); + b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2)); + c = v_uint64x2(_mm_unpackhi_epi64(t1, t2)); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, + v_uint64x2& b, v_uint64x2& c, v_uint64x2& d) +{ + __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 + __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0 d0 + __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // a1 b1 + __m128i t3 = _mm_loadu_si128((const __m128i*)(ptr + 6)); // c1 d1 + + a = v_uint64x2(_mm_unpacklo_epi64(t0, t2)); + b = v_uint64x2(_mm_unpackhi_epi64(t0, t2)); + c = v_uint64x2(_mm_unpacklo_epi64(t1, t3)); + d = v_uint64x2(_mm_unpackhi_epi64(t1, t3)); +} + +// store interleave + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi8(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi8(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ +#if CV_SSE4_1 + const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5); + const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10); + const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15); + __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); + __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); + __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); + + const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0); + const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0); + __m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0); + __m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0); + __m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0); +#elif CV_SSSE3 + const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5); + const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10); + const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15); + + __m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5); + t0 = _mm_alignr_epi8(c.val, t0, 5); + __m128i v0 = _mm_shuffle_epi8(t0, m0); + + __m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6); + t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5); + __m128i v1 = _mm_shuffle_epi8(t1, m1); + + __m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11); + t2 = 
_mm_alignr_epi8(t2, a.val, 11); + __m128i v2 = _mm_shuffle_epi8(t2, m2); +#else + __m128i z = _mm_setzero_si128(); + __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val); + __m128i ab1 = _mm_unpackhi_epi8(a.val, b.val); + __m128i c0 = _mm_unpacklo_epi8(c.val, z); + __m128i c1 = _mm_unpackhi_epi8(c.val, z); + + __m128i p00 = _mm_unpacklo_epi16(ab0, c0); + __m128i p01 = _mm_unpackhi_epi16(ab0, c0); + __m128i p02 = _mm_unpacklo_epi16(ab1, c1); + __m128i p03 = _mm_unpackhi_epi16(ab1, c1); + + __m128i p10 = _mm_unpacklo_epi32(p00, p01); + __m128i p11 = _mm_unpackhi_epi32(p00, p01); + __m128i p12 = _mm_unpacklo_epi32(p02, p03); + __m128i p13 = _mm_unpackhi_epi32(p02, p03); + + __m128i p20 = _mm_unpacklo_epi64(p10, p11); + __m128i p21 = _mm_unpackhi_epi64(p10, p11); + __m128i p22 = _mm_unpacklo_epi64(p12, p13); + __m128i p23 = _mm_unpackhi_epi64(p12, p13); + + p20 = _mm_slli_si128(p20, 1); + p22 = _mm_slli_si128(p22, 1); + + __m128i p30 = _mm_slli_epi64(_mm_unpacklo_epi32(p20, p21), 8); + __m128i p31 = _mm_srli_epi64(_mm_unpackhi_epi32(p20, p21), 8); + __m128i p32 = _mm_slli_epi64(_mm_unpacklo_epi32(p22, p23), 8); + __m128i p33 = _mm_srli_epi64(_mm_unpackhi_epi32(p22, p23), 8); + + __m128i p40 = _mm_unpacklo_epi64(p30, p31); + __m128i p41 = _mm_unpackhi_epi64(p30, p31); + __m128i p42 = _mm_unpacklo_epi64(p32, p33); + __m128i p43 = _mm_unpackhi_epi64(p32, p33); + + __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10)); + __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6)); + __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2)); +#endif + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + } +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + __m128i u0 = _mm_unpacklo_epi8(a.val, c.val); // a0 c0 a1 c1 ... + __m128i u1 = _mm_unpackhi_epi8(a.val, c.val); // a8 c8 a9 c9 ... + __m128i u2 = _mm_unpacklo_epi8(b.val, d.val); // b0 d0 b1 d1 ... + __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ... + + __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ... + __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ... + __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ... + __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ... 
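+    // v0..v3 now hold the a,b,c,d quadruples in memory order; the branches
+    // below only select the store flavor (streaming, aligned, or unaligned).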
+ + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 16), v1); + _mm_stream_si128((__m128i*)(ptr + 32), v2); + _mm_stream_si128((__m128i*)(ptr + 48), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 16), v1); + _mm_store_si128((__m128i*)(ptr + 32), v2); + _mm_store_si128((__m128i*)(ptr + 48), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 16), v1); + _mm_storeu_si128((__m128i*)(ptr + 32), v2); + _mm_storeu_si128((__m128i*)(ptr + 48), v3); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi16(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi16(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ +#if CV_SSE4_1 + const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11); + const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5); + const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15); + __m128i a0 = _mm_shuffle_epi8(a.val, sh_a); + __m128i b0 = _mm_shuffle_epi8(b.val, sh_b); + __m128i c0 = _mm_shuffle_epi8(c.val, sh_c); + + __m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24); + __m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24); + __m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24); +#else + __m128i z = _mm_setzero_si128(); + __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val); + __m128i ab1 = _mm_unpackhi_epi16(a.val, b.val); + __m128i c0 = _mm_unpacklo_epi16(c.val, z); + __m128i c1 = _mm_unpackhi_epi16(c.val, z); + + __m128i p10 = _mm_unpacklo_epi32(ab0, c0); + __m128i p11 = _mm_unpackhi_epi32(ab0, c0); + __m128i p12 = _mm_unpacklo_epi32(ab1, c1); + __m128i p13 = _mm_unpackhi_epi32(ab1, c1); + + __m128i p20 = _mm_unpacklo_epi64(p10, p11); + __m128i p21 = _mm_unpackhi_epi64(p10, p11); + __m128i p22 = _mm_unpacklo_epi64(p12, p13); + __m128i p23 = _mm_unpackhi_epi64(p12, p13); + + p20 = _mm_slli_si128(p20, 2); + p22 = _mm_slli_si128(p22, 2); + + __m128i p30 = _mm_unpacklo_epi64(p20, p21); + __m128i p31 = _mm_unpackhi_epi64(p20, p21); + __m128i p32 = _mm_unpacklo_epi64(p22, p23); + __m128i p33 = _mm_unpackhi_epi64(p22, p23); + + __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10)); + __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6)); + __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2)); +#endif + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + } + else + { + 
_mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + } +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + __m128i u0 = _mm_unpacklo_epi16(a.val, c.val); // a0 c0 a1 c1 ... + __m128i u1 = _mm_unpackhi_epi16(a.val, c.val); // a4 c4 a5 c5 ... + __m128i u2 = _mm_unpacklo_epi16(b.val, d.val); // b0 d0 b1 d1 ... + __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ... + + __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ... + __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ... + __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ... + __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ... + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 8), v1); + _mm_stream_si128((__m128i*)(ptr + 16), v2); + _mm_stream_si128((__m128i*)(ptr + 24), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 8), v1); + _mm_store_si128((__m128i*)(ptr + 16), v2); + _mm_store_si128((__m128i*)(ptr + 24), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 8), v1); + _mm_storeu_si128((__m128i*)(ptr + 16), v2); + _mm_storeu_si128((__m128i*)(ptr + 24), v3); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + } +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3; + v_transpose4x4(a, b, c, z, u0, u1, u2, u3); + + __m128i v0 = _mm_or_si128(u0.val, _mm_slli_si128(u1.val, 12)); + __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8)); + __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4)); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 4), v1); + _mm_stream_si128((__m128i*)(ptr + 8), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 4), v1); + _mm_store_si128((__m128i*)(ptr + 8), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 4), v1); + _mm_storeu_si128((__m128i*)(ptr + 8), v2); + } +} + +inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0.val); + 
_mm_stream_si128((__m128i*)(ptr + 4), v1.val); + _mm_stream_si128((__m128i*)(ptr + 8), v2.val); + _mm_stream_si128((__m128i*)(ptr + 12), v3.val); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0.val); + _mm_store_si128((__m128i*)(ptr + 4), v1.val); + _mm_store_si128((__m128i*)(ptr + 8), v2.val); + _mm_store_si128((__m128i*)(ptr + 12), v3.val); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0.val); + _mm_storeu_si128((__m128i*)(ptr + 4), v1.val); + _mm_storeu_si128((__m128i*)(ptr + 8), v2.val); + _mm_storeu_si128((__m128i*)(ptr + 12), v3.val); + } +} + +// 2-channel, float only +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 v0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1 + __m128 v1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3 + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + } +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0)); + __m128 v0 = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 u2 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 u3 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 v1 = _mm_shuffle_ps(u2, u3, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 u4 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(3, 3, 2, 2)); + __m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0)); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + } +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128 u0 = _mm_unpacklo_ps(a.val, c.val); + __m128 u1 = _mm_unpacklo_ps(b.val, d.val); + __m128 u2 = _mm_unpackhi_ps(a.val, c.val); + __m128 u3 = _mm_unpackhi_ps(b.val, d.val); + __m128 v0 = _mm_unpacklo_ps(u0, u1); + __m128 v2 = _mm_unpacklo_ps(u2, u3); + __m128 v1 = _mm_unpackhi_ps(u0, u1); + __m128 v3 = _mm_unpackhi_ps(u2, u3); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_ps(ptr, v0); + _mm_stream_ps(ptr + 4, v1); + _mm_stream_ps(ptr + 8, v2); + _mm_stream_ps(ptr + 12, v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_ps(ptr, v0); + _mm_store_ps(ptr + 4, v1); + _mm_store_ps(ptr + 8, v2); + _mm_store_ps(ptr + 12, v3); + } + else + { + _mm_storeu_ps(ptr, v0); + _mm_storeu_ps(ptr + 4, v1); + _mm_storeu_ps(ptr + 8, v2); + _mm_storeu_ps(ptr + 12, v3); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpackhi_epi64(a.val, b.val); + + if( mode == 
hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val)); + __m128i v2 = _mm_unpackhi_epi64(b.val, c.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + } +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode mode = hal::STORE_UNALIGNED) +{ + __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); + __m128i v1 = _mm_unpacklo_epi64(c.val, d.val); + __m128i v2 = _mm_unpackhi_epi64(a.val, b.val); + __m128i v3 = _mm_unpackhi_epi64(c.val, d.val); + + if( mode == hal::STORE_ALIGNED_NOCACHE ) + { + _mm_stream_si128((__m128i*)(ptr), v0); + _mm_stream_si128((__m128i*)(ptr + 2), v1); + _mm_stream_si128((__m128i*)(ptr + 4), v2); + _mm_stream_si128((__m128i*)(ptr + 6), v3); + } + else if( mode == hal::STORE_ALIGNED ) + { + _mm_store_si128((__m128i*)(ptr), v0); + _mm_store_si128((__m128i*)(ptr + 2), v1); + _mm_store_si128((__m128i*)(ptr + 4), v2); + _mm_store_si128((__m128i*)(ptr + 6), v3); + } + else + { + _mm_storeu_si128((__m128i*)(ptr), v0); + _mm_storeu_si128((__m128i*)(ptr + 2), v1); + _mm_storeu_si128((__m128i*)(ptr + 4), v2); + _mm_storeu_si128((__m128i*)(ptr + 6), v3); + } +} + +#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void 
v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(_mm_cvtepi32_ps(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + return v_float32x4(_mm_cvtpd_ps(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + return v_float32x4(_mm_movelh_ps(_mm_cvtpd_ps(a.val), _mm_cvtpd_ps(b.val))); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + return v_float64x2(_mm_cvtepi32_pd(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8))); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + return v_float64x2(_mm_cvtps_pd(a.val)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val))); +} + +// from (Mysticial and wim) https://stackoverflow.com/q/41144668 +inline v_float64x2 v_cvt_f64(const v_int64x2& v) +{ + // constants encoded as floating-point + __m128i magic_i_hi32 = _mm_set1_epi64x(0x4530000080000000); // 2^84 + 2^63 + __m128i magic_i_all = _mm_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52 + __m128d magic_d_all = _mm_castsi128_pd(magic_i_all); + // Blend the 32 lowest significant bits of v with magic_int_lo +#if CV_SSE4_1 + __m128i magic_i_lo = _mm_set1_epi64x(0x4330000000000000); // 2^52 + __m128i v_lo = _mm_blend_epi16(v.val, magic_i_lo, 0xcc); +#else + __m128i magic_i_lo = _mm_set1_epi32(0x43300000); // 2^52 + __m128i v_lo = _mm_unpacklo_epi32(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(0, 0, 2, 0)), magic_i_lo); +#endif + // Extract the 32 most significant bits of v + __m128i v_hi = _mm_srli_epi64(v.val, 32); + // Flip the msb of v_hi and blend with 0x45300000 + v_hi = _mm_xor_si128(v_hi, magic_i_hi32); + // Compute in double precision + __m128d v_hi_dbl = _mm_sub_pd(_mm_castsi128_pd(v_hi), magic_d_all); + // (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition + __m128d result = _mm_add_pd(v_hi_dbl, _mm_castsi128_pd(v_lo)); + return v_float64x2(result); +} + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], 
tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]]), + _mm_setr_pi8(tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]) + )); +#endif +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3]), + *(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7]))); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3])), + _mm_setr_pi16(*(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7])) + )); +#endif +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int8x16(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), + *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +#else + return v_int8x16(_mm_setr_epi64( + _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])), + _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])) + )); +#endif +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int16x8(_mm_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], + tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]])); +#else + return v_int16x8(_mm_setr_epi64( + _mm_setr_pi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]), + _mm_setr_pi16(tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]) + )); +#endif +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ +#if defined(_MSC_VER) + return v_int16x8(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), + *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +#else + return v_int16x8(_mm_setr_epi64( + _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])), + _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])) + )); +#endif +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ +#if 
defined(_MSC_VER) + return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], + tab[idx[2]], tab[idx[3]])); +#else + return v_int32x4(_mm_setr_epi64( + _mm_setr_pi32(tab[idx[0]], tab[idx[1]]), + _mm_setr_pi32(tab[idx[2]], tab[idx[3]]) + )); +#endif +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(_mm_set_epi64x(tab[idx[1]], tab[idx[0]])); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]])); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_float64x2(_mm_castsi128_pd(_mm_loadu_si128((const __m128i*)(tab + idx[0])))); } + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + int idx[2]; + v_store_low(idx, idxvec); + return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]])); +} + +// loads pairs from the table and deinterleaves them, e.g. returns: +// x = (tab[idxvec[0], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]), +// y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1]) +// note that the indices are float's indices, not the float-pair indices. +// in theory, this function can be used to implement bilinear interpolation, +// when idxvec are the offsets within the image. 
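+// A minimal sketch of that idea (illustrative; 'row', 'fx' and the index math
+// are assumptions, not part of this header):
+//     v_float32x4 left, right;
+//     v_lut_deinterleave(row, idxvec, left, right);         // idx[k] = yk*step + xk
+//     v_float32x4 horiz = v_muladd(right - left, fx, left); // left + fx*(right - left)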
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + __m128 z = _mm_setzero_ps(); + __m128 xy01 = _mm_loadl_pi(z, (__m64*)(tab + idx[0])); + __m128 xy23 = _mm_loadl_pi(z, (__m64*)(tab + idx[2])); + xy01 = _mm_loadh_pi(xy01, (__m64*)(tab + idx[1])); + xy23 = _mm_loadh_pi(xy23, (__m64*)(tab + idx[3])); + __m128 xxyy02 = _mm_unpacklo_ps(xy01, xy23); + __m128 xxyy13 = _mm_unpackhi_ps(xy01, xy23); + x = v_float32x4(_mm_unpacklo_ps(xxyy02, xxyy13)); + y = v_float32x4(_mm_unpackhi_ps(xxyy02, xxyy13)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int idx[2]; + v_store_low(idx, idxvec); + __m128d xy0 = _mm_loadu_pd(tab + idx[0]); + __m128d xy1 = _mm_loadu_pd(tab + idx[1]); + x = v_float64x2(_mm_unpacklo_pd(xy0, xy1)); + y = v_float64x2(_mm_unpackhi_pd(xy0, xy1)); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200))); +#else + __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0)); + a = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); +#endif +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400))); +#else + __m128i a = _mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); +#endif +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100))); +#else + __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); + return v_int16x8(_mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0))); +#endif +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ +#if CV_SSSE3 + return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100))); +#else + return v_int16x8(_mm_unpacklo_epi16(vec.val, _mm_unpackhi_epi64(vec.val, vec.val))); +#endif +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + return v_int32x4(_mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ +#if CV_SSSE3 + return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))); +#else + __m128i mask = 
_mm_set1_epi64x(0x00000000FFFFFFFF);
+    __m128i a = _mm_srli_si128(_mm_or_si128(_mm_andnot_si128(mask, vec.val), _mm_and_si128(mask, _mm_sll_epi32(vec.val, _mm_set_epi64x(0, 8)))), 1);
+    return v_int8x16(_mm_srli_si128(_mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 1, 0, 3)), 2));
+#endif
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
+{
+#if CV_SSSE3
+    return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100)));
+#else
+    return v_int16x8(_mm_srli_si128(_mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(2, 1, 0, 3)), 2));
+#endif
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+template<int i>
+inline uchar v_extract_n(const v_uint8x16& v)
+{
+#if CV_SSE4_1
+    return (uchar)_mm_extract_epi8(v.val, i);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+template<int i>
+inline schar v_extract_n(const v_int8x16& v)
+{
+    return (schar)v_extract_n<i>(v_reinterpret_as_u8(v));
+}
+
+template<int i>
+inline ushort v_extract_n(const v_uint16x8& v)
+{
+    return (ushort)_mm_extract_epi16(v.val, i);
+}
+
+template<int i>
+inline short v_extract_n(const v_int16x8& v)
+{
+    return (short)v_extract_n<i>(v_reinterpret_as_u16(v));
+}
+
+template<int i>
+inline uint v_extract_n(const v_uint32x4& v)
+{
+#if CV_SSE4_1
+    return (uint)_mm_extract_epi32(v.val, i);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+template<int i>
+inline int v_extract_n(const v_int32x4& v)
+{
+    return (int)v_extract_n<i>(v_reinterpret_as_u32(v));
+}
+
+template<int i>
+inline uint64 v_extract_n(const v_uint64x2& v)
+{
+#ifdef CV__SIMD_NATIVE_mm_extract_epi64
+    return (uint64)_v128_extract_epi64<i>(v.val);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+template<int i>
+inline int64 v_extract_n(const v_int64x2& v)
+{
+    return (int64)v_extract_n<i>(v_reinterpret_as_u64(v));
+}
+
+template<int i>
+inline float v_extract_n(const v_float32x4& v)
+{
+    union { uint iv; float fv; } d;
+    d.iv = v_extract_n<i>(v_reinterpret_as_u32(v));
+    return d.fv;
+}
+
+template<int i>
+inline double v_extract_n(const v_float64x2& v)
+{
+    union { uint64 iv; double dv; } d;
+    d.iv = v_extract_n<i>(v_reinterpret_as_u64(v));
+    return d.dv;
+}
+
+template<int i>
+inline v_int32x4 v_broadcast_element(const v_int32x4& v)
+{
+    return v_int32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i)));
+}
+
+template<int i>
+inline v_uint32x4 v_broadcast_element(const v_uint32x4& v)
+{
+    return v_uint32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i)));
+}
+
+template<int i>
+inline v_float32x4 v_broadcast_element(const v_float32x4& v)
+{
+    return v_float32x4(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE((char)i,(char)i,(char)i,(char)i)));
+}
+
+////////////// FP16 support ///////////////////////////
+
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+#if CV_FP16
+    return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
+#else
+    const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000);
+    const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000);
+    const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000));
+    __m128i bits = _mm_unpacklo_epi16(z, _mm_loadl_epi64((const __m128i*)ptr)); // h << 16
+    __m128i e =
_mm_and_si128(bits, maxexp), sign = _mm_and_si128(bits, signmask); + __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_xor_si128(bits, sign), 3), delta); // ((h & 0x7fff) << 13) + delta + __m128i zt = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_add_epi32(t, _mm_set1_epi32(1 << 23))), deltaf)); + + t = _mm_add_epi32(t, _mm_and_si128(delta, _mm_cmpeq_epi32(maxexp, e))); + __m128i zmask = _mm_cmpeq_epi32(e, z); + __m128i ft = v_select_si128(zmask, zt, t); + return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign))); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ +#if CV_FP16 + __m128i fp16_value = _mm_cvtps_ph(v.val, 0); + _mm_storel_epi64((__m128i*)ptr, fp16_value); +#else + const __m128i signmask = _mm_set1_epi32(0x80000000); + const __m128i rval = _mm_set1_epi32(0x3f000000); + + __m128i t = _mm_castps_si128(v.val); + __m128i sign = _mm_srai_epi32(_mm_and_si128(t, signmask), 16); + t = _mm_andnot_si128(signmask, t); + + __m128i finitemask = _mm_cmpgt_epi32(_mm_set1_epi32(0x47800000), t); + __m128i isnan = _mm_cmpgt_epi32(t, _mm_set1_epi32(0x7f800000)); + __m128i naninf = v_select_si128(isnan, _mm_set1_epi32(0x7e00), _mm_set1_epi32(0x7c00)); + __m128i tinymask = _mm_cmpgt_epi32(_mm_set1_epi32(0x38800000), t); + __m128i tt = _mm_castps_si128(_mm_add_ps(_mm_castsi128_ps(t), _mm_castsi128_ps(rval))); + tt = _mm_sub_epi32(tt, rval); + __m128i odd = _mm_and_si128(_mm_srli_epi32(t, 13), _mm_set1_epi32(1)); + __m128i nt = _mm_add_epi32(t, _mm_set1_epi32(0xc8000fff)); + nt = _mm_srli_epi32(_mm_add_epi32(nt, odd), 13); + t = v_select_si128(tinymask, tt, nt); + t = v_select_si128(finitemask, t, naninf); + t = _mm_or_si128(t, sign); + t = _mm_packs_epi32(t, t); + _mm_storel_epi64((__m128i*)ptr, t); +#endif +} + +inline void v_cleanup() {} + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + +//! @endcond + +} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_sse_em.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_sse_em.hpp new file mode 100644 index 0000000..6fb0881 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_sse_em.hpp @@ -0,0 +1,180 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP +#define OPENCV_HAL_INTRIN_SSE_EM_HPP + +namespace cv +{ + +//! 
@cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \ + inline tp _v128_##fun(const tp& a) \ + { return _mm_##fun(a); } + +#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \ + inline tp _v128_##fun(const tp& a, const tp& b) \ + { return _mm_##fun(a, b); } + +#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \ + inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \ + { return _mm_##fun(a, b, c); } + +///////////////////////////// XOP ///////////////////////////// + +// [todo] define CV_XOP +#if 1 // CV_XOP +inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b) +{ + const __m128i delta = _mm_set1_epi32((int)0x80000000); + return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta)); +} +// wrapping XOP +#else +OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i) +#endif // !CV_XOP + +///////////////////////////// SSE4.1 ///////////////////////////// + +#if !CV_SSE4_1 + +/** Swizzle **/ +inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask) +{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); } + +/** Convert **/ +// 8 >> 16 +inline __m128i _v128_cvtepu8_epi16(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi8(a, z); +} +inline __m128i _v128_cvtepi8_epi16(const __m128i& a) +{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); } +// 8 >> 32 +inline __m128i _v128_cvtepu8_epi32(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); +} +inline __m128i _v128_cvtepi8_epi32(const __m128i& a) +{ + __m128i r = _mm_unpacklo_epi8(a, a); + r = _mm_unpacklo_epi8(r, r); + return _mm_srai_epi32(r, 24); +} +// 16 >> 32 +inline __m128i _v128_cvtepu16_epi32(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(a, z); +} +inline __m128i _v128_cvtepi16_epi32(const __m128i& a) +{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); } +// 32 >> 64 +inline __m128i _v128_cvtepu32_epi64(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(a, z); +} +inline __m128i _v128_cvtepi32_epi64(const __m128i& a) +{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); } + +/** Arithmetic **/ +inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b) +{ + __m128i c0 = _mm_mul_epu32(a, b); + __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32)); + __m128i d0 = _mm_unpacklo_epi32(c0, c1); + __m128i d1 = _mm_unpackhi_epi32(c0, c1); + return _mm_unpacklo_epi64(d0, d1); +} + +/** Math **/ +inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b) +{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); } + +// wrapping SSE4.1 +#else +OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i) +OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i) +OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i) +OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i) +OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i) +#endif // !CV_SSE4_1 + +///////////////////////////// Revolutionary ///////////////////////////// + +/** Convert **/ +// 16 << 8 +inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a) +{ + const __m128i z = _mm_setzero_si128(); + return _mm_unpackhi_epi8(a, z); +} +inline 
__m128i _v128_cvtepi8_epi16_high(const __m128i& a)
+{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); }
+// 32 << 16
+inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
+{
+    const __m128i z = _mm_setzero_si128();
+    return _mm_unpackhi_epi16(a, z);
+}
+inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
+{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); }
+// 64 << 32
+inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
+{
+    const __m128i z = _mm_setzero_si128();
+    return _mm_unpackhi_epi32(a, z);
+}
+inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
+{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); }
+
+/** Miscellaneous **/
+inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
+{
+    const __m128i m = _mm_set1_epi32(65535);
+    __m128i am = _v128_min_epu32(a, m);
+    __m128i bm = _v128_min_epu32(b, m);
+#if CV_SSE4_1
+    return _mm_packus_epi32(am, bm);
+#else
+    const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
+    am = _mm_sub_epi32(am, d);
+    bm = _mm_sub_epi32(bm, d);
+    am = _mm_packs_epi32(am, bm);
+    return _mm_sub_epi16(am, nd);
+#endif
+}
+
+template<int i>
+inline int64 _v128_extract_epi64(const __m128i& a)
+{
+#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
+#define CV__SIMD_NATIVE_mm_extract_epi64 1
+    return _mm_extract_epi64(a, i);
+#else
+    CV_DECL_ALIGNED(16) int64 tmp[2];
+    _mm_store_si128((__m128i*)tmp, a);
+    return tmp[i];
+#endif
+}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
+
+#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_vsx.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_vsx.hpp
new file mode 100644
index 0000000..b198643
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_vsx.hpp
@@ -0,0 +1,1608 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_VSX_HPP
+#define OPENCV_HAL_VSX_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+
+namespace cv
+{
+
+//!
@cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +///////// Types //////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + vec_uchar16 val; + + explicit v_uint8x16(const vec_uchar16& v) : val(v) + {} + v_uint8x16() + {} + v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v)) + {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) + {} + + static inline v_uint8x16 zero() { return v_uint8x16(vec_uchar16_z); } + + uchar get0() const + { return vec_extract(val, 0); } +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + vec_char16 val; + + explicit v_int8x16(const vec_char16& v) : val(v) + {} + v_int8x16() + {} + v_int8x16(vec_bchar16 v) : val(vec_char16_c(v)) + {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) + {} + + static inline v_int8x16 zero() { return v_int8x16(vec_char16_z); } + + schar get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + vec_ushort8 val; + + explicit v_uint16x8(const vec_ushort8& v) : val(v) + {} + v_uint16x8() + {} + v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v)) + {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7)) + {} + + static inline v_uint16x8 zero() { return v_uint16x8(vec_ushort8_z); } + + ushort get0() const + { return vec_extract(val, 0); } +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + vec_short8 val; + + explicit v_int16x8(const vec_short8& v) : val(v) + {} + v_int16x8() + {} + v_int16x8(vec_bshort8 v) : val(vec_short8_c(v)) + {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7)) + {} + + static inline v_int16x8 zero() { return v_int16x8(vec_short8_z); } + + short get0() const + { return vec_extract(val, 0); } +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + vec_uint4 val; + + explicit v_uint32x4(const vec_uint4& v) : val(v) + {} + v_uint32x4() + {} + v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v)) + {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3)) + {} + + static inline v_uint32x4 zero() { return v_uint32x4(vec_uint4_z); } + + uint get0() const + { return vec_extract(val, 0); } +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + vec_int4 val; + + explicit v_int32x4(const vec_int4& v) : val(v) + {} + v_int32x4() + {} + v_int32x4(vec_bint4 v) : val(vec_int4_c(v)) + {} + v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3)) + {} + + static inline v_int32x4 zero() { return v_int32x4(vec_int4_z); } + + int get0() const + { return vec_extract(val, 0); } +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + vec_float4 val; + + explicit v_float32x4(const vec_float4& v) : val(v) + {} + v_float32x4() + {} + v_float32x4(vec_bint4 v) : val(vec_float4_c(v)) + {} + v_float32x4(float v0, 
float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
+    {}
+
+    static inline v_float32x4 zero() { return v_float32x4(vec_float4_z); }
+
+    float get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+    vec_udword2 val;
+
+    explicit v_uint64x2(const vec_udword2& v) : val(v)
+    {}
+    v_uint64x2()
+    {}
+    v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
+    {}
+    v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
+    {}
+
+    static inline v_uint64x2 zero() { return v_uint64x2(vec_udword2_z); }
+
+    uint64 get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+    vec_dword2 val;
+
+    explicit v_int64x2(const vec_dword2& v) : val(v)
+    {}
+    v_int64x2()
+    {}
+    v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
+    {}
+    v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
+    {}
+
+    static inline v_int64x2 zero() { return v_int64x2(vec_dword2_z); }
+
+    int64 get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+    vec_double2 val;
+
+    explicit v_float64x2(const vec_double2& v) : val(v)
+    {}
+    v_float64x2()
+    {}
+    v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
+    {}
+    v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
+    {}
+
+    static inline v_float64x2 zero() { return v_float64x2(vec_double2_z); }
+
+    double get0() const
+    { return vec_extract(val, 0); }
+};
+
+#define OPENCV_HAL_IMPL_VSX_EXTRACT_N(_Tpvec, _Tp) \
+template<int i> inline _Tp v_extract_n(VSX_UNUSED(_Tpvec v)) { return vec_extract(v.val, i); }
+
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int8x16, schar)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int16x8, short)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint32x4, uint)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int32x4, int)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int64x2, int64)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float32x4, float)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double)
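+
+// Illustrative sketch, not part of the upstream header: v_extract_n<i> reads
+// a single lane at a compile-time index, mapping straight onto vec_extract.
+inline int example_extract_lane3(const v_int32x4& v)
+{
+    return v_extract_n<3>(v); // last lane of a 4 x int32 vector
+}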
+
+//////////////// Load and store operations ///////////////
+
+/*
+ * clang-5 aborted during parse "vec_xxx_c" only if it's
+ * inside a function template which is defined by preprocessor macro.
+ *
+ * if vec_xxx_c is defined as a C++ cast, clang-5 will pass it
+*/
+#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); } \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
+{ return _Tpvec((cast)a.val); }
+
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
+
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(ld(0, ptr)); } \
+inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \
+{ return _Tpvec(ld_a(0, ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(vec_ld_l8(ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ st(a.val, 0, ptr); } \
+inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
+{ st_a(a.val, 0, ptr); } \
+inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
+{ st_a(a.val, 0, ptr); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
+{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ vec_st_l8(a.val, ptr); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ vec_st_h8(a.val, ptr); }
+
+// working around gcc bug for aligned ld/st
+// if the runtime check for vec_ld/st fails we fall back to unaligned ld/st
+// https://github.com/opencv/opencv/issues/13211
+#ifdef CV_COMPILER_VSX_BROKEN_ALIGNED
+    #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+    OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vsx_ld, vsx_st, vsx_st)
+#else
+    #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+    OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
+#endif
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+
+//////////////// Value reordering ///////////////
+
+/* de&interleave */
+#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec) \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \
+{ vec_ld_deinterleave(ptr, a.val, b.val);} \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \
+                                _Tpvec& b, _Tpvec& c) \
+{
vec_ld_deinterleave(ptr, a.val, b.val, c.val); } \ +inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \ + _Tpvec& c, _Tpvec& d) \ +{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, ptr); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \ + const _Tpvec& b, const _Tpvec& c, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, c.val, ptr); } \ +inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \ + const _Tpvec& c, const _Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); } + +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2) +OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2) + +/* Expand */ +#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = fh(a.val); \ + b1.val = fl(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ return _Tpwvec(fh(a.val)); } \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ return _Tpwvec(fl(a.val)); } \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ return _Tpwvec(fh(vec_ld_l8(ptr))); } + +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh) +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh) +OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu) +OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh) + +/* Load and zero expand a 4 byte value into the second dword, first is don't care. */ +#if !defined(CV_COMPILER_VSX_BROKEN_ASM) + #define _LXSIWZX(out, ptr, T) __asm__ ("lxsiwzx %x0, 0, %1\r\n" : "=wa"(out) : "r" (ptr) : "memory"); +#else + /* This is compiler-agnostic, but will introduce an unneeded splat on the critical path. */ + #define _LXSIWZX(out, ptr, T) out = (T)vec_udword2_sp(*(uint32_t*)(ptr)); +#endif + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + // Zero-extend the extra 24B instead of unpacking. Usually faster in small kernel + // Likewise note, value is zero extended and upper 4 bytes are zero'ed. 
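+    // Indices 8..11 of the permute below pick the four loaded bytes; index 12
+    // picks a byte that lxsiwzx is guaranteed to have zeroed, so each uchar is
+    // zero-extended into its own 32-bit lane.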
+    vec_uchar16 pmu = {8, 12, 12, 12, 9, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12};
+    vec_uchar16 out;
+
+    _LXSIWZX(out, ptr, vec_uchar16);
+    out = vec_perm(out, out, pmu);
+    return v_uint32x4((vec_uint4)out);
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    vec_char16 out;
+    vec_short8 outs;
+    vec_int4 outw;
+
+    _LXSIWZX(out, ptr, vec_char16);
+    outs = vec_unpackl(out);
+    outw = vec_unpackh(outs);
+    return v_int32x4(outw);
+}
+
+/* pack */
+#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    return _Tpvec(pkfnc(a.val, b.val)); \
+} \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    vec_st_l8(pkfnc(a.val, a.val), ptr); \
+} \
+template<int n> \
+inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n); \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \
+    return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn))); \
+} \
+template<int n> \
+inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n); \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \
+    vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr); \
+}
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
+                         vec_sr, vec_packs, vec_adds, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packs, vec_adds, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
+                         vec_sr, vec_packs, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packs, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
+                         vec_sr, vec_pack, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
+                         vec_sra, vec_pack, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packsu, vec_adds, pack_u)
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packsu, vec_add, pack_u)
+// Following variant is not implemented on other platforms:
+//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
+//                         vec_sra, vec_packsu, vec_add, pack_u)
+
+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vec_uchar16 ab = vec_pack(a.val, b.val);
+    return v_uint8x16(ab);
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    vec_ushort8 ab = vec_pack(a.val, b.val);
+    vec_ushort8 cd = vec_pack(c.val, d.val);
+    return v_uint8x16(vec_pack(ab, cd));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    vec_uint4 ab = vec_pack(a.val, b.val);
+    vec_uint4 cd = vec_pack(c.val, d.val);
+    vec_uint4 ef = vec_pack(e.val, f.val);
+    vec_uint4 gh = vec_pack(g.val, h.val);
+
+    vec_ushort8 abcd = vec_pack(ab, cd);
+    vec_ushort8 efgh = vec_pack(ef, gh);
+    return v_uint8x16(vec_pack(abcd, efgh));
+}
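+
+// Illustrative sketch, not part of the upstream header: v_rshr_pack<n>
+// narrows with rounding, i.e. each lane becomes (x + (1 << (n-1))) >> n
+// before the two inputs are packed into one vector with saturation.
+inline bool example_rshr_pack()
+{
+    v_uint16x8 a = v_setall_u16(1000), b = v_setall_u16(6);
+    v_uint8x16 r = v_rshr_pack<2>(a, b);
+    // (1000 + 2) >> 2 == 250 in the low half, (6 + 2) >> 2 == 2 in the high half
+    return v_extract_n<0>(r) == 250 && v_extract_n<8>(r) == 2;
+}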
+
+/* Recombine */
+template<typename _Tpvec>
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
+{
+    b0.val = vec_mergeh(a0.val, a1.val);
+    b1.val = vec_mergel(a0.val, a1.val);
+}
+
+template<typename _Tpvec>
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(vec_mergesql(a.val, b.val)); }
+
+template<typename _Tpvec>
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(vec_mergesqh(a.val, b.val)); }
+
+template<typename _Tpvec>
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
+{
+    c.val = vec_mergesqh(a.val, b.val);
+    d.val = vec_mergesql(a.val, b.val);
+}
+
+////////// Arithmetic, bitwise and comparison operations /////////
+
+/* Element-wise binary and unary operations */
+/** Arithmetics **/
+#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(intrin(a.val, b.val)); } \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ a.val = intrin(a.val, b.val); return a; }
+
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
+
+// saturating multiply
+#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec) \
+    inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
+    { \
+        _Tpwvec c, d; \
+        v_mul_expand(a, b, c, d); \
+        return v_pack(c, d); \
+    } \
+    inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
+    { a = a * b; return a; }
+
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int8x16, v_int16x8)
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint8x16, v_uint16x8)
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int16x8, v_int32x4)
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint16x8, v_uint32x4)
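+
+// Illustrative sketch, not part of the upstream header: for 8/16-bit lanes,
+// operator* expands to the wider type and packs back with saturation, so the
+// product clamps instead of wrapping around.
+inline bool example_mul_saturates()
+{
+    v_int16x8 a = v_setall_s16(300), b = v_setall_s16(300);
+    v_int16x8 c = a * b; // 300 * 300 = 90000 -> clamped to 32767 per lane
+    return v_extract_n<0>(c) == 32767;
+}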
+
+template<typename Tvec, typename Twvec>
+inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d)
+{
+    Twvec p0 = Twvec(vec_mule(a.val, b.val));
+    Twvec p1 = Twvec(vec_mulo(a.val, b.val));
+    v_zip(p0, p1, c, d);
+}
+
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
+{
+    vec_int4 p0 = vec_mule(a.val, b.val);
+    vec_int4 p1 = vec_mulo(a.val, b.val);
+    static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
+    return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm));
+}
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
+{
+    vec_uint4 p0 = vec_mule(a.val, b.val);
+    vec_uint4 p1 = vec_mulo(a.val, b.val);
+    static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
+    return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm));
+}
+
+/** Non-saturating arithmetics **/
+#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \
+template<typename _Tpvec> \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(intrin(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul)
+
+/** Bitwise shifts **/
+#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \
+inline _Tpvec operator << (const _Tpvec& a, int imm) \
+{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \
+inline _Tpvec operator >> (const _Tpvec& a, int imm) \
+{ return _Tpvec(shr(a.val, splfunc(imm))); } \
+template<int imm> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \
+template<int imm> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec(shr(a.val, splfunc(imm))); }
+
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp)
+// algebraic right shift
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp)
+
+/** Bitwise logic **/
+#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec) \
+OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and) \
+OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or) \
+OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor) \
+inline _Tpvec operator ~ (const _Tpvec& a) \
+{ return _Tpvec(vec_not(a.val)); }
+
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2)
+
+/** Bitwise select **/
+#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }
+
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
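+
+// Illustrative sketch, not part of the upstream header: v_shr uses an
+// algebraic (sign-extending) shift for signed lanes and a logical shift
+// for unsigned lanes.
+inline bool example_shift_kinds()
+{
+    v_int32x4  s = v_setall_s32(-16);
+    v_uint32x4 u = v_setall_u32(16);
+    return v_extract_n<0>(v_shr<2>(s)) == -4   // -16 >> 2 keeps the sign
+        && v_extract_n<0>(v_shr<2>(u)) == 4u;  //  16 >> 2 shifts in zeros
+}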
_Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(vec_cmpge(a.val, b.val)); } + +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return v_float32x4(vec_cmpeq(a.val, a.val)); } +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return v_float64x2(vec_cmpeq(a.val, a.val)); } + +/** min/max **/ +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min) +OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max) + +/** Rotate **/ +#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast) \ +template \ +inline _Tpvec v_rotate_##suffix(const _Tpvec& a) \ +{ \ + const int wd = imm * sizeof(typename _Tpvec::lane_type); \ + if (wd > 15) \ + return _Tpvec::zero(); \ + return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \ +} + +#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast) \ +OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \ +OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast) + +OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16) +OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16, vec_char16) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8) +OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4) +OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4) +OPENCV_IMPL_VSX_ROTATE_LR(v_float32x4, vec_float4) +OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2) +OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2) +OPENCV_IMPL_VSX_ROTATE_LR(v_float64x2, vec_double2) + +template +inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) +{ + enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) }; + if (CV_SHIFT == 16) + return a; +#ifdef __IBMCPP__ + return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15)); +#else + return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT)); +#endif +} + +template +inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) +{ + enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) }; + if (CV_SHIFT == 16) + return b; + return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT)); +} + +#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2) \ +template \ +inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \ +{ \ + if (imm == 1) \ + return _Tpvec(vec_permi(rg1.val, rg2.val, 2)); \ + return imm ? 
b : a; \ +} + +#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec) \ +OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left, b, a) \ +OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b) + +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2) +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2) +OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2) + +/* Reverse */ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_uint8x16(vec_perm(vec, vec, perm)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + +/* Extract */ +template +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ return v_rotate_right(a, b); } + +////////// Reduce and mask ///////// + +/** Reduce **/ +inline uint v_reduce_sum(const v_uint8x16& a) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(a.val, zero4); + return (uint)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + const vec_int4 zero4 = vec_int4_z; + vec_int4 sum4 = vec_sum4s(a.val, zero4); + return (int)vec_extract(vec_sums(sum4, zero4), 3); +} +inline int v_reduce_sum(const v_int16x8& a) +{ + const vec_int4 zero = vec_int4_z; + return saturate_cast(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3)); +} +inline uint v_reduce_sum(const v_uint16x8& a) +{ + const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8)))); + return saturate_cast(vec_extract(vec_sums(v4, vec_int4_z), 3)); +} + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, 
vec_int4, int, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min) + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline double v_reduce_sum(const v_float64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min) + +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + rs = func(rs, vec_sld(rs, rs, 2)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, min, vec_min) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val)); + ac = vec_add(ac, vec_sld(ac, ac, 8)); + + vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val)); + bd = vec_add(bd, vec_sld(bd, bd, 8)); + return v_float32x4(vec_mergeh(ac, bd)); +} + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(vec_absd(a.val, b.val), zero4); + return (unsigned)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + const vec_int4 zero4 = vec_int4_z; + vec_char16 ad = vec_abss(vec_subs(a.val, b.val)); + vec_int4 sum4 = vec_sum4s(ad, zero4); + return (unsigned)vec_extract(vec_sums(sum4, zero4), 3); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + vec_ushort8 ad = vec_absd(a.val, b.val); + VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z); + return (unsigned)vec_extract(sum, 3); +} +inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) +{ + const vec_int4 zero4 = vec_int4_z; + vec_short8 ad = vec_abss(vec_subs(a.val, b.val)); + vec_int4 sum4 = vec_sum4s(ad, zero4); + return (unsigned)vec_extract(vec_sums(sum4, zero4), 3); +} +inline unsigned 
v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b) +{ + const vec_uint4 ad = vec_absd(a.val, b.val); + const vec_uint4 rd = vec_add(ad, vec_sld(ad, ad, 8)); + return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0); +} +inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) +{ + vec_int4 ad = vec_abss(vec_sub(a.val, b.val)); + return (unsigned)vec_extract(vec_sums(ad, vec_int4_z), 3); +} +inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) +{ + const vec_float4 ad = vec_abs(vec_sub(a.val, b.val)); + const vec_float4 rd = vec_add(ad, vec_sld(ad, ad, 8)); + return vec_extract(vec_add(rd, vec_sld(rd, rd, 4)), 0); +} + +/** Popcount **/ +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } + +/** Mask **/ +inline int v_signmask(const v_uint8x16& a) +{ + static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } + +inline int v_signmask(const v_int16x8& a) +{ + static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_uint16x8& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } + +inline int v_signmask(const v_int32x4& a) +{ + static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; + return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); +} +inline int v_signmask(const v_uint32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } + +inline int v_signmask(const v_int64x2& a) +{ + VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63)); + return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1; +} +inline int v_signmask(const v_uint64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); } 
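+// Note: v_signmask above packs each lane's sign bit into an integer
+// (lane 0 -> bit 0), and v_scan_forward returns the index of the first lane
+// whose sign bit is set (the 64-bit overloads follow below). A minimal usage
+// sketch, with hypothetical values that are not part of the original source:
+//     v_int32x4 v(-1, 2, -3, 4);
+//     v_signmask(v);     // -> 0b0101 == 5
+//     v_scan_forward(v); // -> 0, the first negative lane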
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); }
+
+template<typename _Tpvec>
+inline bool v_check_all(const _Tpvec& a)
+{ return vec_all_lt(a.val, _Tpvec::zero().val); }
+inline bool v_check_all(const v_uint8x16& a)
+{ return v_check_all(v_reinterpret_as_s8(a)); }
+inline bool v_check_all(const v_uint16x8& a)
+{ return v_check_all(v_reinterpret_as_s16(a)); }
+inline bool v_check_all(const v_uint32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_uint64x2& a)
+{ return v_check_all(v_reinterpret_as_s64(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_s64(a)); }
+
+template<typename _Tpvec>
+inline bool v_check_any(const _Tpvec& a)
+{ return vec_any_lt(a.val, _Tpvec::zero().val); }
+inline bool v_check_any(const v_uint8x16& a)
+{ return v_check_any(v_reinterpret_as_s8(a)); }
+inline bool v_check_any(const v_uint16x8& a)
+{ return v_check_any(v_reinterpret_as_s16(a)); }
+inline bool v_check_any(const v_uint32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_uint64x2& a)
+{ return v_check_any(v_reinterpret_as_s64(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_s64(a)); }
+
+////////// Other math /////////
+
+/** Some frequent operations **/
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{ return v_float32x4(vec_sqrt(x.val)); }
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(vec_sqrt(x.val)); }
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{ return v_float32x4(vec_rsqrt(x.val)); }
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{ return v_float64x2(vec_rsqrt(x.val)); }
+
+#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec) \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); } \
+inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ return _Tpvec(vec_madd(a.val, b.val, c.val)); } \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
+
+OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
+OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{ return a * b + c; }
+
+// TODO: exp, log, sin, cos
+
+/** Absolute values **/
+inline v_uint8x16 v_abs(const v_int8x16& x)
+{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }
+
+inline v_uint16x8 v_abs(const v_int16x8& x)
+{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }
+
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
+
+inline v_float32x4 v_abs(const v_float32x4& x)
+{ return v_float32x4(vec_abs(x.val)); }
+
+inline v_float64x2 v_abs(const v_float64x2& x)
+{ return v_float64x2(vec_abs(x.val)); }
+
+/** Absolute difference **/
+// unsigned
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
+
+inline v_uint8x16 v_absdiff(const v_int8x16& a, const
v_int8x16& b) +{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); } +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); } +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); } + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ return v_abs(a - b); } +inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +{ return v_abs(a - b); } + +/** Absolute difference for signed integers **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); } + +////////// Conversions ///////// + +/** Rounding **/ +inline v_int32x4 v_round(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_rint(a.val))); } + +inline v_int32x4 v_round(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); } + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); } + +inline v_int32x4 v_floor(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_floor(a.val))); } + +inline v_int32x4 v_floor(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); } + +inline v_int32x4 v_ceil(const v_float32x4& a) +{ return v_int32x4(vec_cts(vec_ceil(a.val))); } + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); } + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ return v_int32x4(vec_cts(a.val)); } + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); } + +/** To float **/ +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ return v_float32x4(vec_ctf(a.val)); } + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); } + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_cvfo(b.val))); } + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); } + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ return v_float64x2(vec_ctd(a.val)); } + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + return v_reinterpret_as_s8(v_int16x8(*(const short*)(tab+idx[0]), *(const short*)(tab+idx[1]), *(const short*)(tab+idx[2]), *(const short*)(tab+idx[3]), + *(const short*)(tab+idx[4]), *(const short*)(tab+idx[5]), *(const 
short*)(tab+idx[6]), *(const short*)(tab+idx[7]))); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + return v_reinterpret_as_s8(v_int32x4(*(const int*)(tab+idx[0]), *(const int*)(tab+idx[1]), *(const int*)(tab+idx[2]), *(const int*)(tab+idx[3]))); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + return v_reinterpret_as_s16(v_int32x4(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_reinterpret_as_s16(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_reinterpret_as_s32(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(vsx_ld(0, tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(vsx_ld2(0, tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int*)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_load(tab + *idx); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_load(tab + 
*idx); } + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_uint32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + const int idx[4] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1), + vec_extract(idxvec.val, 2), + vec_extract(idxvec.val, 3) + }; + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + const int idx[2] = { + vec_extract(idxvec.val, 0), + vec_extract(idxvec.val, 1) + }; + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + vec_float4 xy0 = vec_ld_l8(tab + vec_extract(idxvec.val, 0)); + vec_float4 xy1 = vec_ld_l8(tab + vec_extract(idxvec.val, 1)); + vec_float4 xy2 = vec_ld_l8(tab + vec_extract(idxvec.val, 2)); + vec_float4 xy3 = vec_ld_l8(tab + vec_extract(idxvec.val, 3)); + vec_float4 xy02 = vec_mergeh(xy0, xy2); // x0, x2, y0, y2 + vec_float4 xy13 = vec_mergeh(xy1, xy3); // x1, x3, y1, y3 + x.val = vec_mergeh(xy02, xy13); + y.val = vec_mergel(xy02, xy13); +} +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + vec_double2 xy0 = vsx_ld(vec_extract(idxvec.val, 0), tab); + vec_double2 xy1 = vsx_ld(vec_extract(idxvec.val, 1), tab); + x.val = vec_mergeh(xy0, xy1); + y.val = vec_mergel(xy0, xy1); +} + +inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } + +inline v_int8x16 v_interleave_quads(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 4,5, 2,3, 6,7, 8,9, 12,13, 10,11, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } + +inline v_int16x8 v_interleave_quads(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) +{ + static const vec_uchar16 perm = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15}; + return v_int32x4(vec_perm(vec.val, vec.val, 
perm)); +} +inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) +{ return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } +inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) +{ return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } + +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + static const vec_uchar16 perm = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 15, 15, 15}; + return v_int8x16(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) +{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + static const vec_uchar16 perm = {0,1, 2,3, 4,5, 8,9, 10,11, 12,13, 14,15, 14,15}; + return v_int16x8(vec_perm(vec.val, vec.val, perm)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) +{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } + +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) +{ return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) +{ return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) +{ return vec; } + +/////// FP16 support //////// + +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr); +#if CV_VSX3 && defined(vec_extract_fp_from_shorth) + return v_float32x4(vec_extract_fp_from_shorth(vf16)); +#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_float4 vf32; + __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16))); + return v_float32x4(vf32); +#else + const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000); + const vec_int4 signmask = vec_int4_sp(0x80000000); + const vec_int4 maxexp = vec_int4_sp(0x7c000000); + const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000)); + + vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16))); + vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask); + vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta + vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf)); + + t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e))); + vec_bint4 zmask = vec_cmpeq(e, z); + vec_int4 ft = vec_sel(t, zt, zmask); + return v_float32x4(vec_float4_c(vec_or(ft, sign))); +#endif +} + +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) +{ +// fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"? 
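+// Note on the two branches below: the CV_VSX3 path converts with a single
+// "xvcvsphp" instruction via inline asm, while the portable fallback
+// reproduces binary32 -> binary16 rounding in integer arithmetic: it strips
+// the sign, classifies the value as tiny / finite / NaN-or-Inf, rounds to
+// nearest-even (the "odd"-bit trick), then re-attaches the sign before
+// storing the low 8 bytes.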
+#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
+    vec_ushort8 vf16;
+    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val));
+    vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
+#else
+    const vec_int4 signmask = vec_int4_sp(0x80000000);
+    const vec_int4 rval = vec_int4_sp(0x3f000000);
+
+    vec_int4 t = vec_int4_c(v.val);
+    vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16));
+    t = vec_and(vec_nor(signmask, signmask), t);
+
+    vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t);
+    vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000));
+    vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan);
+    vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t);
+    vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval)));
+    tt = vec_sub(tt, rval);
+    vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1));
+    vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff));
+    nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13));
+    t = vec_sel(nt, tt, tinymask);
+    t = vec_sel(naninf, t, finitemask);
+    t = vec_or(t, sign);
+    vec_st_l8(vec_packs(t, t), ptr);
+#endif
+}
+
+inline void v_cleanup() {}
+
+
+/** Reinterpret **/
+/** it's up there with the load and store operations **/
+
+////////// Matrix operations /////////
+
+//////// Dot Product ////////
+// 16 >> 32
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
+
+// 32 >> 64
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+    vec_dword2 even = vec_mule(a.val, b.val);
+    vec_dword2 odd = vec_mulo(a.val, b.val);
+    return v_int64x2(vec_add(even, odd));
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod(a, b) + c; }
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_uint32x4(vec_msum(a.val, b.val, c.val)); }
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)); }
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+    const vec_ushort8 eight = vec_ushort8_sp(8);
+    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even
+    vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd
+    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
+    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z)));
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{
+    const vec_ushort8 eight = vec_ushort8_sp(8);
+    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even
+    vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd
+    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
+    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, c.val)));
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    const vec_uint4 zero = vec_uint4_z;
+    vec_uint4 even = vec_mule(a.val, b.val);
+    vec_uint4 odd = vec_mulo(a.val, b.val);
+    vec_udword2 e0 = (vec_udword2)vec_mergee(even, zero);
+    vec_udword2 e1 = (vec_udword2)vec_mergeo(even, zero);
+    vec_udword2 o0 =
(vec_udword2)vec_mergee(odd, zero); + vec_udword2 o1 = (vec_udword2)vec_mergeo(odd, zero); + vec_udword2 s0 = vec_add(e0, o0); + vec_udword2 s1 = vec_add(e1, o1); + return v_uint64x2(vec_add(s0, s1)); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return v_int64x2(vec_add(vec_mergeh(c.val, d.val), vec_mergel(c.val, d.val))); +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; } +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; } + +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + vec_short8 a0 = vec_unpackh(a.val); + vec_short8 a1 = vec_unpackl(a.val); + vec_short8 b0 = vec_unpackh(b.val); + vec_short8 b1 = vec_unpackl(b.val); + return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 prod = v_dotprod(a, b); + v_int64x2 c, d; + v_expand(prod, c, d); + return c + d; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + const vec_float4 v0 = vec_splat(v.val, 0); + const vec_float4 v1 = vec_splat(v.val, 1); + const vec_float4 v2 = vec_splat(v.val, 2); + VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3); + return 
v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val)))));
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    const vec_float4 v0 = vec_splat(v.val, 0);
+    const vec_float4 v1 = vec_splat(v.val, 1);
+    const vec_float4 v2 = vec_splat(v.val, 2);
+    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val))));
+}
+
+#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2) \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, const _Tpvec& a3, \
+                           _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \
+{ \
+    _Tpvec2 a02 = vec_mergeh(a0.val, a2.val); \
+    _Tpvec2 a13 = vec_mergeh(a1.val, a3.val); \
+    b0.val = vec_mergeh(a02, a13); \
+    b1.val = vec_mergel(a02, a13); \
+    a02 = vec_mergel(a0.val, a2.val); \
+    a13 = vec_mergel(a1.val, a3.val); \
+    b2.val = vec_mergeh(a02, a13); \
+    b3.val = vec_mergel(a02, a13); \
+}
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
+
+template<int i, typename Tvec>
+inline Tvec v_broadcast_element(const Tvec& v)
+{ return Tvec(vec_splat(v.val, i)); }
+
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif // OPENCV_HAL_VSX_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_wasm.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_wasm.hpp
new file mode 100644
index 0000000..b4178af
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/intrin_wasm.hpp
@@ -0,0 +1,2782 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_HAL_INTRIN_WASM_HPP
+#define OPENCV_HAL_INTRIN_WASM_HPP
+
+#include <limits>
+#include <cstring>
+#include <algorithm>
+#include "opencv2/core/saturate.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 0 // All f64 implementations currently use the fallback, so disable it.
+#define CV_SIMD128_FP16 0
+
+namespace cv
+{
+
+//!
@cond IGNORED + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046) +// handle renames: https://github.com/emscripten-core/emscripten/pull/9440 (https://github.com/emscripten-core/emscripten/commit/755d5b46cb84d0aa120c10981b11d05646c29673) +#define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4 +#define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4 +#define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2 +#define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2 +#define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4 +#define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4 +#define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2 +#define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2 +#endif // COMPATIBILITY: <1.38.46 + +///////// Types /////////// + +struct v_uint8x16 +{ + typedef uchar lane_type; + typedef v128_t vector_type; + enum { nlanes = 16 }; + + v_uint8x16() {} + explicit v_uint8x16(v128_t v) : val(v) {} + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = wasm_v128_load(v); + } + + uchar get0() const + { + return (uchar)wasm_i8x16_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + typedef v128_t vector_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(v128_t v) : val(v) {} + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + val = wasm_v128_load(v); + } + + schar get0() const + { + return wasm_i8x16_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + typedef v128_t vector_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(v128_t v) : val(v) {} + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = wasm_v128_load(v); + } + + ushort get0() const + { + return (ushort)wasm_i16x8_extract_lane(val, 0); // wasm_u16x8_extract_lane() unimplemented yet + } + + v128_t val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + typedef v128_t vector_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(v128_t v) : val(v) {} + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + val = wasm_v128_load(v); + } + + short get0() const + { + return wasm_i16x8_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(v128_t v) : val(v) {} + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + + unsigned get0() const + { + return (unsigned)wasm_i32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(v128_t 
v) : val(v) {} + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + + int get0() const + { + return wasm_i32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + typedef v128_t vector_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(v128_t v) : val(v) {} + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + val = wasm_v128_load(v); + } + + float get0() const + { + return wasm_f32x4_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(v128_t v) : val(v) {} + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + val = wasm_v128_load(v); + } + + uint64 get0() const + { + return (uint64)wasm_i64x2_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(v128_t v) : val(v) {} + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + val = wasm_v128_load(v); + } + + int64 get0() const + { + return wasm_i64x2_extract_lane(val, 0); + } + + v128_t val; +}; + +struct v_float64x2 +{ + typedef double lane_type; + typedef v128_t vector_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(v128_t v) : val(v) {} + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + val = wasm_v128_load(v); + } + + double get0() const + { + return wasm_f64x2_extract_lane(val, 0); + } + + v128_t val; +}; + +namespace +{ +#define OPENCV_HAL_IMPL_REINTERPRET_INT(ft, tt) \ +inline tt reinterpret_int(ft x) { union { ft l; tt i; } v; v.l = x; return v.i; } +OPENCV_HAL_IMPL_REINTERPRET_INT(uchar, schar) +OPENCV_HAL_IMPL_REINTERPRET_INT(schar, schar) +OPENCV_HAL_IMPL_REINTERPRET_INT(ushort, short) +OPENCV_HAL_IMPL_REINTERPRET_INT(short, short) +OPENCV_HAL_IMPL_REINTERPRET_INT(unsigned, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(int, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(float, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(uint64, int64) +OPENCV_HAL_IMPL_REINTERPRET_INT(int64, int64) +OPENCV_HAL_IMPL_REINTERPRET_INT(double, int64) + +static const unsigned char popCountTable[] = +{ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, +}; +} // namespace + +static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23); +} + +static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23); +} + +static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 
0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23); +} + +static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); +} + +static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31); +} + +static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31); +} + +static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31); +} + +static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) { + return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); +} + +/** Convert **/ +// 8 >> 16 +inline v128_t v128_cvtu8x16_i16x8(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i8x16(a, z); +} +inline v128_t v128_cvti8x16_i16x8(const v128_t& a) +{ return wasm_i16x8_shr(wasm_unpacklo_i8x16(a, a), 8); } +// 8 >> 32 +inline v128_t v128_cvtu8x16_i32x4(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i16x8(wasm_unpacklo_i8x16(a, z), z); +} +inline v128_t v128_cvti8x16_i32x4(const v128_t& a) +{ + v128_t r = wasm_unpacklo_i8x16(a, a); + r = wasm_unpacklo_i8x16(r, r); + return wasm_i32x4_shr(r, 24); +} +// 16 >> 32 +inline v128_t v128_cvtu16x8_i32x4(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i16x8(a, z); +} +inline v128_t v128_cvti16x8_i32x4(const v128_t& a) +{ return wasm_i32x4_shr(wasm_unpacklo_i16x8(a, a), 16); } +// 32 >> 64 +inline v128_t v128_cvtu32x4_i64x2(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpacklo_i32x4(a, z); +} +inline v128_t v128_cvti32x4_i64x2(const v128_t& a) +{ return wasm_unpacklo_i32x4(a, wasm_i32x4_shr(a, 31)); } + +// 16 << 8 +inline v128_t v128_cvtu8x16_i16x8_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i8x16(a, z); +} +inline v128_t v128_cvti8x16_i16x8_high(const v128_t& a) +{ return wasm_i16x8_shr(wasm_unpackhi_i8x16(a, a), 8); } +// 32 << 16 +inline v128_t v128_cvtu16x8_i32x4_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i16x8(a, z); +} +inline v128_t v128_cvti16x8_i32x4_high(const v128_t& a) +{ return wasm_i32x4_shr(wasm_unpackhi_i16x8(a, a), 16); } +// 64 << 32 +inline v128_t v128_cvtu32x4_i64x2_high(const v128_t& a) +{ + const v128_t z = wasm_i8x16_splat(0); + return wasm_unpackhi_i32x4(a, z); +} +inline v128_t v128_cvti32x4_i64x2_high(const v128_t& a) +{ return wasm_unpackhi_i32x4(a, wasm_i32x4_shr(a, 31)); } + +#define OPENCV_HAL_IMPL_WASM_INITVEC(_Tpvec, _Tp, suffix, zsuffix, _Tps) \ +inline _Tpvec v_setzero_##suffix() { return _Tpvec(wasm_##zsuffix##_splat((_Tps)0)); } \ +inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(wasm_##zsuffix##_splat((_Tps)v)); } \ +template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ +{ return _Tpvec(a.val); } + +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint8x16, uchar, u8, i8x16, schar) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int8x16, schar, s8, i8x16, schar) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint16x8, ushort, u16, i16x8, short) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int16x8, short, s16, i16x8, short) +OPENCV_HAL_IMPL_WASM_INITVEC(v_uint32x4, unsigned, u32, i32x4, int) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int32x4, int, s32, i32x4, int) +OPENCV_HAL_IMPL_WASM_INITVEC(v_float32x4, float, f32, f32x4, float) 
+OPENCV_HAL_IMPL_WASM_INITVEC(v_uint64x2, uint64, u64, i64x2, int64) +OPENCV_HAL_IMPL_WASM_INITVEC(v_int64x2, int64, s64, i64x2, int64) +OPENCV_HAL_IMPL_WASM_INITVEC(v_float64x2, double, f64, f64x2, double) + +//////////////// PACK /////////////// +inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); + return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(127); + v128_t minval = wasm_i16x8_splat(-128); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); + return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); + return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(32767); + v128_t minval = wasm_i32x4_splat(-32768); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); + return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} +inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b) +{ + return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b) +{ + return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); +} +inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) +{ + v128_t maxval = wasm_i16x8_splat(255); + v128_t minval = wasm_i16x8_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); + return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); +} +inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) +{ + v128_t maxval = wasm_i32x4_splat(65535); + v128_t minval = wasm_i32x4_splat(0); + v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); + v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); + v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); + v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); + return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); +} + 
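+// The v_rshr_pack* family below narrows with rounding rather than truncation:
+// each lane becomes (x + (1 << (n-1))) >> n before the saturating pack.
+// A small worked example with hypothetical inputs:
+//     v_uint16x8 a = v_setall_u16(300);
+//     v_rshr_pack<2>(a, a); // (300 + 2) >> 2 = 75 in every uint8 lane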
+template<int n>
+inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
+    v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t b1 = wasm_u16x8_shr(wasm_i16x8_add(b.val, delta), n);
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval));
+    return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+template<int n>
+inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
+{
+    v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
+    v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n);
+    v128_t maxval = wasm_i16x8_splat(127);
+    v128_t minval = wasm_i16x8_splat(-128);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
+    v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
+    return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+template<int n>
+inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t b1 = wasm_u32x4_shr(wasm_i32x4_add(b.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval));
+    return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+}
+template<int n>
+inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(32767);
+    v128_t minval = wasm_i32x4_splat(-32768);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
+    v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
+    return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+}
+template<int n>
+inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
+    v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
+    v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n);
+    return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+}
+template<int n>
+inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
+    v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
+    v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n);
+    return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+}
+template<int n>
+inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
+{
+    v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
+    v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t b1 =
+template<int n>
+inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
+{
+    v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
+    v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n);
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t minval = wasm_i16x8_splat(0);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
+    v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
+    return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+template<int n>
+inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t minval = wasm_i16x8_splat(0);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
+    v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
+    return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+}
+
+inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
+    v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar t_ptr[16];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<8; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_store(schar* ptr, const v_int16x8& a)
+{
+    v128_t maxval = wasm_i16x8_splat(127);
+    v128_t minval = wasm_i16x8_splat(-128);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
+    v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    schar t_ptr[16];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<8; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
+    v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort t_ptr[8];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<4; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_store(short* ptr, const v_int32x4& a)
+{
+    v128_t maxval = wasm_i32x4_splat(32767);
+    v128_t minval = wasm_i32x4_splat(-32768);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
+    v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    short t_ptr[8];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<4; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    unsigned t_ptr[4];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<2; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_store(int* ptr, const v_int64x2& a)
+{
+    v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    int t_ptr[4];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<2; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t minval = wasm_i16x8_splat(0);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
+    v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar t_ptr[16];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<8; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t minval = wasm_i32x4_splat(0);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
+    v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort t_ptr[8];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<4; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
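Unlike v_pack, the v_pack_store/v_pack_u_store forms narrow a single vector and write only half a register's worth of elements, which is why the copy loops above stop at nlanes/2. A hypothetical call site (sketch; the buffer names are made up for illustration):

    // Narrow eight 16-bit values into eight saturated bytes.
    void narrow_row(const ushort* row16, uchar* row8)
    {
        v_uint16x8 v = v_load(row16); // 8 x u16
        v_pack_store(row8, v);        // writes exactly 8 bytes
    }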
+template<int n>
+inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    v128_t delta = wasm_i16x8_splat((short)(1 << (n-1)));
+    v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
+    v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar t_ptr[16];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<8; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
+{
+    v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
+    v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t maxval = wasm_i16x8_splat(127);
+    v128_t minval = wasm_i16x8_splat(-128);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    schar t_ptr[16];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<8; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
+    v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort t_ptr[8];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<4; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_store(short* ptr, const v_int32x4& a)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(32767);
+    v128_t minval = wasm_i32x4_splat(-32768);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    short t_ptr[8];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<4; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
+    v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
+    v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    unsigned t_ptr[4];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<2; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_store(int* ptr, const v_int64x2& a)
+{
+    v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
+    v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
+    v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    int t_ptr[4];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<2; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
+    v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t minval = wasm_i16x8_splat(0);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar t_ptr[16];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<8; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+template<int n>
+inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t minval = wasm_i32x4_splat(0);
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
+    v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort t_ptr[8];
+    wasm_v128_store(t_ptr, r);
+    for (int i=0; i<4; ++i) {
+        ptr[i] = t_ptr[i];
+    }
+}
+
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v128_t maxval = wasm_i16x8_splat(255);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
+    v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
+    return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    v128_t maxval = wasm_i32x4_splat(255);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
+    v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
+    v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval));
+    v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval));
+    v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
+    v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
+    return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    v128_t maxval = wasm_i32x4_splat(255);
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, ((__u64x2)(a.val) > (__u64x2)maxval));
+    v128_t b1 = wasm_v128_bitselect(maxval, b.val, ((__u64x2)(b.val) > (__u64x2)maxval));
+    v128_t c1 = wasm_v128_bitselect(maxval, c.val, ((__u64x2)(c.val) > (__u64x2)maxval));
+    v128_t d1 = wasm_v128_bitselect(maxval, d.val, ((__u64x2)(d.val) > (__u64x2)maxval));
+    v128_t e1 = wasm_v128_bitselect(maxval, e.val, ((__u64x2)(e.val) > (__u64x2)maxval));
+    v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval));
+    v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval));
+    v128_t h1 = wasm_v128_bitselect(maxval, h.val,
((__u64x2)(h.val) > (__u64x2)maxval)); + v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); +} + +inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& m3) +{ + v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)); + v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)); + v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)); + v128_t v3 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 3)); + v0 = wasm_f32x4_mul(v0, m0.val); + v1 = wasm_f32x4_mul(v1, m1.val); + v2 = wasm_f32x4_mul(v2, m2.val); + v3 = wasm_f32x4_mul(v3, m3.val); + + return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, v3))); +} + +inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, + const v_float32x4& m1, const v_float32x4& m2, + const v_float32x4& a) +{ + v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)); + v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)); + v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)); + v0 = wasm_f32x4_mul(v0, m0.val); + v1 = wasm_f32x4_mul(v1, m1.val); + v2 = wasm_f32x4_mul(v2, m2.val); + + return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, a.val))); +} + +#define OPENCV_HAL_IMPL_WASM_BIN_OP(bin_op, _Tpvec, intrin) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a.val = intrin(a.val, b.val); \ + return a; \ +} + +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint8x16, wasm_u8x16_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint8x16, wasm_u8x16_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int8x16, wasm_i8x16_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int8x16, wasm_i8x16_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint16x8, wasm_u16x8_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint16x8, wasm_u16x8_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int16x8, wasm_i16x8_add_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int16x8, wasm_i16x8_sub_saturate) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint32x4, wasm_i32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint32x4, wasm_i32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_uint32x4, wasm_i32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int32x4, wasm_i32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int32x4, wasm_i32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_int32x4, wasm_i32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float32x4, wasm_f32x4_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float32x4, wasm_f32x4_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float32x4, wasm_f32x4_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float32x4, wasm_f32x4_div) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint64x2, wasm_i64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint64x2, wasm_i64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int64x2, wasm_i64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int64x2, wasm_i64x2_sub) 
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float64x2, wasm_f64x2_add) +OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float64x2, wasm_f64x2_sub) +OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float64x2, wasm_f64x2_mul) +OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float64x2, wasm_f64x2_div) + +// saturating multiply 8-bit, 16-bit +#define OPENCV_HAL_IMPL_WASM_MUL_SAT(_Tpvec, _Tpwvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _Tpwvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ a = a * b; return a; } + +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int16x8, v_int32x4) + +// Multiply and expand +inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b, + v_uint16x8& c, v_uint16x8& d) +{ + v_uint16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, + v_int16x8& c, v_int16x8& d) +{ + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c = v_mul_wrap(a0, b0); + d = v_mul_wrap(a1, b1); +} + +inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, + v_int32x4& c, v_int32x4& d) +{ + v_int32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = wasm_i32x4_mul(a0.val, b0.val); + d.val = wasm_i32x4_mul(a1.val, b1.val); +} + +inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, + v_uint32x4& c, v_uint32x4& d) +{ + v_uint32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = wasm_i32x4_mul(a0.val, b0.val); + d.val = wasm_i32x4_mul(a1.val, b1.val); +} + +inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, + v_uint64x2& c, v_uint64x2& d) +{ + v_uint64x2 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + c.val = ((__u64x2)(a0.val) * (__u64x2)(b0.val)); + d.val = ((__u64x2)(a1.val) * (__u64x2)(b1.val)); +} + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + v_int32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + v128_t c = wasm_i32x4_mul(a0.val, b0.val); + v128_t d = wasm_i32x4_mul(a1.val, b1.val); + return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + v128_t c = wasm_i32x4_mul(a0.val, b0.val); + v128_t d = wasm_i32x4_mul(a1.val, b1.val); + return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); +} + +//////// Dot Product //////// + +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_i32x4_shr(a.val, 16); + v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_i32x4_shr(b.val, 16); + v128_t c = wasm_i32x4_mul(a0, b0); + v128_t d = wasm_i32x4_mul(a1, b1); + return v_int32x4(wasm_i32x4_add(c, d)); +} + +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b) + c; } + +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) +{ + v128_t a0 = wasm_i64x2_shr(wasm_i64x2_shl(a.val, 32), 32); + v128_t a1 = wasm_i64x2_shr(a.val, 32); + v128_t b0 = wasm_i64x2_shr(wasm_i64x2_shl(b.val, 32), 32); + v128_t b1 = 
wasm_i64x2_shr(b.val, 32); + v128_t c = (v128_t)((__i64x2)a0 * (__i64x2)b0); + v128_t d = (v128_t)((__i64x2)a1 * (__i64x2)b1); + return v_int64x2(wasm_i64x2_add(c, d)); +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + return v_dotprod(a, b) + c; +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + v128_t a0 = wasm_u16x8_shr(wasm_i16x8_shl(a.val, 8), 8); + v128_t a1 = wasm_u16x8_shr(a.val, 8); + v128_t b0 = wasm_u16x8_shr(wasm_i16x8_shl(b.val, 8), 8); + v128_t b1 = wasm_u16x8_shr(b.val, 8); + return v_uint32x4(( + v_dotprod(v_int16x8(a0), v_int16x8(b0)) + + v_dotprod(v_int16x8(a1), v_int16x8(b1))).val + ); +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + v128_t a0 = wasm_i16x8_shr(wasm_i16x8_shl(a.val, 8), 8); + v128_t a1 = wasm_i16x8_shr(a.val, 8); + v128_t b0 = wasm_i16x8_shr(wasm_i16x8_shl(b.val, 8), 8); + v128_t b1 = wasm_i16x8_shr(b.val, 8); + return v_int32x4( + v_dotprod(v_int16x8(a0), v_int16x8(b0)) + + v_dotprod(v_int16x8(a1), v_int16x8(b1)) + ); +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b) + c; } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + v128_t a0 = wasm_u32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_u32x4_shr(a.val, 16); + v128_t b0 = wasm_u32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_u32x4_shr(b.val, 16); + return v_uint64x2(( + v_dotprod(v_int32x4(a0), v_int32x4(b0)) + + v_dotprod(v_int32x4(a1), v_int32x4(b1))).val + ); +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16); + v128_t a1 = wasm_i32x4_shr(a.val, 16); + v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16); + v128_t b1 = wasm_i32x4_shr(b.val, 16); + return v_int64x2(( + v_dotprod(v_int32x4(a0), v_int32x4(b0)) + + v_dotprod(v_int32x4(a1), v_int32x4(b1))) + ); +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod(a, b); } +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ return v_dotprod(a, b, c); } + +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod(a, b); } +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ return v_dotprod(a, b, c); } + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ return v_dotprod_expand(a, b, c); } +inline 
v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ return v_dotprod_expand(a, b); } +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ return v_dotprod_expand(a, b, c); } + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ return v_dotprod_expand(a, b, c); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ return v_dotprod_expand(a, b); } +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ return v_dotprod_expand(a, b, c); } + +// 32 >> 64f +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_dotprod_expand(a, b); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand(a, b, c); } + +#define OPENCV_HAL_IMPL_WASM_LOGIC_OP(_Tpvec) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(&, _Tpvec, wasm_v128_and) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(|, _Tpvec, wasm_v128_or) \ +OPENCV_HAL_IMPL_WASM_BIN_OP(^, _Tpvec, wasm_v128_xor) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + return _Tpvec(wasm_v128_not(a.val)); \ +} + +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint8x16) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int8x16) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint16x8) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int16x8) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint64x2) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int64x2) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float32x4) +OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float64x2) + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + return v_float32x4(wasm_f32x4_sqrt(x.val)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + const v128_t _1_0 = wasm_f32x4_splat(1.0); + return v_float32x4(wasm_f32x4_div(_1_0, wasm_f32x4_sqrt(x.val))); +} + +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + return v_float64x2(wasm_f64x2_sqrt(x.val)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + const v128_t _1_0 = wasm_f64x2_splat(1.0); + return v_float64x2(wasm_f64x2_div(_1_0, wasm_f64x2_sqrt(x.val))); +} + +#define OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(_Tpuvec, _Tpsvec, suffix, zsuffix, shiftWidth) \ +inline _Tpuvec v_abs(const _Tpsvec& x) \ +{ \ + v128_t s = wasm_##suffix##_shr(x.val, shiftWidth); \ + v128_t f = wasm_##zsuffix##_shr(x.val, shiftWidth); \ + return _Tpuvec(wasm_##zsuffix##_add(wasm_v128_xor(x.val, f), s)); \ +} + +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint8x16, v_int8x16, u8x16, i8x16, 7) +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint16x8, v_int16x8, u16x8, i16x8, 15) +OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint32x4, v_int32x4, u32x4, i32x4, 31) + +inline v_float32x4 v_abs(const v_float32x4& x) +{ return v_float32x4(wasm_f32x4_abs(x.val)); } +inline v_float64x2 v_abs(const v_float64x2& x) +{ + return v_float64x2(wasm_f64x2_abs(x.val)); +} + +// TODO: exp, log, sin, cos + +#define OPENCV_HAL_IMPL_WASM_BIN_FUNC(_Tpvec, func, intrin) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(intrin(a.val, b.val)); \ +} + +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, 
v_max, wasm_f64x2_max) + +#define OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(_Tpvec, suffix) \ +inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(b.val, a.val, wasm_##suffix##_gt(a.val, b.val))); \ +} \ +inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, wasm_##suffix##_gt(a.val, b.val))); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int8x16, i8x16) +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int16x8, i16x8) +OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int32x4, i32x4) + +#define OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(_Tpvec, suffix, deltaNum) \ +inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t delta = wasm_##suffix##_splat(deltaNum); \ + v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \ + return _Tpvec(wasm_v128_bitselect(b.val, a.val, mask)); \ +} \ +inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \ +{ \ + v128_t delta = wasm_##suffix##_splat(deltaNum); \ + v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask)); \ +} + +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint8x16, i8x16, (schar)0x80) +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint16x8, i16x8, (short)0x8000) +OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint32x4, i32x4, (int)0x80000000) + +#define OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(_Tpvec, suffix, esuffix) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##esuffix##_eq(a.val, b.val)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##esuffix##_ne(a.val, b.val)); } \ +inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_lt(a.val, b.val)); } \ +inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_gt(a.val, b.val)); } \ +inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_le(a.val, b.val)); } \ +inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ +{ return _Tpvec(wasm_##suffix##_ge(a.val, b.val)); } + +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint8x16, u8x16, i8x16) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int8x16, i8x16, i8x16) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint16x8, u16x8, i16x8) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int16x8, i16x8, i16x8) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint32x4, u32x4, i32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int32x4, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float32x4, f32x4, f32x4) +OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float64x2, f64x2, f64x2) + +#define OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(_Tpvec, cast) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); } + +OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64) +OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64) + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ + v128_t z = wasm_i32x4_splat(0x7fffffff); + v128_t t = wasm_i32x4_splat(0x7f800000); + return v_float32x4(wasm_u32x4_lt(wasm_v128_and(a.val, z), t)); +} +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ + v128_t z = wasm_i64x2_splat(0x7fffffffffffffff); + v128_t t = wasm_i64x2_splat(0x7ff0000000000000); + return 
v_float64x2((__u64x2)(wasm_v128_and(a.val, z)) < (__u64x2)t); +} + +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_add_wrap, wasm_i8x16_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_add_wrap, wasm_i8x16_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_add_wrap, wasm_i16x8_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_add_wrap, wasm_i16x8_add) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_sub_wrap, wasm_i8x16_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_sub_wrap, wasm_i8x16_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub) +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) >= (1039012) +// details: https://github.com/opencv/opencv/issues/18097 ( https://github.com/emscripten-core/emscripten/issues/12018 ) +// 1.39.12: https://github.com/emscripten-core/emscripten/commit/cd801d0f110facfd694212a3c8b2ed2ffcd630e2 +inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) +{ + uchar a_[16], b_[16]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + for (int i = 0; i < 16; i++) + a_[i] = (uchar)(a_[i] * b_[i]); + return v_uint8x16(wasm_v128_load(a_)); +} +inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) +{ + schar a_[16], b_[16]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + for (int i = 0; i < 16; i++) + a_[i] = (schar)(a_[i] * b_[i]); + return v_int8x16(wasm_v128_load(a_)); +} +#else +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul) +#endif +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_mul_wrap, wasm_i16x8_mul) +OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_mul_wrap, wasm_i16x8_mul) + + +/** Absolute difference **/ + +inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b) +{ return v_add_wrap(a - b, b - a); } +inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b) +{ return v_max(a, b) - v_min(a, b); } + +inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = v_sub_wrap(a, b); + v_int8x16 m = a < b; + return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); +} +inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) +{ + return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); +} +inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) +{ + v_int32x4 d = a - b; + v_int32x4 m = a < b; + return v_reinterpret_as_u32((d ^ m) - m); +} + +/** Saturating absolute difference **/ +inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b) +{ + v_int8x16 d = a - b; + v_int8x16 m = a < b; + return (d ^ m) - m; + } +inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) +{ return v_max(a, b) - v_min(a, b); } + + +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return a * b + c; +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return a * b + c; +} + +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return a * b + c; +} + +inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +{ + v128_t absmask_vec = wasm_i32x4_splat(0x7fffffff); + 
+    return v_float32x4(wasm_v128_and(wasm_f32x4_sub(a.val, b.val), absmask_vec));
+}
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{
+    v128_t absmask_vec = wasm_u64x2_shr(wasm_i32x4_splat(-1), 1);
+    return v_float64x2(wasm_v128_and(wasm_f64x2_sub(a.val, b.val), absmask_vec));
+}
+
+#define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec, suffix) \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \
+    v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \
+    return _Tpvec(wasm_##suffix##_sqrt(wasm_##suffix##_add(a_Square, b_Square))); \
+} \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \
+    v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \
+    return _Tpvec(wasm_##suffix##_add(a_Square, b_Square)); \
+} \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ \
+    return _Tpvec(wasm_##suffix##_add(wasm_##suffix##_mul(a.val, b.val), c.val)); \
+}
+
+OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4, f32x4)
+OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2, f64x2)
+
+#define OPENCV_HAL_IMPL_WASM_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, ssuffix) \
+inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
+} \
+inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shl(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shl(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shr(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shr(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \
+}
+
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint8x16, v_int8x16, i8x16, u8x16)
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint16x8, v_int16x8, i16x8, u16x8)
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint32x4, v_int32x4, i32x4, u32x4)
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint64x2, v_int64x2, i64x2, u64x2)
+
+namespace hal_wasm_internal
+{
+    template <int imm,
+        bool is_invalid = ((imm < 0) || (imm > 16)),
+        bool is_first = (imm == 0),
+        bool is_second = (imm == 16),
+        bool is_other = (((imm > 0) && (imm < 16)))>
+    class v_wasm_palignr_u8_class;
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, true, false, false, false>;
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, false, true, false, false>
+    {
+    public:
+        inline v128_t operator()(const v128_t& a, const v128_t&) const
+        {
+            return a;
+        }
+    };
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, false, false, true, false>
+    {
+    public:
+        inline v128_t operator()(const v128_t&, const v128_t& b) const
+        {
+            return b;
+        }
+    };
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, false, false, false, true>
+    {
+    public:
+        inline v128_t operator()(const v128_t& a, const v128_t& b) const
+        {
+            enum { imm2 = (sizeof(v128_t) - imm) };
+            return wasm_v8x16_shuffle(a, b,
+                                      imm, imm+1, imm+2, imm+3,
+                                      imm+4, imm+5, imm+6, imm+7,
+                                      imm+8, imm+9, imm+10, imm+11,
+                                      imm+12, imm+13, imm+14, imm+15);
+        }
+    };
+
+    template <int imm>
+    inline v128_t v_wasm_palignr_u8(const v128_t& a, const v128_t& b)
+    {
+        CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_wasm_palignr_u8.");
+        return v_wasm_palignr_u8_class<imm>()(a, b);
+    }
+}
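The class template above resolves a compile-time byte offset into either a pass-through (imm == 0 or imm == 16) or a single 16-byte shuffle; the rotate helpers that follow feed it the lane index multiplied by the lane size. A usage sketch (illustrative values only; assumes the vector types defined earlier in this header):

    // Rotate a 4-lane int vector right by one lane:
    // {1,2,3,4} -> {2,3,4,0}; v_rotate_right<1> maps to v_wasm_palignr_u8<4>.
    void rotate_demo()
    {
        int in[4] = {1, 2, 3, 4}, out[4];
        v_int32x4 v = v_load(in);
        v_store(out, v_rotate_right<1>(v)); // out = {2, 3, 4, 0}
    }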
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a)
+{
+    using namespace hal_wasm_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    v128_t z = wasm_i8x16_splat(0);
+    return _Tpvec(v_wasm_palignr_u8<imm2>(a.val, z));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a)
+{
+    using namespace hal_wasm_internal;
+    enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
+    v128_t z = wasm_i8x16_splat(0);
+    return _Tpvec(v_wasm_palignr_u8<imm2>(z, a.val));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b)
+{
+    using namespace hal_wasm_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_wasm_palignr_u8<imm2>(a.val, b.val));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b)
+{
+    using namespace hal_wasm_internal;
+    enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_wasm_palignr_u8<imm2>(b.val, a.val));
+}
+
+#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(_Tpvec, _Tp) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(wasm_v128_load(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(wasm_v128_load(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ \
+    _Tp tmp[_Tpvec::nlanes] = {0}; \
+    for (int i=0; i<_Tpvec::nlanes/2; ++i) { \
+        tmp[i] = ptr[i]; \
+    } \
+    return _Tpvec(wasm_v128_load(tmp)); \
+} \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    _Tp tmp[_Tpvec::nlanes]; \
+    for (int i=0; i<_Tpvec::nlanes/2; ++i) { \
+        tmp[i] = ptr0[i]; \
+        tmp[i+_Tpvec::nlanes/2] = ptr1[i]; \
+    } \
+    return _Tpvec(wasm_v128_load(tmp)); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ wasm_v128_store(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ wasm_v128_store(ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ wasm_v128_store(ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
+{ \
+    wasm_v128_store(ptr, a.val); \
+} \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ \
+    _Tpvec::lane_type a_[_Tpvec::nlanes]; \
+    wasm_v128_store(a_, a.val); \
+    for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \
+        ptr[i] = a_[i]; \
+} \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+    _Tpvec::lane_type a_[_Tpvec::nlanes]; \
+    wasm_v128_store(a_, a.val); \
+    for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \
+        ptr[i] = a_[i + (_Tpvec::nlanes / 2)]; \
+}
+
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int8x16, schar)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int16x8, short)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int32x4, int)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int64x2, int64)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
+
+
+/** Reverse **/
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9,
6, 7, 4, 5, 2, 3, 0, 1)); } + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); } + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \ + return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ +} + +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_uint32x4, unsigned, v128_t, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_int32x4, int, v128_t, i32x4, i32x4) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4) + +// To do: Optimize v_reduce_sum with wasm intrin. +// Now use fallback implementation as there is no widening op in wasm intrin. 
+ +#define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + scalartype c = a_[0]; \ + for (int i = 1; i < _Tpvec::nlanes; i++) \ + c += a_[i]; \ + return c; \ +} + +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int8x16, int) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int) + + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + regtype val = a.val; \ + val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ +} +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_int64x2, int64, v128_t, i64x2, i64x2) +OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_float64x2, double, v128_t, f64x2,f64x2) + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + v128_t ac = wasm_f32x4_add(wasm_unpacklo_i32x4(a.val, c.val), wasm_unpackhi_i32x4(a.val, c.val)); + v128_t bd = wasm_f32x4_add(wasm_unpacklo_i32x4(b.val, d.val), wasm_unpackhi_i32x4(b.val, d.val)); + return v_float32x4(wasm_f32x4_add(wasm_unpacklo_i32x4(ac, bd), wasm_unpackhi_i32x4(ac, bd))); +} + +#define OPENCV_HAL_IMPL_WASM_REDUCE_OP(_Tpvec, scalartype, func, scalar_func) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + scalartype buf[_Tpvec::nlanes]; \ + v_store(buf, a); \ + scalartype tmp = buf[0]; \ + for (int i=1; i<_Tpvec::nlanes; ++i) { \ + tmp = scalar_func(tmp, buf[i]); \ + } \ + return tmp; \ +} + +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, min, std::min) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, max, std::max) +OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, min, std::min) + +inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) +{ + v_uint16x8 l16, h16; + v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32; + v_expand(v_absdiff(a, b), l16, h16); + v_expand(l16, l16_l32, l16_h32); + v_expand(h16, h16_l32, h16_h32); + return v_reduce_sum(l16_l32+l16_h32+h16_l32+h16_h32); +} +inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) +{ + v_uint16x8 l16, h16; + v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32; + v_expand(v_absdiff(a, b), l16, h16); + v_expand(l16, l16_l32, l16_h32); + v_expand(h16, h16_l32, h16_h32); + return v_reduce_sum(l16_l32+l16_h32+h16_l32+h16_h32); +} +inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) +{ + v_uint32x4 l, h; + 
v_expand(v_absdiff(a, b), l, h);
+    return v_reduce_sum(l + h);
+}
+inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
+{
+    v_uint32x4 l, h;
+    v_expand(v_absdiff(a, b), l, h);
+    return v_reduce_sum(l + h);
+}
+inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
+{
+    return v_reduce_sum(v_absdiff(a, b));
+}
+inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
+{
+    return v_reduce_sum(v_absdiff(a, b));
+}
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{
+    return v_reduce_sum(v_absdiff(a, b));
+}
+
+inline v_uint8x16 v_popcount(const v_uint8x16& a)
+{
+    v128_t m1 = wasm_i32x4_splat(0x55555555);
+    v128_t m2 = wasm_i32x4_splat(0x33333333);
+    v128_t m4 = wasm_i32x4_splat(0x0f0f0f0f);
+    v128_t p = a.val;
+    p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 1), m1), wasm_v128_and(p, m1));
+    p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 2), m2), wasm_v128_and(p, m2));
+    p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 4), m4), wasm_v128_and(p, m4));
+    return v_uint8x16(p);
+}
+inline v_uint16x8 v_popcount(const v_uint16x8& a)
+{
+    v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a));
+    p += v_rotate_right<1>(p);
+    return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff);
+}
+inline v_uint32x4 v_popcount(const v_uint32x4& a)
+{
+    v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a));
+    p += v_rotate_right<1>(p);
+    p += v_rotate_right<2>(p);
+    return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff);
+}
+inline v_uint64x2 v_popcount(const v_uint64x2& a)
+{
+    uint64 a_[2], b_[2] = { 0 };
+    wasm_v128_store(a_, a.val);
+    for (int i = 0; i < 16; i++)
+        b_[i / 8] += popCountTable[((uint8_t*)a_)[i]];
+    return v_uint64x2(wasm_v128_load(b_));
+}
+inline v_uint8x16 v_popcount(const v_int8x16& a)
+{ return v_popcount(v_reinterpret_as_u8(a)); }
+inline v_uint16x8 v_popcount(const v_int16x8& a)
+{ return v_popcount(v_reinterpret_as_u16(a)); }
+inline v_uint32x4 v_popcount(const v_int32x4& a)
+{ return v_popcount(v_reinterpret_as_u32(a)); }
+inline v_uint64x2 v_popcount(const v_int64x2& a)
+{ return v_popcount(v_reinterpret_as_u64(a)); }
+
+#define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \
+inline int v_signmask(const _Tpvec& a) \
+{ \
+    _Tpvec::lane_type a_[_Tpvec::nlanes]; \
+    wasm_v128_store(a_, a.val); \
+    int mask = 0; \
+    for (int i = 0; i < _Tpvec::nlanes; i++) \
+        mask |= (reinterpret_int(a_[i]) < 0) << i; \
+    return mask; \
+} \
+inline bool v_check_all(const _Tpvec& a) \
+{ return wasm_i8x16_all_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); } \
+inline bool v_check_any(const _Tpvec& a) \
+{ return wasm_i8x16_any_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); }
+
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint8x16, i8x16, schar)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int8x16, i8x16, schar)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint16x8, i16x8, short)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int16x8, i16x8, short)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint32x4, i32x4, int)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int32x4, i32x4, int)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float32x4, i32x4, float)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float64x2, f64x2, double)
+
+#define OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(_Tpvec, suffix, esuffix) \
+inline bool v_check_all(const _Tpvec& a) \
+{ \
+    v128_t masked = v_reinterpret_as_##esuffix(a).val; \
+    masked = wasm_i32x4_replace_lane(masked, 0, 0xffffffff); \
+    masked = wasm_i32x4_replace_lane(masked, 2, 0xffffffff); \
+    return wasm_i8x16_all_true(wasm_##suffix##_lt(masked,
wasm_##suffix##_splat(0))); \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + v128_t masked = v_reinterpret_as_##esuffix(a).val; \ + masked = wasm_i32x4_replace_lane(masked, 0, 0x0); \ + masked = wasm_i32x4_replace_lane(masked, 2, 0x0); \ + return wasm_i8x16_any_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \ +} \ + +OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_int64x2, i32x4, s32) +OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_uint64x2, i32x4, u32) + + +inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } +inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } +inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; } +inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } +inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } + +#define OPENCV_HAL_IMPL_WASM_SELECT(_Tpvec) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask.val)); \ +} + +OPENCV_HAL_IMPL_WASM_SELECT(v_uint8x16) +OPENCV_HAL_IMPL_WASM_SELECT(v_int8x16) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint16x8) +OPENCV_HAL_IMPL_WASM_SELECT(v_int16x8) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_int32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_uint64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_int64x2) +OPENCV_HAL_IMPL_WASM_SELECT(v_float32x4) +OPENCV_HAL_IMPL_WASM_SELECT(v_float64x2) + +#define OPENCV_HAL_IMPL_WASM_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = intrin(a.val); \ + b1.val = __CV_CAT(intrin, _high)(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ return _Tpwvec(intrin(a.val)); } \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + v128_t a = wasm_v128_load(ptr); \ + return _Tpwvec(intrin(a)); \ +} + +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint8x16, v_uint16x8, uchar, v128_cvtu8x16_i16x8) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int8x16, v_int16x8, schar, v128_cvti8x16_i16x8) +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint16x8, v_uint32x4, ushort, v128_cvtu16x8_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int16x8, v_int32x4, short, v128_cvti16x8_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND(v_uint32x4, v_uint64x2, unsigned, v128_cvtu32x4_i64x2) +OPENCV_HAL_IMPL_WASM_EXPAND(v_int32x4, v_int64x2, int, v128_cvti32x4_i64x2) + +#define OPENCV_HAL_IMPL_WASM_EXPAND_Q(_Tpvec, _Tp, intrin) \ +inline _Tpvec v_load_expand_q(const _Tp* ptr) \ +{ \ + v128_t a = wasm_v128_load(ptr); \ + return _Tpvec(intrin(a)); \ +} + +OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_uint32x4, uchar, v128_cvtu8x16_i32x4) +OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_int32x4, schar, 
v128_cvti8x16_i32x4)
+
+#define OPENCV_HAL_IMPL_WASM_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
+{ \
+    b0.val = wasm_unpacklo_##suffix(a0.val, a1.val); \
+    b1.val = wasm_unpackhi_##suffix(a0.val, a1.val); \
+} \
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(wasm_unpacklo_i64x2(a.val, b.val)); \
+} \
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(wasm_unpackhi_i64x2(a.val, b.val)); \
+} \
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
+{ \
+    c.val = wasm_unpacklo_i64x2(a.val, b.val); \
+    d.val = wasm_unpackhi_i64x2(a.val, b.val); \
+}
+
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint8x16, i8x16)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_int8x16, i8x16)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_int16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_int32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_float32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_float64x2, i64x2)
+
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{
+    return v_rotate_right<s>(a, b);
+}
+
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    v128_t h = wasm_f32x4_splat(0.5);
+    return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h)));
+}
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val);
+    v128_t mask = wasm_f32x4_lt(a.val, wasm_f32x4_convert_i32x4(a1));
+    return v_int32x4(wasm_i32x4_add(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val);
+    v128_t mask = wasm_f32x4_gt(a.val, wasm_f32x4_convert_i32x4(a1));
+    return v_int32x4(wasm_i32x4_sub(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); }
+
+#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc) \
+inline v_int32x4 func(const v_float64x2& a) \
+{ \
+    double a_[2]; \
+    wasm_v128_store(a_, a.val); \
+    int c_[4]; \
+    c_[0] = cfunc(a_[0]); \
+    c_[1] = cfunc(a_[1]); \
+    c_[2] = 0; \
+    c_[3] = 0; \
+    return v_int32x4(wasm_v128_load(c_)); \
+}
+
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound)
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor)
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil)
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int)
+
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{
+    double a_[2], b_[2];
+    wasm_v128_store(a_, a.val);
+    wasm_v128_store(b_, b.val);
+    int c_[4];
+    c_[0] = cvRound(a_[0]);
+    c_[1] = cvRound(a_[1]);
+    c_[2] = cvRound(b_[0]);
+    c_[3] = cvRound(b_[1]);
+    return v_int32x4(wasm_v128_load(c_));
+}
+
+#define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, const _Tpvec& a3, \
+                           _Tpvec& b0, _Tpvec& b1, \
+                           _Tpvec& b2, _Tpvec& b3) \
+{ \
+    v128_t t0 = wasm_unpacklo_##suffix(a0.val, a1.val); \
+    v128_t t1 = wasm_unpacklo_##suffix(a2.val, a3.val); \
+    v128_t t2 = wasm_unpackhi_##suffix(a0.val, a1.val); \
+    v128_t t3 = wasm_unpackhi_##suffix(a2.val, a3.val); \
+\
+    b0.val = wasm_unpacklo_i64x2(t0, t1); \
+    b1.val = wasm_unpackhi_i64x2(t0, t1); \
+    b2.val = wasm_unpacklo_i64x2(t2, t3); \
+    b3.val = wasm_unpackhi_i64x2(t2, t3); \
+}
+
+OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_uint32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_int32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_float32x4, i32x4)
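v_floor and v_ceil above lean on the comparison result being an all-ones mask (-1 per lane): truncate toward zero, then add or subtract the mask wherever truncation moved the value the wrong way. The same logic in scalar form (illustrative sketch, not part of the diff):

    // Scalar model of the v_floor trick: t + mask, where mask is -1
    // exactly when truncation rounded the value up (x < (float)t).
    static int floor_lane(float x)
    {
        int t = (int)x;                      // truncate toward zero
        int mask = (x < (float)t) ? -1 : 0;  // all-ones lane mask as -1
        return t + mask;                     // floor_lane(-1.5f) == -2
    }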
+
+// load deinterleave
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
+{
+    v128_t t00 = wasm_v128_load(ptr);
+    v128_t t01 = wasm_v128_load(ptr + 16);
+
+    a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
+    b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
+}
+
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);
+    v128_t t01 = wasm_v128_load(ptr + 16);
+    v128_t t02 = wasm_v128_load(ptr + 32);
+
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);
+
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
+}
+
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
+{
+    v128_t u0 = wasm_v128_load(ptr);      // a0 b0 c0 d0 a1 b1 c1 d1 ...
+    v128_t u1 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
+    v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ...
+    v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ...
+
+    v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
+    v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
+    v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
+    v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
+
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+    c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
+{
+    v128_t v0 = wasm_v128_load(ptr);     // a0 b0 a1 b1 a2 b2 a3 b3
+    v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 b2 b3 b4 b5 b6 b7
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);      // a0 b0 c0 a1 b1 c1 a2 b2
+    v128_t t01 = wasm_v128_load(ptr + 8);  // c2 a3 b3 c3 a4 b4 c4 a5
+    v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7
+
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);
+
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
+{
+    v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1
+    v128_t u1
+    v128_t u1 = wasm_v128_load(ptr + 8);  // a2 b2 c2 d2 ...
+    v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
+    v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ...
+
+    v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3
+    v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7
+    v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3
+    v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7
+
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+    c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
+{
+    v128_t v0 = wasm_v128_load(ptr);     // a0 b0 a1 b1
+    v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3
+
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+    b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);     // a0 b0 c0 a1
+    v128_t t01 = wasm_v128_load(ptr + 4); // b1 c1 a2 b2
+    v128_t t02 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3
+
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
+
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
+{
+    v_uint32x4 s0(wasm_v128_load(ptr));      // a0 b0 c0 d0
+    v_uint32x4 s1(wasm_v128_load(ptr + 4));  // a1 b1 c1 d1
+    v_uint32x4 s2(wasm_v128_load(ptr + 8));  // a2 b2 c2 d2
+    v_uint32x4 s3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3
+
+    v_transpose4x4(s0, s1, s2, s3, a, b, c, d);
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
+{
+    v128_t v0 = wasm_v128_load(ptr);       // a0 b0 a1 b1
+    v128_t v1 = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3
+
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+    b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);     // a0 b0 c0 a1
+    v128_t t01 = wasm_v128_load(ptr + 4); // b1 c1 a2 b2
+    v128_t t02 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3
+
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
+
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(t11, t02,
0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); +} + +inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d) +{ + v_float32x4 s0(wasm_v128_load(ptr)); // a0 b0 c0 d0 + v_float32x4 s1(wasm_v128_load(ptr + 4)); // a1 b1 c1 d1 + v_float32x4 s2(wasm_v128_load(ptr + 8)); // a2 b2 c2 d2 + v_float32x4 s3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3 + + v_transpose4x4(s0, s1, s2, s3, a, b, c, d); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b) +{ + v128_t t0 = wasm_v128_load(ptr); // a0 b0 + v128_t t1 = wasm_v128_load(ptr + 2); // a1 b1 + + a.val = wasm_unpacklo_i64x2(t0, t1); + b.val = wasm_unpackhi_i64x2(t0, t1); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c) +{ + v128_t t0 = wasm_v128_load(ptr); // a0, b0 + v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1 + v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1 + + a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); + b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23); + c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); +} + +inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, + v_uint64x2& b, v_uint64x2& c, v_uint64x2& d) +{ + v128_t t0 = wasm_v128_load(ptr); // a0 b0 + v128_t t1 = wasm_v128_load(ptr + 2); // c0 d0 + v128_t t2 = wasm_v128_load(ptr + 4); // a1 b1 + v128_t t3 = wasm_v128_load(ptr + 6); // c1 d1 + + a.val = wasm_unpacklo_i64x2(t0, t2); + b.val = wasm_unpackhi_i64x2(t0, t2); + c.val = wasm_unpacklo_i64x2(t1, t3); + d.val = wasm_unpackhi_i64x2(t1, t3); +} + +// store interleave + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i8x16(a.val, b.val); + v128_t v1 = wasm_unpackhi_i8x16(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 16, v1); +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 16, t11); + wasm_v128_store(ptr + 32, t12); +} + +inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, + const v_uint8x16& c, const v_uint8x16& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + v128_t u0 = wasm_unpacklo_i8x16(a.val, c.val); // a0 c0 a1 c1 ... + v128_t u1 = wasm_unpackhi_i8x16(a.val, c.val); // a8 c8 a9 c9 ... + v128_t u2 = wasm_unpacklo_i8x16(b.val, d.val); // b0 d0 b1 d1 ... + v128_t u3 = wasm_unpackhi_i8x16(b.val, d.val); // b8 d8 b9 d9 ... + + v128_t v0 = wasm_unpacklo_i8x16(u0, u2); // a0 b0 c0 d0 ... + v128_t v1 = wasm_unpackhi_i8x16(u0, u2); // a4 b4 c4 d4 ... 
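+    // Added commentary, not in upstream OpenCV: interleaving a with c and
+    // b with d first lets the second unpack pass below emit complete
+    // a,b,c,d byte quads -- the classic two-stage zip behind 4-channel stores.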
+ v128_t v2 = wasm_unpacklo_i8x16(u1, u3); // a8 b8 c8 d8 ... + v128_t v3 = wasm_unpackhi_i8x16(u1, u3); // a12 b12 c12 d12 ... + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 16, v1); + wasm_v128_store(ptr + 32, v2); + wasm_v128_store(ptr + 48, v3); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i16x8(a.val, b.val); + v128_t v1 = wasm_unpackhi_i16x8(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 8, v1); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, + const v_uint16x8& b, const v_uint16x8& c, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 8, t11); + wasm_v128_store(ptr + 16, t12); +} + +inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, + const v_uint16x8& c, const v_uint16x8& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + // a0 a1 a2 a3 .... + // b0 b1 b2 b3 .... + // c0 c1 c2 c3 .... + // d0 d1 d2 d3 .... + v128_t u0 = wasm_unpacklo_i16x8(a.val, c.val); // a0 c0 a1 c1 ... + v128_t u1 = wasm_unpackhi_i16x8(a.val, c.val); // a4 c4 a5 c5 ... + v128_t u2 = wasm_unpacklo_i16x8(b.val, d.val); // b0 d0 b1 d1 ... + v128_t u3 = wasm_unpackhi_i16x8(b.val, d.val); // b4 d4 b5 d5 ... + + v128_t v0 = wasm_unpacklo_i16x8(u0, u2); // a0 b0 c0 d0 ... + v128_t v1 = wasm_unpackhi_i16x8(u0, u2); // a2 b2 c2 d2 ... + v128_t v2 = wasm_unpacklo_i16x8(u1, u3); // a4 b4 c4 d4 ... + v128_t v3 = wasm_unpackhi_i16x8(u1, u3); // a6 b6 c6 d6 ... 
+ + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 8, v1); + wasm_v128_store(ptr + 16, v2); + wasm_v128_store(ptr + 24, v3); +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val); + v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 4, v1); +} + +inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 4, t11); + wasm_v128_store(ptr + 8, t12); +} + +inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v_uint32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + wasm_v128_store(ptr, v0.val); + wasm_v128_store(ptr + 4, v1.val); + wasm_v128_store(ptr + 8, v2.val); + wasm_v128_store(ptr + 12, v3.val); +} + +// 2-channel, float only +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val); + v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 4, v1); +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + + v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + + wasm_v128_store(ptr, t10); + wasm_v128_store(ptr + 4, t11); + wasm_v128_store(ptr + 8, t12); +} + +inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v_float32x4 v0, v1, v2, v3; + v_transpose4x4(a, b, c, d, v0, v1, v2, v3); + + wasm_v128_store(ptr, v0.val); + wasm_v128_store(ptr + 4, v1.val); + wasm_v128_store(ptr + 8, v2.val); + wasm_v128_store(ptr + 12, v3.val); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val); + v128_t v1 = wasm_unpackhi_i64x2(a.val, b.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); +} + +inline void v_store_interleave(uint64 *ptr, 
const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); + wasm_v128_store(ptr + 4, v2); +} + +inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, + const v_uint64x2& c, const v_uint64x2& d, + hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) +{ + v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val); + v128_t v1 = wasm_unpacklo_i64x2(c.val, d.val); + v128_t v2 = wasm_unpackhi_i64x2(a.val, b.val); + v128_t v3 = wasm_unpackhi_i64x2(c.val, d.val); + + wasm_v128_store(ptr, v0); + wasm_v128_store(ptr + 2, v1); + wasm_v128_store(ptr + 4, v2); + wasm_v128_store(ptr + 6, v3); +} + +#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \ +{ \ + _Tpvec1 a1, b1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \ +{ \ + _Tpvec1 a1, b1, c1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ +} \ +inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \ +{ \ + _Tpvec1 a1, b1, c1, d1; \ + v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \ + a0 = v_reinterpret_as_##suffix0(a1); \ + b0 = v_reinterpret_as_##suffix0(b1); \ + c0 = v_reinterpret_as_##suffix0(c1); \ + d0 = v_reinterpret_as_##suffix0(d1); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \ +} \ +inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \ + const _Tpvec0& c0, const _Tpvec0& d0, \ + hal::StoreMode mode = hal::STORE_UNALIGNED ) \ +{ \ + _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \ + _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \ + _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \ + _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \ + v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \ +} + +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, 
v_uint64x2, uint64, u64) + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + return v_float32x4(wasm_f32x4_convert_i32x4(a.val)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + double a_[2]; + wasm_v128_store(a_, a.val); + float c_[4]; + c_[0] = (float)(a_[0]); + c_[1] = (float)(a_[1]); + c_[2] = 0; + c_[3] = 0; + return v_float32x4(wasm_v128_load(c_)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + double a_[2], b_[2]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + float c_[4]; + c_[0] = (float)(a_[0]); + c_[1] = (float)(a_[1]); + c_[2] = (float)(b_[0]); + c_[3] = (float)(b_[1]); + return v_float32x4(wasm_v128_load(c_)); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t p = v128_cvti32x4_i64x2(a.val); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); +#else + int a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[0]); + c_[1] = (double)(a_[1]); + return v_float64x2(wasm_v128_load(c_)); +#endif +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ +#ifdef __wasm_unimplemented_simd128__ + v128_t p = v128_cvti32x4_i64x2_high(a.val); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); +#else + int a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[2]); + c_[1] = (double)(a_[3]); + return v_float64x2(wasm_v128_load(c_)); +#endif +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + float a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[0]); + c_[1] = (double)(a_[1]); + return v_float64x2(wasm_v128_load(c_)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + float a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[2]); + c_[1] = (double)(a_[3]); + return v_float64x2(wasm_v128_load(c_)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ +#ifdef __wasm_unimplemented_simd128__ + return v_float64x2(wasm_f64x2_convert_i64x2(a.val)); +#else + int64 a_[2]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[0]); + c_[1] = (double)(a_[1]); + return v_float64x2(wasm_v128_load(c_)); +#endif +} + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]], + tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1], + tab[idx[4]], tab[idx[4]+1], tab[idx[5]], tab[idx[5]+1], tab[idx[6]], tab[idx[6]+1], tab[idx[7]], tab[idx[7]+1]); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3], + tab[idx[2]], tab[idx[2]+1], tab[idx[2]+2], tab[idx[2]+3], tab[idx[3]], tab[idx[3]+1], tab[idx[3]+2], tab[idx[3]+3]); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return 
v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], + tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], + tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1]); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], + tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3]); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[1]], + tab[idx[2]], tab[idx[3]]); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + return v_int32x4(tab[idx[0]], tab[idx[0]+1], + tab[idx[1]], tab[idx[1]+1]); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + return v_int32x4(wasm_v128_load(tab + idx[0])); +} +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + return v_int64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) +{ + return v_int64x2(wasm_v128_load(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } + +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + return v_float64x2(tab[idx[0]], tab[idx[1]]); +} +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + return v_float64x2(wasm_v128_load(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + return v_int32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)], + tab[wasm_i32x4_extract_lane(idxvec.val, 1)], + tab[wasm_i32x4_extract_lane(idxvec.val, 2)], + tab[wasm_i32x4_extract_lane(idxvec.val, 3)]); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + return 
v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
+                       tab[wasm_i32x4_extract_lane(idxvec.val, 1)],
+                       tab[wasm_i32x4_extract_lane(idxvec.val, 2)],
+                       tab[wasm_i32x4_extract_lane(idxvec.val, 3)]);
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    return v_float64x2(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
+                       tab[wasm_i32x4_extract_lane(idxvec.val, 1)]);
+}
+
+// loads pairs from the table and deinterleaves them, e.g. returns:
+//   x = (tab[idxvec[0]], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]),
+//   y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1])
+// note that the indices are float's indices, not the float-pair indices.
+// in theory, this function can be used to implement bilinear interpolation,
+// when idxvec are the offsets within the image.
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
+{
+    x = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
+                    tab[wasm_i32x4_extract_lane(idxvec.val, 1)],
+                    tab[wasm_i32x4_extract_lane(idxvec.val, 2)],
+                    tab[wasm_i32x4_extract_lane(idxvec.val, 3)]);
+    y = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)+1],
+                    tab[wasm_i32x4_extract_lane(idxvec.val, 1)+1],
+                    tab[wasm_i32x4_extract_lane(idxvec.val, 2)+1],
+                    tab[wasm_i32x4_extract_lane(idxvec.val, 3)+1]);
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    v128_t xy0 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 0));
+    v128_t xy1 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 1));
+    x.val = wasm_unpacklo_i64x2(xy0, xy1);
+    y.val = wasm_unpackhi_i64x2(xy0, xy1);
+}
+
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
+{
+    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
+{
+    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
+{
+    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
+{
+    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
+{
+    return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+}
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
+{
+    return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+}
+
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
+{
+    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
+}
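+
+// A minimal usage sketch (added commentary, not part of upstream OpenCV):
+// v_lut_deinterleave() above gathers the two halves of four (x0, x1) table
+// pairs, which is exactly the gather step of linear/bilinear interpolation.
+// 'tab', 'idx' and 't' below are hypothetical names.
+#if 0
+inline v_float32x4 lerp_pairs_sketch(const float* tab, const v_int32x4& idx,
+                                     const v_float32x4& t)
+{
+    v_float32x4 x0, x1;
+    v_lut_deinterleave(tab, idx, x0, x1); // x0[i] = tab[idx[i]], x1[i] = tab[idx[i]+1]
+    return v_fma(x1 - x0, t, x0);         // x0 + t * (x1 - x0), per lane
+}
+#endif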
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
+{
+    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+template<int i, typename _Tp>
+inline typename _Tp::lane_type v_extract_n(const _Tp& a)
+{
+    return v_rotate_right<i>(a).get0();
+}
+
+template<int i>
+inline v_uint32x4 v_broadcast_element(const v_uint32x4& a)
+{
+    return v_setall_u32(v_extract_n<i>(a));
+}
+template<int i>
+inline v_int32x4 v_broadcast_element(const v_int32x4& a)
+{
+    return v_setall_s32(v_extract_n<i>(a));
+}
+template<int i>
+inline v_float32x4 v_broadcast_element(const v_float32x4& a)
+{
+    return v_setall_f32(v_extract_n<i>(a));
+}
+
+
+////////////// FP16 support ///////////////////////////
+
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    float a[4];
+    for (int i = 0; i < 4; i++)
+        a[i] = ptr[i];
+    return v_float32x4(wasm_v128_load(a));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    float v_[4];
+    wasm_v128_store(v_, v.val);
+    ptr[0] = float16_t(v_[0]);
+    ptr[1] = float16_t(v_[1]);
+    ptr[2] = float16_t(v_[2]);
+    ptr[3] = float16_t(v_[3]);
+}
+
+inline void v_cleanup() {}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/msa_macros.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/msa_macros.h
new file mode 100644
index 0000000..fad8c5a
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/msa_macros.h
@@ -0,0 +1,1558 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_HAL_MSA_MACROS_H
+#define OPENCV_CORE_HAL_MSA_MACROS_H
+
+#ifdef __mips_msa
+#include "msa.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Define 64 bits vector types */
+typedef signed char v8i8 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned char v8u8 __attribute__ ((vector_size(8), aligned(8)));
+typedef short v4i16 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned short v4u16 __attribute__ ((vector_size(8), aligned(8)));
+typedef int v2i32 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned int v2u32 __attribute__ ((vector_size(8), aligned(8)));
+typedef long long v1i64 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned long long v1u64 __attribute__ ((vector_size(8), aligned(8)));
+typedef float v2f32 __attribute__ ((vector_size(8), aligned(8)));
+typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
+
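+/* Added commentary, not part of upstream OpenCV: the 64-bit types above
+   mirror NEON's d-register vectors. A minimal usage sketch, assuming 'buf'
+   points to at least 8 readable bytes:
+
+       v8u8     v = msa_ld1_u8(buf);   // 64-bit load (defined below)
+       v8u16    w = msa_movl_u8(v);    // zero-extend each lane to 16 bits
+       uint16_t s = msa_sum_u16(w);    // horizontal sum (defined below)
+*/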
+/* Load values from the given memory address to a 64-bit vector. */
+#define msa_ld1_s8(__a) (*((v8i8*)(__a)))
+#define msa_ld1_s16(__a) (*((v4i16*)(__a)))
+#define msa_ld1_s32(__a) (*((v2i32*)(__a)))
+#define msa_ld1_s64(__a) (*((v1i64*)(__a)))
+#define msa_ld1_u8(__a) (*((v8u8*)(__a)))
+#define msa_ld1_u16(__a) (*((v4u16*)(__a)))
+#define msa_ld1_u32(__a) (*((v2u32*)(__a)))
+#define msa_ld1_u64(__a) (*((v1u64*)(__a)))
+#define msa_ld1_f32(__a) (*((v2f32*)(__a)))
+#define msa_ld1_f64(__a) (*((v1f64*)(__a)))
+
+/* Load values from the given memory address to a 128-bit vector */
+#define msa_ld1q_s8(__a) ((v16i8)__builtin_msa_ld_b(__a, 0))
+#define msa_ld1q_s16(__a) ((v8i16)__builtin_msa_ld_h(__a, 0))
+#define msa_ld1q_s32(__a) ((v4i32)__builtin_msa_ld_w(__a, 0))
+#define msa_ld1q_s64(__a) ((v2i64)__builtin_msa_ld_d(__a, 0))
+#define msa_ld1q_u8(__a) ((v16u8)__builtin_msa_ld_b(__a, 0))
+#define msa_ld1q_u16(__a) ((v8u16)__builtin_msa_ld_h(__a, 0))
+#define msa_ld1q_u32(__a) ((v4u32)__builtin_msa_ld_w(__a, 0))
+#define msa_ld1q_u64(__a) ((v2u64)__builtin_msa_ld_d(__a, 0))
+#define msa_ld1q_f32(__a) ((v4f32)__builtin_msa_ld_w(__a, 0))
+#define msa_ld1q_f64(__a) ((v2f64)__builtin_msa_ld_d(__a, 0))
+
+/* Store 64-bit vector element values to the given memory address. */
+#define msa_st1_s8(__a, __b) (*((v8i8*)(__a)) = __b)
+#define msa_st1_s16(__a, __b) (*((v4i16*)(__a)) = __b)
+#define msa_st1_s32(__a, __b) (*((v2i32*)(__a)) = __b)
+#define msa_st1_s64(__a, __b) (*((v1i64*)(__a)) = __b)
+#define msa_st1_u8(__a, __b) (*((v8u8*)(__a)) = __b)
+#define msa_st1_u16(__a, __b) (*((v4u16*)(__a)) = __b)
+#define msa_st1_u32(__a, __b) (*((v2u32*)(__a)) = __b)
+#define msa_st1_u64(__a, __b) (*((v1u64*)(__a)) = __b)
+#define msa_st1_f32(__a, __b) (*((v2f32*)(__a)) = __b)
+#define msa_st1_f64(__a, __b) (*((v1f64*)(__a)) = __b)
+
+/* Store the values of elements in the 128 bits vector __b to the given memory address __a. */
+#define msa_st1q_s8(__a, __b) (__builtin_msa_st_b((v16i8)(__b), __a, 0))
+#define msa_st1q_s16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0))
+#define msa_st1q_s32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0))
+#define msa_st1q_s64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0))
+#define msa_st1q_u8(__a, __b) (__builtin_msa_st_b((v16i8)(__b), __a, 0))
+#define msa_st1q_u16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0))
+#define msa_st1q_u32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0))
+#define msa_st1q_u64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0))
+#define msa_st1q_f32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0))
+#define msa_st1q_f64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0))
+
+/* Store the value of the element with the index __c in vector __b to the given memory address __a.
*/ +#define msa_st1_lane_s8(__a, __b, __c) (*((int8_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __b[__c]) +#define msa_st1_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u8(__a, __b, __c) (*((uint8_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __b[__c]) +#define msa_st1_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __b[__c]) +#define msa_st1_lane_f32(__a, __b, __c) (*((float*)(__a)) = __b[__c]) +#define msa_st1_lane_f64(__a, __b, __c) (*((double*)(__a)) = __b[__c]) +#define msa_st1q_lane_s8(__a, __b, __c) (*((int8_t*)(__a)) = (int8_t)__builtin_msa_copy_s_b(__b, __c)) +#define msa_st1q_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = (int16_t)__builtin_msa_copy_s_h(__b, __c)) +#define msa_st1q_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __builtin_msa_copy_s_w(__b, __c)) +#define msa_st1q_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __builtin_msa_copy_s_d(__b, __c)) +#define msa_st1q_lane_u8(__a, __b, __c) (*((uint8_t*)(__a)) = (uint8_t)__builtin_msa_copy_u_b((v16i8)(__b), __c)) +#define msa_st1q_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = (uint16_t)__builtin_msa_copy_u_h((v8i16)(__b), __c)) +#define msa_st1q_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __builtin_msa_copy_u_w((v4i32)(__b), __c)) +#define msa_st1q_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __builtin_msa_copy_u_d((v2i64)(__b), __c)) +#define msa_st1q_lane_f32(__a, __b, __c) (*((float*)(__a)) = __b[__c]) +#define msa_st1q_lane_f64(__a, __b, __c) (*((double*)(__a)) = __b[__c]) + +/* Duplicate elements for 64-bit doubleword vectors */ +#define msa_dup_n_s8(__a) ((v8i8)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) +#define msa_dup_n_s16(__a) ((v4i16)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0)) +#define msa_dup_n_s32(__a) ((v2i32){__a, __a}) +#define msa_dup_n_s64(__a) ((v1i64){__a}) +#define msa_dup_n_u8(__a) ((v8u8)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) +#define msa_dup_n_u16(__a) ((v4u16)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0)) +#define msa_dup_n_u32(__a) ((v2u32){__a, __a}) +#define msa_dup_n_u64(__a) ((v1u64){__a}) +#define msa_dup_n_f32(__a) ((v2f32){__a, __a}) +#define msa_dup_n_f64(__a) ((v1f64){__a}) + +/* Duplicate elements for 128-bit quadword vectors */ +#define msa_dupq_n_s8(__a) (__builtin_msa_fill_b((int32_t)(__a))) +#define msa_dupq_n_s16(__a) (__builtin_msa_fill_h((int32_t)(__a))) +#define msa_dupq_n_s32(__a) (__builtin_msa_fill_w((int32_t)(__a))) +#define msa_dupq_n_s64(__a) (__builtin_msa_fill_d((int64_t)(__a))) +#define msa_dupq_n_u8(__a) ((v16u8)__builtin_msa_fill_b((int32_t)(__a))) +#define msa_dupq_n_u16(__a) ((v8u16)__builtin_msa_fill_h((int32_t)(__a))) +#define msa_dupq_n_u32(__a) ((v4u32)__builtin_msa_fill_w((int32_t)(__a))) +#define msa_dupq_n_u64(__a) ((v2u64)__builtin_msa_fill_d((int64_t)(__a))) +#define msa_dupq_n_f32(__a) ((v4f32){__a, __a, __a, __a}) +#define msa_dupq_n_f64(__a) ((v2f64){__a, __a}) +#define msa_dupq_lane_s8(__a, __b) (__builtin_msa_splat_b(__a, __b)) +#define msa_dupq_lane_s16(__a, __b) (__builtin_msa_splat_h(__a, __b)) +#define msa_dupq_lane_s32(__a, __b) (__builtin_msa_splat_w(__a, __b)) +#define msa_dupq_lane_s64(__a, __b) (__builtin_msa_splat_d(__a, __b)) +#define msa_dupq_lane_u8(__a, __b) 
((v16u8)__builtin_msa_splat_b((v16i8)(__a), __b))
+#define msa_dupq_lane_u16(__a, __b) ((v8u16)__builtin_msa_splat_h((v8i16)(__a), __b))
+#define msa_dupq_lane_u32(__a, __b) ((v4u32)__builtin_msa_splat_w((v4i32)(__a), __b))
+#define msa_dupq_lane_u64(__a, __b) ((v2u64)__builtin_msa_splat_d((v2i64)(__a), __b))
+
+/* Create a 64 bits vector */
+#define msa_create_s8(__a) ((v8i8)((uint64_t)(__a)))
+#define msa_create_s16(__a) ((v4i16)((uint64_t)(__a)))
+#define msa_create_s32(__a) ((v2i32)((uint64_t)(__a)))
+#define msa_create_s64(__a) ((v1i64)((uint64_t)(__a)))
+#define msa_create_u8(__a) ((v8u8)((uint64_t)(__a)))
+#define msa_create_u16(__a) ((v4u16)((uint64_t)(__a)))
+#define msa_create_u32(__a) ((v2u32)((uint64_t)(__a)))
+#define msa_create_u64(__a) ((v1u64)((uint64_t)(__a)))
+#define msa_create_f32(__a) ((v2f32)((uint64_t)(__a)))
+#define msa_create_f64(__a) ((v1f64)((uint64_t)(__a)))
+
+/* Sign extends or zero extends each element in a 64 bits vector to twice its original length, and places the results in a 128 bits vector. */
+/* Transform v8i8 to v8i16 */
+#define msa_movl_s8(__a) \
+((v8i16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \
+         (__a)[4], (__a)[5], (__a)[6], (__a)[7]})
+
+/* Transform v8u8 to v8u16 */
+#define msa_movl_u8(__a) \
+((v8u16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \
+         (__a)[4], (__a)[5], (__a)[6], (__a)[7]})
+
+/* Transform v4i16 to v4i32 */
+#define msa_movl_s16(__a) ((v4i32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]})
+
+/* Transform v2i32 to v2i64 */
+#define msa_movl_s32(__a) ((v2i64){(__a)[0], (__a)[1]})
+
+/* Transform v4u16 to v4u32 */
+#define msa_movl_u16(__a) ((v4u32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]})
+
+/* Transform v2u32 to v2u64 */
+#define msa_movl_u32(__a) ((v2u64){(__a)[0], (__a)[1]})
+
+/* Copies the least significant half of each element of a 128 bits vector into the corresponding elements of a 64 bits vector.
*/ +#define msa_movn_s16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_s32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_s64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_movn_u64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* qmovn */ +#define msa_qmovn_s16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7)); \ + (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_s32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15)); \ + (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_s64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31)); \ + (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u16(__a) \ +({ \ + v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u32(__a) \ +({ \ + v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +#define msa_qmovn_u64(__a) \ +({ \ + v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \ +}) + +/* qmovun */ +#define msa_qmovun_s16(__a) \ +({ \ + v8i16 __d = __builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qmovun_s32(__a) \ +({ \ + v4i32 __d = __builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qmovun_s64(__a) \ +({ \ + v2i64 __d = __builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* Right shift elements in a 128 bits vector by an immediate value, and places the results in a 64 bits vector. 
*/
+#define msa_shrn_n_s16(__a, __b) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__b))); \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_shrn_n_s32(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__b))); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_shrn_n_s64(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__b))); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_shrn_n_u16(__a, __b) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__b))); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_shrn_n_u32(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__b))); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_shrn_n_u64(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__b))); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+/* Right shift elements in a 128 bits vector by an immediate value, round, and place the results in a 64 bits vector. */
+#define msa_rshrn_n_s16(__a, __b) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)__b)); \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_s32(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)__b)); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_s64(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)__b)); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_u16(__a, __b) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)__b)); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_u32(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)__b)); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_u64(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)__b)); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
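+/* Added commentary, not part of upstream OpenCV. A minimal sketch, assuming
+   v8u8 values 'x' and 'y': the rounding variants give an overflow-free
+   byte average --
+
+       v8u16 s   = msa_movl_u8(x) + msa_movl_u8(y); // widen, then add exactly
+       v8u8  avg = msa_rshrn_n_u16(s, 1);           // (s + 1) >> 1, narrowed
+*/
+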
+/* Right shift elements in a 128 bits vector by an immediate value, saturate the results and place them in a 64 bits vector. */
+#define msa_qrshrn_n_s16(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__b)), 7); \
+  v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__d); \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__e, 0); \
+})
+
+#define msa_qrshrn_n_s32(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__b)), 15); \
+  v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__d); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__e, 0); \
+})
+
+#define msa_qrshrn_n_s64(__a, __b) \
+({ \
+  v2i64 __d = __builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__b)), 31); \
+  v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__d); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__e, 0); \
+})
+
+#define msa_qrshrn_n_u16(__a, __b) \
+({ \
+  v8u16 __d = __builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__b)), 7); \
+  v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__d); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \
+})
+
+#define msa_qrshrn_n_u32(__a, __b) \
+({ \
+  v4u32 __d = __builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__b)), 15); \
+  v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__d); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \
+})
+
+#define msa_qrshrn_n_u64(__a, __b) \
+({ \
+  v2u64 __d = __builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__b)), 31); \
+  v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__d); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \
+})
+
+/* Right shift elements in a 128 bits vector by an immediate value, saturate the results and place them in a 64 bits vector.
+   Input is signed and output is unsigned.
*/ +#define msa_qrshrun_n_s16(__a, __b) \ +({ \ + v8i16 __d = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__b)); \ + v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ + (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrun_n_s32(__a, __b) \ +({ \ + v4i32 __d = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__b)); \ + v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ + (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +#define msa_qrshrun_n_s64(__a, __b) \ +({ \ + v2i64 __d = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__b)); \ + v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ + (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \ +}) + +/* pack */ +#define msa_pack_s16(__a, __b) (__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a))) +#define msa_pack_s32(__a, __b) (__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a))) +#define msa_pack_s64(__a, __b) (__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a))) +#define msa_pack_u16(__a, __b) ((v16u8)__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a))) +#define msa_pack_u32(__a, __b) ((v8u16)__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a))) +#define msa_pack_u64(__a, __b) ((v4u32)__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a))) + +/* qpack */ +#define msa_qpack_s16(__a, __b) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h((v8i16)(__b), 7), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7))) +#define msa_qpack_s32(__a, __b) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w((v4i32)(__b), 15), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15))) +#define msa_qpack_s64(__a, __b) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d((v2i64)(__b), 31), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31))) +#define msa_qpack_u16(__a, __b) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__b), 7), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7))) +#define msa_qpack_u32(__a, __b) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__b), 15), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15))) +#define msa_qpack_u64(__a, __b) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__b), 31), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31))) + +/* qpacku */ +#define msa_qpacku_s16(__a, __b) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b))), 7), \ + (v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a))), 7))) +#define msa_qpacku_s32(__a, __b) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b))), 15), \ + (v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a))), 15))) +#define msa_qpacku_s64(__a, __b) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b))), 31), \ + (v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a))), 31))) + +/* packr */ +#define msa_packr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_srai_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__c)))) +#define 
msa_packr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_srai_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__c)))) +#define msa_packr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_srai_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__c)))) +#define msa_packr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srli_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__c)))) +#define msa_packr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srli_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__c)))) +#define msa_packr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srli_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__c)))) + +/* rpackr */ +#define msa_rpackr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)(__c)))) +#define msa_rpackr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)(__c)))) +#define msa_rpackr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)(__c)))) +#define msa_rpackr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)))) +#define msa_rpackr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)))) +#define msa_rpackr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)))) + +/* qrpackr */ +#define msa_qrpackr_s16(__a, __b, __c) \ +(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), 7), \ + (v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__c)), 7))) +#define msa_qrpackr_s32(__a, __b, __c) \ +(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), 15), \ + (v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__c)), 15))) +#define msa_qrpackr_s64(__a, __b, __c) \ +(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), 31), \ + (v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__c)), 31))) +#define msa_qrpackr_u16(__a, __b, __c) \ +((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), 7), \ + (v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)), 7))) +#define msa_qrpackr_u32(__a, __b, __c) \ +((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), 15), \ + (v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)), 15))) +#define msa_qrpackr_u64(__a, __b, __c) \ +((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), 31), \ + (v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)), 31))) + +/* qrpackru */ +#define msa_qrpackru_s16(__a, __b, __c) \ +({ 
\ + v8i16 __d = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__c)); \ + v8i16 __e = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b)), (int)(__c)); \ + (v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__e, 7), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); \ +}) + +#define msa_qrpackru_s32(__a, __b, __c) \ +({ \ + v4i32 __d = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__c)); \ + v4i32 __e = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b)), (int)(__c)); \ + (v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__e, 15), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \ +}) + +#define msa_qrpackru_s64(__a, __b, __c) \ +({ \ + v2i64 __d = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__c)); \ + v2i64 __e = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b)), (int)(__c)); \ + (v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__e, 31), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \ +}) + +/* Minimum values between corresponding elements in the two vectors are written to the returned vector. */ +#define msa_minq_s8(__a, __b) (__builtin_msa_min_s_b(__a, __b)) +#define msa_minq_s16(__a, __b) (__builtin_msa_min_s_h(__a, __b)) +#define msa_minq_s32(__a, __b) (__builtin_msa_min_s_w(__a, __b)) +#define msa_minq_s64(__a, __b) (__builtin_msa_min_s_d(__a, __b)) +#define msa_minq_u8(__a, __b) ((v16u8)__builtin_msa_min_u_b(__a, __b)) +#define msa_minq_u16(__a, __b) ((v8u16)__builtin_msa_min_u_h(__a, __b)) +#define msa_minq_u32(__a, __b) ((v4u32)__builtin_msa_min_u_w(__a, __b)) +#define msa_minq_u64(__a, __b) ((v2u64)__builtin_msa_min_u_d(__a, __b)) +#define msa_minq_f32(__a, __b) (__builtin_msa_fmin_w(__a, __b)) +#define msa_minq_f64(__a, __b) (__builtin_msa_fmin_d(__a, __b)) + +/* Maximum values between corresponding elements in the two vectors are written to the returned vector. */ +#define msa_maxq_s8(__a, __b) (__builtin_msa_max_s_b(__a, __b)) +#define msa_maxq_s16(__a, __b) (__builtin_msa_max_s_h(__a, __b)) +#define msa_maxq_s32(__a, __b) (__builtin_msa_max_s_w(__a, __b)) +#define msa_maxq_s64(__a, __b) (__builtin_msa_max_s_d(__a, __b)) +#define msa_maxq_u8(__a, __b) ((v16u8)__builtin_msa_max_u_b(__a, __b)) +#define msa_maxq_u16(__a, __b) ((v8u16)__builtin_msa_max_u_h(__a, __b)) +#define msa_maxq_u32(__a, __b) ((v4u32)__builtin_msa_max_u_w(__a, __b)) +#define msa_maxq_u64(__a, __b) ((v2u64)__builtin_msa_max_u_d(__a, __b)) +#define msa_maxq_f32(__a, __b) (__builtin_msa_fmax_w(__a, __b)) +#define msa_maxq_f64(__a, __b) (__builtin_msa_fmax_d(__a, __b)) + +/* Vector type reinterpretion */ +#define MSA_TPV_REINTERPRET(_Tpv, Vec) ((_Tpv)(Vec)) + +/* Add the odd elements in vector __a with the even elements in vector __b to double width elements in the returned vector. */ +/* v8i16 msa_hadd_s16 ((v16i8)__a, (v16i8)__b) */ +#define msa_hadd_s16(__a, __b) (__builtin_msa_hadd_s_h((v16i8)(__a), (v16i8)(__b))) +/* v4i32 msa_hadd_s32 ((v8i16)__a, (v8i16)__b) */ +#define msa_hadd_s32(__a, __b) (__builtin_msa_hadd_s_w((v8i16)(__a), (v8i16)(__b))) +/* v2i64 msa_hadd_s64 ((v4i32)__a, (v4i32)__b) */ +#define msa_hadd_s64(__a, __b) (__builtin_msa_hadd_s_d((v4i32)(__a), (v4i32)(__b))) + +/* Copy even elements in __a to the left half and even elements in __b to the right half and return the result vector. 
*/
+#define msa_pckev_s8(__a, __b) (__builtin_msa_pckev_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_pckev_s16(__a, __b) (__builtin_msa_pckev_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_pckev_s32(__a, __b) (__builtin_msa_pckev_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_pckev_s64(__a, __b) (__builtin_msa_pckev_d((v2i64)(__a), (v2i64)(__b)))
+
+/* Copy odd elements in __a to the left half and odd elements in __b to the right half and return the result vector. */
+#define msa_pckod_s8(__a, __b) (__builtin_msa_pckod_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_pckod_s16(__a, __b) (__builtin_msa_pckod_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_pckod_s32(__a, __b) (__builtin_msa_pckod_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_pckod_s64(__a, __b) (__builtin_msa_pckod_d((v2i64)(__a), (v2i64)(__b)))
+
+#ifdef _MIPSEB
+#define LANE_IMM0_1(x) (0b1 - ((x) & 0b1))
+#define LANE_IMM0_3(x) (0b11 - ((x) & 0b11))
+#define LANE_IMM0_7(x) (0b111 - ((x) & 0b111))
+#define LANE_IMM0_15(x) (0b1111 - ((x) & 0b1111))
+#else
+#define LANE_IMM0_1(x) ((x) & 0b1)
+#define LANE_IMM0_3(x) ((x) & 0b11)
+#define LANE_IMM0_7(x) ((x) & 0b111)
+#define LANE_IMM0_15(x) ((x) & 0b1111)
+#endif
+
+#define msa_get_lane_u8(__a, __b) ((uint8_t)(__a)[LANE_IMM0_7(__b)])
+#define msa_get_lane_s8(__a, __b) ((int8_t)(__a)[LANE_IMM0_7(__b)])
+#define msa_get_lane_u16(__a, __b) ((uint16_t)(__a)[LANE_IMM0_3(__b)])
+#define msa_get_lane_s16(__a, __b) ((int16_t)(__a)[LANE_IMM0_3(__b)])
+#define msa_get_lane_u32(__a, __b) ((uint32_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_s32(__a, __b) ((int32_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_f32(__a, __b) ((float)(__a)[LANE_IMM0_3(__b)])
+#define msa_get_lane_s64(__a, __b) ((int64_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_u64(__a, __b) ((uint64_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_f64(__a, __b) ((double)(__a)[LANE_IMM0_1(__b)])
+#define msa_getq_lane_u8(__a, imm0_15) ((uint8_t)__builtin_msa_copy_u_b((v16i8)(__a), imm0_15))
+#define msa_getq_lane_s8(__a, imm0_15) ((int8_t)__builtin_msa_copy_s_b(__a, imm0_15))
+#define msa_getq_lane_u16(__a, imm0_7) ((uint16_t)__builtin_msa_copy_u_h((v8i16)(__a), imm0_7))
+#define msa_getq_lane_s16(__a, imm0_7) ((int16_t)__builtin_msa_copy_s_h(__a, imm0_7))
+#define msa_getq_lane_u32(__a, imm0_3) __builtin_msa_copy_u_w((v4i32)(__a), imm0_3)
+#define msa_getq_lane_s32 __builtin_msa_copy_s_w
+#define msa_getq_lane_f32(__a, __b) ((float)(__a)[LANE_IMM0_3(__b)])
+#define msa_getq_lane_f64(__a, __b) ((double)(__a)[LANE_IMM0_1(__b)])
+#if (__mips == 64)
+#define msa_getq_lane_u64(__a, imm0_1) __builtin_msa_copy_u_d((v2i64)(__a), imm0_1)
+#define msa_getq_lane_s64 __builtin_msa_copy_s_d
+#else
+#define msa_getq_lane_u64(__a, imm0_1) ((uint64_t)(__a)[LANE_IMM0_1(imm0_1)])
+#define msa_getq_lane_s64(__a, imm0_1) ((int64_t)(__a)[LANE_IMM0_1(imm0_1)])
+#endif
+
+/* combine */
+#if (__mips == 64)
+#define __COMBINE_64_64(__TYPE, a, b) ((__TYPE)((v2u64){((v1u64)(a))[0], ((v1u64)(b))[0]}))
+#else
+#define __COMBINE_64_64(__TYPE, a, b) ((__TYPE)((v4u32){((v2u32)(a))[0], ((v2u32)(a))[1], \
+                                                        ((v2u32)(b))[0], ((v2u32)(b))[1]}))
+#endif
+
+/* v16i8 msa_combine_s8 (v8i8 __a, v8i8 __b) */
+#define msa_combine_s8(__a, __b) __COMBINE_64_64(v16i8, __a, __b)
+
+/* v8i16 msa_combine_s16(v4i16 __a, v4i16 __b) */
+#define msa_combine_s16(__a, __b) __COMBINE_64_64(v8i16, __a, __b)
+
+/* v4i32 msa_combine_s32(v2i32 __a, v2i32 __b) */
+#define msa_combine_s32(__a, __b) __COMBINE_64_64(v4i32, __a, __b)
+
+/* v2i64 msa_combine_s64(v1i64 __a, v1i64 __b) */
+#define msa_combine_s64(__a, __b) __COMBINE_64_64(v2i64, __a, __b) + +/* v4f32 msa_combine_f32(v2f32 __a, v2f32 __b) */ +#define msa_combine_f32(__a, __b) __COMBINE_64_64(v4f32, __a, __b) + +/* v16u8 msa_combine_u8(v8u8 __a, v8u8 __b) */ +#define msa_combine_u8(__a, __b) __COMBINE_64_64(v16u8, __a, __b) + +/* v8u16 msa_combine_u16(v4u16 __a, v4u16 __b) */ +#define msa_combine_u16(__a, __b) __COMBINE_64_64(v8u16, __a, __b) + +/* v4u32 msa_combine_u32(v2u32 __a, v2u32 __b) */ +#define msa_combine_u32(__a, __b) __COMBINE_64_64(v4u32, __a, __b) + +/* v2u64 msa_combine_u64(v1u64 __a, v1u64 __b) */ +#define msa_combine_u64(__a, __b) __COMBINE_64_64(v2u64, __a, __b) + +/* v2f64 msa_combine_f64(v1f64 __a, v1f64 __b) */ +#define msa_combine_f64(__a, __b) __COMBINE_64_64(v2f64, __a, __b) + +/* get_low, get_high */ +#if (__mips == 64) +#define __GET_LOW(__TYPE, a) ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 0)))) +#define __GET_HIGH(__TYPE, a) ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 1)))) +#else +#define __GET_LOW(__TYPE, a) ((__TYPE)(((v2u64)(a))[0])) +#define __GET_HIGH(__TYPE, a) ((__TYPE)(((v2u64)(a))[1])) +#endif + +/* v8i8 msa_get_low_s8(v16i8 __a) */ +#define msa_get_low_s8(__a) __GET_LOW(v8i8, __a) + +/* v4i16 msa_get_low_s16(v8i16 __a) */ +#define msa_get_low_s16(__a) __GET_LOW(v4i16, __a) + +/* v2i32 msa_get_low_s32(v4i32 __a) */ +#define msa_get_low_s32(__a) __GET_LOW(v2i32, __a) + +/* v1i64 msa_get_low_s64(v2i64 __a) */ +#define msa_get_low_s64(__a) __GET_LOW(v1i64, __a) + +/* v8u8 msa_get_low_u8(v16u8 __a) */ +#define msa_get_low_u8(__a) __GET_LOW(v8u8, __a) + +/* v4u16 msa_get_low_u16(v8u16 __a) */ +#define msa_get_low_u16(__a) __GET_LOW(v4u16, __a) + +/* v2u32 msa_get_low_u32(v4u32 __a) */ +#define msa_get_low_u32(__a) __GET_LOW(v2u32, __a) + +/* v1u64 msa_get_low_u64(v2u64 __a) */ +#define msa_get_low_u64(__a) __GET_LOW(v1u64, __a) + +/* v2f32 msa_get_low_f32(v4f32 __a) */ +#define msa_get_low_f32(__a) __GET_LOW(v2f32, __a) + +/* v1f64 msa_get_low_f64(v2f64 __a) */ +#define msa_get_low_f64(__a) __GET_LOW(v1f64, __a) + +/* v8i8 msa_get_high_s8(v16i8 __a) */ +#define msa_get_high_s8(__a) __GET_HIGH(v8i8, __a) + +/* v4i16 msa_get_high_s16(v8i16 __a) */ +#define msa_get_high_s16(__a) __GET_HIGH(v4i16, __a) + +/* v2i32 msa_get_high_s32(v4i32 __a) */ +#define msa_get_high_s32(__a) __GET_HIGH(v2i32, __a) + +/* v1i64 msa_get_high_s64(v2i64 __a) */ +#define msa_get_high_s64(__a) __GET_HIGH(v1i64, __a) + +/* v8u8 msa_get_high_u8(v16u8 __a) */ +#define msa_get_high_u8(__a) __GET_HIGH(v8u8, __a) + +/* v4u16 msa_get_high_u16(v8u16 __a) */ +#define msa_get_high_u16(__a) __GET_HIGH(v4u16, __a) + +/* v2u32 msa_get_high_u32(v4u32 __a) */ +#define msa_get_high_u32(__a) __GET_HIGH(v2u32, __a) + +/* v1u64 msa_get_high_u64(v2u64 __a) */ +#define msa_get_high_u64(__a) __GET_HIGH(v1u64, __a) + +/* v2f32 msa_get_high_f32(v4f32 __a) */ +#define msa_get_high_f32(__a) __GET_HIGH(v2f32, __a) + +/* v1f64 msa_get_high_f64(v2f64 __a) */ +#define msa_get_high_f64(__a) __GET_HIGH(v1f64, __a) + +/* ri = ai * b[lane] */ +/* v4f32 msa_mulq_lane_f32(v4f32 __a, v4f32 __b, const int __lane) */ +#define msa_mulq_lane_f32(__a, __b, __lane) ((__a) * msa_getq_lane_f32(__b, __lane)) + +/* ri = ai + bi * c[lane] */ +/* v4f32 msa_mlaq_lane_f32(v4f32 __a, v4f32 __b, v4f32 __c, const int __lane) */ +#define msa_mlaq_lane_f32(__a, __b, __c, __lane) ((__a) + ((__b) * msa_getq_lane_f32(__c, __lane))) + +/* uint16_t msa_sum_u16(v8u16 __a)*/ +#define msa_sum_u16(__a) \ +({ \ + v4u32 _b; \ + v2u64 _c; \ + _b = 
__builtin_msa_hadd_u_w(__a, __a); \
+  _c = __builtin_msa_hadd_u_d(_b, _b); \
+  (uint16_t)(_c[0] + _c[1]); \
+})
+
+/* int32_t msa_sum_s16(v8i16 __a) */
+#define msa_sum_s16(__a) \
+({ \
+  v4i32 _b; \
+  v2i64 _c; \
+  _b = __builtin_msa_hadd_s_w(__a, __a); \
+  _c = __builtin_msa_hadd_s_d(_b, _b); \
+  (int32_t)(_c[0] + _c[1]); \
+})
+
+/* uint32_t msa_sum_u32(v4u32 __a) */
+#define msa_sum_u32(__a) \
+({ \
+  v2u64 _b; \
+  _b = __builtin_msa_hadd_u_d(__a, __a); \
+  (uint32_t)(_b[0] + _b[1]); \
+})
+
+/* int64_t msa_sum_s32(v4i32 __a) */
+#define msa_sum_s32(__a) \
+({ \
+  v2i64 _b; \
+  _b = __builtin_msa_hadd_s_d(__a, __a); \
+  (int64_t)(_b[0] + _b[1]); \
+})
+
+/* uint8_t msa_sum_u8(v16u8 __a) */
+#define msa_sum_u8(__a) \
+({ \
+  v8u16 _b16; \
+  v4u32 _c32; \
+  _b16 = __builtin_msa_hadd_u_h(__a, __a); \
+  _c32 = __builtin_msa_hadd_u_w(_b16, _b16); \
+  (uint8_t)msa_sum_u32(_c32); \
+})
+
+/* int16_t msa_sum_s8(v16i8 __a) */
+#define msa_sum_s8(__a) \
+({ \
+  v8i16 _b16; \
+  v4i32 _c32; \
+  _b16 = __builtin_msa_hadd_s_h(__a, __a); \
+  _c32 = __builtin_msa_hadd_s_w(_b16, _b16); \
+  (int16_t)msa_sum_s32(_c32); \
+})
+
+/* float msa_sum_f32(v4f32 __a) */
+#define msa_sum_f32(__a) ((__a)[0] + (__a)[1] + (__a)[2] + (__a)[3])
+
+/* v8u16 msa_paddlq_u8(v16u8 __a) */
+#define msa_paddlq_u8(__a) (__builtin_msa_hadd_u_h(__a, __a))
+
+/* v8i16 msa_paddlq_s8(v16i8 __a) */
+#define msa_paddlq_s8(__a) (__builtin_msa_hadd_s_h(__a, __a))
+
+/* v4u32 msa_paddlq_u16(v8u16 __a) */
+#define msa_paddlq_u16(__a) (__builtin_msa_hadd_u_w(__a, __a))
+
+/* v4i32 msa_paddlq_s16(v8i16 __a) */
+#define msa_paddlq_s16(__a) (__builtin_msa_hadd_s_w(__a, __a))
+
+/* v2u64 msa_paddlq_u32(v4u32 __a) */
+#define msa_paddlq_u32(__a) (__builtin_msa_hadd_u_d(__a, __a))
+
+/* v2i64 msa_paddlq_s32(v4i32 __a) */
+#define msa_paddlq_s32(__a) (__builtin_msa_hadd_s_d(__a, __a))
+
+#define V8U8_2_V8U16(x) {(uint16_t)x[0], (uint16_t)x[1], (uint16_t)x[2], (uint16_t)x[3], \
+                         (uint16_t)x[4], (uint16_t)x[5], (uint16_t)x[6], (uint16_t)x[7]}
+#define V8U8_2_V8I16(x) {(int16_t)x[0], (int16_t)x[1], (int16_t)x[2], (int16_t)x[3], \
+                         (int16_t)x[4], (int16_t)x[5], (int16_t)x[6], (int16_t)x[7]}
+#define V8I8_2_V8I16(x) {(int16_t)x[0], (int16_t)x[1], (int16_t)x[2], (int16_t)x[3], \
+                         (int16_t)x[4], (int16_t)x[5], (int16_t)x[6], (int16_t)x[7]}
+#define V4U16_2_V4U32(x) {(uint32_t)x[0], (uint32_t)x[1], (uint32_t)x[2], (uint32_t)x[3]}
+#define V4U16_2_V4I32(x) {(int32_t)x[0], (int32_t)x[1], (int32_t)x[2], (int32_t)x[3]}
+#define V4I16_2_V4I32(x) {(int32_t)x[0], (int32_t)x[1], (int32_t)x[2], (int32_t)x[3]}
+#define V2U32_2_V2U64(x) {(uint64_t)x[0], (uint64_t)x[1]}
+#define V2U32_2_V2I64(x) {(int64_t)x[0], (int64_t)x[1]}
+
+/* v8u16 msa_mull_u8(v8u8 __a, v8u8 __b) */
+#define msa_mull_u8(__a, __b) ((v8u16)__builtin_msa_mulv_h((v8i16)V8U8_2_V8I16(__a), (v8i16)V8U8_2_V8I16(__b)))
+
+/* v8i16 msa_mull_s8(v8i8 __a, v8i8 __b) */
+#define msa_mull_s8(__a, __b) (__builtin_msa_mulv_h((v8i16)V8I8_2_V8I16(__a), (v8i16)V8I8_2_V8I16(__b)))
+
+/* v4u32 msa_mull_u16(v4u16 __a, v4u16 __b) */
+#define msa_mull_u16(__a, __b) ((v4u32)__builtin_msa_mulv_w((v4i32)V4U16_2_V4I32(__a), (v4i32)V4U16_2_V4I32(__b)))
+
+/* v4i32 msa_mull_s16(v4i16 __a, v4i16 __b) */
+#define msa_mull_s16(__a, __b) (__builtin_msa_mulv_w((v4i32)V4I16_2_V4I32(__a), (v4i32)V4I16_2_V4I32(__b)))
+
+/* v2u64 msa_mull_u32(v2u32 __a, v2u32 __b) */
+#define msa_mull_u32(__a, __b) ((v2u64)__builtin_msa_mulv_d((v2i64)V2U32_2_V2I64(__a), (v2i64)V2U32_2_V2I64(__b)))
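+
+/* Illustrative sketch (added for clarity; not upstream code): the widening
+   helpers above compose into a small dot-product step, assuming va and vb are
+   v16u8 values loaded elsewhere:
+
+     v8u8  a = msa_get_low_u8(va), b = msa_get_low_u8(vb);
+     v8u16 p = msa_mull_u8(a, b);   // widening multiply, products fit in 16 bits
+     uint16_t s = msa_sum_u16(p);   // horizontal reduction defined above
+*/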
+
+/* bitwise and: __builtin_msa_and_v */
+#define msa_andq_u8(__a, __b)  ((v16u8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s8(__a, __b)  ((v16i8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_u16(__a, __b) ((v8u16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s16(__a, __b) ((v8i16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_u32(__a, __b) ((v4u32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s32(__a, __b) ((v4i32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_u64(__a, __b) ((v2u64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s64(__a, __b) ((v2i64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+
+/* bitwise or: __builtin_msa_or_v */
+#define msa_orrq_u8(__a, __b)  ((v16u8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s8(__a, __b)  ((v16i8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_u16(__a, __b) ((v8u16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s16(__a, __b) ((v8i16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_u32(__a, __b) ((v4u32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s32(__a, __b) ((v4i32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_u64(__a, __b) ((v2u64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s64(__a, __b) ((v2i64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+
+/* bitwise xor: __builtin_msa_xor_v */
+#define msa_eorq_u8(__a, __b)  ((v16u8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s8(__a, __b)  ((v16i8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_u16(__a, __b) ((v8u16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s16(__a, __b) ((v8i16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_u32(__a, __b) ((v4u32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s32(__a, __b) ((v4i32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_u64(__a, __b) ((v2u64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s64(__a, __b) ((v2i64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+
+/* bitwise not: v16u8 __builtin_msa_xori_b (v16u8, 0xff) */
+#define msa_mvnq_u8(__a)  ((v16u8)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s8(__a)  ((v16i8)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_u16(__a) ((v8u16)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s16(__a) ((v8i16)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_u32(__a) ((v4u32)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s32(__a) ((v4i32)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_u64(__a) ((v2u64)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s64(__a) ((v2i64)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
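+
+/* Illustrative sketch (added for clarity; not upstream code): the comparison
+   macros below return all-ones/all-zeros lane masks that combine with the
+   bitwise helpers above, e.g. zeroing non-positive lanes. msa_ld1q_s32 and
+   msa_dupq_n_s32 are assumed from earlier in this header; p is a const
+   int32_t* supplied by the caller:
+
+     v4i32 x    = msa_ld1q_s32(p);
+     v4u32 keep = msa_cgtq_s32(x, msa_dupq_n_s32(0)); // 0xFFFFFFFF where x > 0
+     v4i32 y    = msa_andq_s32(x, (v4i32)keep);
+*/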
+
+/* compare equal: ceq -> ri = ai == bi ? 1...1:0...0 */
+#define msa_ceqq_u8(__a, __b)  ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_ceqq_s8(__a, __b)  ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_ceqq_u16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_ceqq_s16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_ceqq_u32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_ceqq_s32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_ceqq_f32(__a, __b) ((v4u32)__builtin_msa_fceq_w((v4f32)(__a), (v4f32)(__b)))
+#define msa_ceqq_u64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b)))
+#define msa_ceqq_s64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b)))
+#define msa_ceqq_f64(__a, __b) ((v2u64)__builtin_msa_fceq_d((v2f64)(__a), (v2f64)(__b)))
+
+/* compare less-than: clt -> ri = ai < bi ? 1...1:0...0 */
+#define msa_cltq_u8(__a, __b)  ((v16u8)__builtin_msa_clt_u_b((v16u8)(__a), (v16u8)(__b)))
+#define msa_cltq_s8(__a, __b)  ((v16u8)__builtin_msa_clt_s_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_cltq_u16(__a, __b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(__a), (v8u16)(__b)))
+#define msa_cltq_s16(__a, __b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_cltq_u32(__a, __b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(__a), (v4u32)(__b)))
+#define msa_cltq_s32(__a, __b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_cltq_f32(__a, __b) ((v4u32)__builtin_msa_fclt_w((v4f32)(__a), (v4f32)(__b)))
+#define msa_cltq_u64(__a, __b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(__a), (v2u64)(__b)))
+#define msa_cltq_s64(__a, __b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(__a), (v2i64)(__b)))
+#define msa_cltq_f64(__a, __b) ((v2u64)__builtin_msa_fclt_d((v2f64)(__a), (v2f64)(__b)))
+
+/* compare greater-than: cgt -> ri = ai > bi ? 1...1:0...0 */
+#define msa_cgtq_u8(__a, __b)  ((v16u8)__builtin_msa_clt_u_b((v16u8)(__b), (v16u8)(__a)))
+#define msa_cgtq_s8(__a, __b)  ((v16u8)__builtin_msa_clt_s_b((v16i8)(__b), (v16i8)(__a)))
+#define msa_cgtq_u16(__a, __b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(__b), (v8u16)(__a)))
+#define msa_cgtq_s16(__a, __b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(__b), (v8i16)(__a)))
+#define msa_cgtq_u32(__a, __b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(__b), (v4u32)(__a)))
+#define msa_cgtq_s32(__a, __b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(__b), (v4i32)(__a)))
+#define msa_cgtq_f32(__a, __b) ((v4u32)__builtin_msa_fclt_w((v4f32)(__b), (v4f32)(__a)))
+#define msa_cgtq_u64(__a, __b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(__b), (v2u64)(__a)))
+#define msa_cgtq_s64(__a, __b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(__b), (v2i64)(__a)))
+#define msa_cgtq_f64(__a, __b) ((v2u64)__builtin_msa_fclt_d((v2f64)(__b), (v2f64)(__a)))
+
+/* compare less-equal: cle -> ri = ai <= bi ?
1...1:0...0 */ +#define msa_cleq_u8(__a, __b) ((v16u8)__builtin_msa_cle_u_b((v16u8)(__a), (v16u8)(__b))) +#define msa_cleq_s8(__a, __b) ((v16u8)__builtin_msa_cle_s_b((v16i8)(__a), (v16i8)(__b))) +#define msa_cleq_u16(__a, __b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(__a), (v8u16)(__b))) +#define msa_cleq_s16(__a, __b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(__a), (v8i16)(__b))) +#define msa_cleq_u32(__a, __b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(__a), (v4u32)(__b))) +#define msa_cleq_s32(__a, __b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(__a), (v4i32)(__b))) +#define msa_cleq_f32(__a, __b) ((v4u32)__builtin_msa_fcle_w((v4f32)(__a), (v4f32)(__b))) +#define msa_cleq_u64(__a, __b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(__a), (v2u64)(__b))) +#define msa_cleq_s64(__a, __b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(__a), (v2i64)(__b))) +#define msa_cleq_f64(__a, __b) ((v2u64)__builtin_msa_fcle_d((v2f64)(__a), (v2f64)(__b))) + +/* compare greater-equal: cge -> ri = ai >= bi ? 1...1:0...0 */ +#define msa_cgeq_u8(__a, __b) ((v16u8)__builtin_msa_cle_u_b((v16u8)(__b), (v16u8)(__a))) +#define msa_cgeq_s8(__a, __b) ((v16u8)__builtin_msa_cle_s_b((v16i8)(__b), (v16i8)(__a))) +#define msa_cgeq_u16(__a, __b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(__b), (v8u16)(__a))) +#define msa_cgeq_s16(__a, __b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(__b), (v8i16)(__a))) +#define msa_cgeq_u32(__a, __b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(__b), (v4u32)(__a))) +#define msa_cgeq_s32(__a, __b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(__b), (v4i32)(__a))) +#define msa_cgeq_f32(__a, __b) ((v4u32)__builtin_msa_fcle_w((v4f32)(__b), (v4f32)(__a))) +#define msa_cgeq_u64(__a, __b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(__b), (v2u64)(__a))) +#define msa_cgeq_s64(__a, __b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(__b), (v2i64)(__a))) +#define msa_cgeq_f64(__a, __b) ((v2u64)__builtin_msa_fcle_d((v2f64)(__b), (v2f64)(__a))) + +/* Shift Left Logical: shl -> ri = ai << bi; */ +#define msa_shlq_u8(__a, __b) ((v16u8)__builtin_msa_sll_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shlq_s8(__a, __b) ((v16i8)__builtin_msa_sll_b((v16i8)(__a), (v16i8)(__b))) +#define msa_shlq_u16(__a, __b) ((v8u16)__builtin_msa_sll_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shlq_s16(__a, __b) ((v8i16)__builtin_msa_sll_h((v8i16)(__a), (v8i16)(__b))) +#define msa_shlq_u32(__a, __b) ((v4u32)__builtin_msa_sll_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shlq_s32(__a, __b) ((v4i32)__builtin_msa_sll_w((v4i32)(__a), (v4i32)(__b))) +#define msa_shlq_u64(__a, __b) ((v2u64)__builtin_msa_sll_d((v2i64)(__a), (v2i64)(__b))) +#define msa_shlq_s64(__a, __b) ((v2i64)__builtin_msa_sll_d((v2i64)(__a), (v2i64)(__b))) + +/* Immediate Shift Left Logical: shl -> ri = ai << imm; */ +#define msa_shlq_n_u8(__a, __imm) ((v16u8)__builtin_msa_slli_b((v16i8)(__a), __imm)) +#define msa_shlq_n_s8(__a, __imm) ((v16i8)__builtin_msa_slli_b((v16i8)(__a), __imm)) +#define msa_shlq_n_u16(__a, __imm) ((v8u16)__builtin_msa_slli_h((v8i16)(__a), __imm)) +#define msa_shlq_n_s16(__a, __imm) ((v8i16)__builtin_msa_slli_h((v8i16)(__a), __imm)) +#define msa_shlq_n_u32(__a, __imm) ((v4u32)__builtin_msa_slli_w((v4i32)(__a), __imm)) +#define msa_shlq_n_s32(__a, __imm) ((v4i32)__builtin_msa_slli_w((v4i32)(__a), __imm)) +#define msa_shlq_n_u64(__a, __imm) ((v2u64)__builtin_msa_slli_d((v2i64)(__a), __imm)) +#define msa_shlq_n_s64(__a, __imm) ((v2i64)__builtin_msa_slli_d((v2i64)(__a), __imm)) + +/* shift right: shrq -> ri = ai >> bi; */ +#define msa_shrq_u8(__a, __b) ((v16u8)__builtin_msa_srl_b((v16i8)(__a), (v16i8)(__b))) +#define 
msa_shrq_s8(__a, __b)  ((v16i8)__builtin_msa_sra_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_shrq_u16(__a, __b) ((v8u16)__builtin_msa_srl_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_shrq_s16(__a, __b) ((v8i16)__builtin_msa_sra_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_shrq_u32(__a, __b) ((v4u32)__builtin_msa_srl_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_shrq_s32(__a, __b) ((v4i32)__builtin_msa_sra_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_shrq_u64(__a, __b) ((v2u64)__builtin_msa_srl_d((v2i64)(__a), (v2i64)(__b)))
+#define msa_shrq_s64(__a, __b) ((v2i64)__builtin_msa_sra_d((v2i64)(__a), (v2i64)(__b)))
+
+/* Immediate Shift Right: shr -> ri = ai >> imm; */
+#define msa_shrq_n_u8(__a, __imm)  ((v16u8)__builtin_msa_srli_b((v16i8)(__a), __imm))
+#define msa_shrq_n_s8(__a, __imm)  ((v16i8)__builtin_msa_srai_b((v16i8)(__a), __imm))
+#define msa_shrq_n_u16(__a, __imm) ((v8u16)__builtin_msa_srli_h((v8i16)(__a), __imm))
+#define msa_shrq_n_s16(__a, __imm) ((v8i16)__builtin_msa_srai_h((v8i16)(__a), __imm))
+#define msa_shrq_n_u32(__a, __imm) ((v4u32)__builtin_msa_srli_w((v4i32)(__a), __imm))
+#define msa_shrq_n_s32(__a, __imm) ((v4i32)__builtin_msa_srai_w((v4i32)(__a), __imm))
+#define msa_shrq_n_u64(__a, __imm) ((v2u64)__builtin_msa_srli_d((v2i64)(__a), __imm))
+#define msa_shrq_n_s64(__a, __imm) ((v2i64)__builtin_msa_srai_d((v2i64)(__a), __imm))
+
+/* Immediate Shift Right Rounded: shr -> ri = ai >> (rounded)imm; */
+#define msa_rshrq_n_u8(__a, __imm)  ((v16u8)__builtin_msa_srlri_b((v16i8)(__a), __imm))
+#define msa_rshrq_n_s8(__a, __imm)  ((v16i8)__builtin_msa_srari_b((v16i8)(__a), __imm))
+#define msa_rshrq_n_u16(__a, __imm) ((v8u16)__builtin_msa_srlri_h((v8i16)(__a), __imm))
+#define msa_rshrq_n_s16(__a, __imm) ((v8i16)__builtin_msa_srari_h((v8i16)(__a), __imm))
+#define msa_rshrq_n_u32(__a, __imm) ((v4u32)__builtin_msa_srlri_w((v4i32)(__a), __imm))
+#define msa_rshrq_n_s32(__a, __imm) ((v4i32)__builtin_msa_srari_w((v4i32)(__a), __imm))
+#define msa_rshrq_n_u64(__a, __imm) ((v2u64)__builtin_msa_srlri_d((v2i64)(__a), __imm))
+#define msa_rshrq_n_s64(__a, __imm) ((v2i64)__builtin_msa_srari_d((v2i64)(__a), __imm))
+
+/* Vector Shift Right Rounded: qrshr -> ri = ai >> (rounded)bi; (maps to the rounding arithmetic shift right, srar) */
+#define msa_qrshrq_s32(a, b) ((v4i32)__msa_srar_w((v4i32)(a), (v4i32)(b)))
+
+/* Rename the MSA builtins to unify the naming style for intrin_msa.hpp */
+#define msa_qaddq_u8 __builtin_msa_adds_u_b
+#define msa_qaddq_s8 __builtin_msa_adds_s_b
+#define msa_qaddq_u16 __builtin_msa_adds_u_h
+#define msa_qaddq_s16 __builtin_msa_adds_s_h
+#define msa_qaddq_u32 __builtin_msa_adds_u_w
+#define msa_qaddq_s32 __builtin_msa_adds_s_w
+#define msa_qaddq_u64 __builtin_msa_adds_u_d
+#define msa_qaddq_s64 __builtin_msa_adds_s_d
+#define msa_addq_u8(a, b) ((v16u8)__builtin_msa_addv_b((v16i8)(a), (v16i8)(b)))
+#define msa_addq_s8 __builtin_msa_addv_b
+#define msa_addq_u16(a, b) ((v8u16)__builtin_msa_addv_h((v8i16)(a), (v8i16)(b)))
+#define msa_addq_s16 __builtin_msa_addv_h
+#define msa_addq_u32(a, b) ((v4u32)__builtin_msa_addv_w((v4i32)(a), (v4i32)(b)))
+#define msa_addq_s32 __builtin_msa_addv_w
+#define msa_addq_f32 __builtin_msa_fadd_w
+#define msa_addq_u64(a, b) ((v2u64)__builtin_msa_addv_d((v2i64)(a), (v2i64)(b)))
+#define msa_addq_s64 __builtin_msa_addv_d
+#define msa_addq_f64 __builtin_msa_fadd_d
+#define msa_qsubq_u8 __builtin_msa_subs_u_b
+#define msa_qsubq_s8 __builtin_msa_subs_s_b
+#define msa_qsubq_u16 __builtin_msa_subs_u_h
+#define msa_qsubq_s16 __builtin_msa_subs_s_h
+#define msa_subq_u8(a, b)
((v16u8)__builtin_msa_subv_b((v16i8)(a), (v16i8)(b))) +#define msa_subq_s8 __builtin_msa_subv_b +#define msa_subq_u16(a, b) ((v8u16)__builtin_msa_subv_h((v8i16)(a), (v8i16)(b))) +#define msa_subq_s16 __builtin_msa_subv_h +#define msa_subq_u32(a, b) ((v4u32)__builtin_msa_subv_w((v4i32)(a), (v4i32)(b))) +#define msa_subq_s32 __builtin_msa_subv_w +#define msa_subq_f32 __builtin_msa_fsub_w +#define msa_subq_u64(a, b) ((v2u64)__builtin_msa_subv_d((v2i64)(a), (v2i64)(b))) +#define msa_subq_s64 __builtin_msa_subv_d +#define msa_subq_f64 __builtin_msa_fsub_d +#define msa_mulq_u8(a, b) ((v16u8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) +#define msa_mulq_s8(a, b) ((v16i8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) +#define msa_mulq_u16(a, b) ((v8u16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b))) +#define msa_mulq_s16(a, b) ((v8i16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b))) +#define msa_mulq_u32(a, b) ((v4u32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b))) +#define msa_mulq_s32(a, b) ((v4i32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b))) +#define msa_mulq_u64(a, b) ((v2u64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b))) +#define msa_mulq_s64(a, b) ((v2i64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b))) +#define msa_mulq_f32 __builtin_msa_fmul_w +#define msa_mulq_f64 __builtin_msa_fmul_d +#define msa_divq_f32 __builtin_msa_fdiv_w +#define msa_divq_f64 __builtin_msa_fdiv_d +#define msa_dotp_s_h __builtin_msa_dotp_s_h +#define msa_dotp_s_w __builtin_msa_dotp_s_w +#define msa_dotp_s_d __builtin_msa_dotp_s_d +#define msa_dotp_u_h __builtin_msa_dotp_u_h +#define msa_dotp_u_w __builtin_msa_dotp_u_w +#define msa_dotp_u_d __builtin_msa_dotp_u_d +#define msa_dpadd_s_h __builtin_msa_dpadd_s_h +#define msa_dpadd_s_w __builtin_msa_dpadd_s_w +#define msa_dpadd_s_d __builtin_msa_dpadd_s_d +#define msa_dpadd_u_h __builtin_msa_dpadd_u_h +#define msa_dpadd_u_w __builtin_msa_dpadd_u_w +#define msa_dpadd_u_d __builtin_msa_dpadd_u_d + +#define ILVRL_B2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \ + } while (0) +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) +#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) +#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) +#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) +#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__) + +#define ILVRL_H2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_h((v8i16)(in0), (v8i16)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_h((v8i16)(in0), (v8i16)(in1)); \ + } while (0) +#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__) +#define ILVRL_H2_SB(...) ILVRL_H2(v16i8, __VA_ARGS__) +#define ILVRL_H2_UH(...) ILVRL_H2(v8u16, __VA_ARGS__) +#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__) +#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__) +#define ILVRL_H2_UW(...) ILVRL_H2(v4u32, __VA_ARGS__) + +#define ILVRL_W2(RTYPE, in0, in1, low, hi) do { \ + low = (RTYPE)__builtin_msa_ilvr_w((v4i32)(in0), (v4i32)(in1)); \ + hi = (RTYPE)__builtin_msa_ilvl_w((v4i32)(in0), (v4i32)(in1)); \ + } while (0) +#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__) +#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__) +#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__) +#define ILVRL_W2_UW(...) 
ILVRL_W2(v4u32, __VA_ARGS__) + +/* absq, qabsq (r = |a|;) */ +#define msa_absq_s8(a) __builtin_msa_add_a_b(a, __builtin_msa_fill_b(0)) +#define msa_absq_s16(a) __builtin_msa_add_a_h(a, __builtin_msa_fill_h(0)) +#define msa_absq_s32(a) __builtin_msa_add_a_w(a, __builtin_msa_fill_w(0)) +#define msa_absq_s64(a) __builtin_msa_add_a_d(a, __builtin_msa_fill_d(0)) +#define msa_absq_f32(a) ((v4f32)__builtin_msa_bclri_w((v4u32)(a), 31)) +#define msa_absq_f64(a) ((v2f64)__builtin_msa_bclri_d((v2u64)(a), 63)) +#define msa_qabsq_s8(a) __builtin_msa_adds_a_b(a, __builtin_msa_fill_b(0)) +#define msa_qabsq_s16(a) __builtin_msa_adds_a_h(a, __builtin_msa_fill_h(0)) +#define msa_qabsq_s32(a) __builtin_msa_adds_a_w(a, __builtin_msa_fill_w(0)) +#define msa_qabsq_s64(a) __builtin_msa_adds_a_d(a, __builtin_msa_fill_d(0)) + +/* abdq, qabdq (r = |a - b|;) */ +#define msa_abdq_u8 __builtin_msa_asub_u_b +#define msa_abdq_s8 __builtin_msa_asub_s_b +#define msa_abdq_u16 __builtin_msa_asub_u_h +#define msa_abdq_s16 __builtin_msa_asub_s_h +#define msa_abdq_u32 __builtin_msa_asub_u_w +#define msa_abdq_s32 __builtin_msa_asub_s_w +#define msa_abdq_u64 __builtin_msa_asub_u_d +#define msa_abdq_s64 __builtin_msa_asub_s_d +#define msa_abdq_f32(a, b) msa_absq_f32(__builtin_msa_fsub_w(a, b)) +#define msa_abdq_f64(a, b) msa_absq_f64(__builtin_msa_fsub_d(a, b)) +#define msa_qabdq_s8(a, b) msa_qabsq_s8(__builtin_msa_subs_s_b(a, b)) +#define msa_qabdq_s16(a, b) msa_qabsq_s16(__builtin_msa_subs_s_h(a, b)) +#define msa_qabdq_s32(a, b) msa_qabsq_s32(__builtin_msa_subs_s_w(a, b)) +#define msa_qabdq_s64(a, b) msa_qabsq_s64(__builtin_msa_subs_s_d(a, b)) + +/* sqrtq, rsqrtq */ +#define msa_sqrtq_f32 __builtin_msa_fsqrt_w +#define msa_sqrtq_f64 __builtin_msa_fsqrt_d +#define msa_rsqrtq_f32 __builtin_msa_frsqrt_w +#define msa_rsqrtq_f64 __builtin_msa_frsqrt_d + + +/* mlaq: r = a + b * c; */ +__extension__ extern __inline v4i32 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_s32(v4i32 __a, v4i32 __b, v4i32 __c) +{ + __asm__ volatile("maddv.w %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v2i64 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_s64(v2i64 __a, v2i64 __b, v2i64 __c) +{ + __asm__ volatile("maddv.d %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v4f32 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_f32(v4f32 __a, v4f32 __b, v4f32 __c) +{ + __asm__ volatile("fmadd.w %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +__extension__ extern __inline v2f64 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_mlaq_f64(v2f64 __a, v2f64 __b, v2f64 __c) +{ + __asm__ volatile("fmadd.d %w[__a], %w[__b], %w[__c]\n" + // Outputs + : [__a] "+f"(__a) + // Inputs + : [__b] "f"(__b), [__c] "f"(__c)); + return __a; +} + +/* cntq */ +#define msa_cntq_s8 __builtin_msa_pcnt_b +#define msa_cntq_s16 __builtin_msa_pcnt_h +#define msa_cntq_s32 __builtin_msa_pcnt_w +#define msa_cntq_s64 __builtin_msa_pcnt_d + +/* bslq (a: mask; r = b(if a == 0); r = c(if a == 1);) */ +#define msa_bslq_u8 __builtin_msa_bsel_v + +/* ilvrq, ilvlq (For EL only, ilvrq: b0, a0, b1, a1; ilvlq: b2, a2, b3, a3;) */ +#define msa_ilvrq_s8 __builtin_msa_ilvr_b +#define 
msa_ilvrq_s16 __builtin_msa_ilvr_h +#define msa_ilvrq_s32 __builtin_msa_ilvr_w +#define msa_ilvrq_s64 __builtin_msa_ilvr_d +#define msa_ilvlq_s8 __builtin_msa_ilvl_b +#define msa_ilvlq_s16 __builtin_msa_ilvl_h +#define msa_ilvlq_s32 __builtin_msa_ilvl_w +#define msa_ilvlq_s64 __builtin_msa_ilvl_d + +/* ilvevq, ilvodq (ilvevq: b0, a0, b2, a2; ilvodq: b1, a1, b3, a3; ) */ +#define msa_ilvevq_s8 __builtin_msa_ilvev_b +#define msa_ilvevq_s16 __builtin_msa_ilvev_h +#define msa_ilvevq_s32 __builtin_msa_ilvev_w +#define msa_ilvevq_s64 __builtin_msa_ilvev_d +#define msa_ilvodq_s8 __builtin_msa_ilvod_b +#define msa_ilvodq_s16 __builtin_msa_ilvod_h +#define msa_ilvodq_s32 __builtin_msa_ilvod_w +#define msa_ilvodq_s64 __builtin_msa_ilvod_d + +/* extq (r = (a || b); a concatenation b and get elements from index c) */ +#ifdef _MIPSEB +#define msa_extq_s8(a, b, c) \ +(__builtin_msa_vshf_b(__builtin_msa_subv_b((v16i8)((v2i64){0x1716151413121110, 0x1F1E1D1C1B1A1918}), __builtin_msa_fill_b(c)), a, b)) +#define msa_extq_s16(a, b, c) \ +(__builtin_msa_vshf_h(__builtin_msa_subv_h((v8i16)((v2i64){0x000B000A00090008, 0x000F000E000D000C}), __builtin_msa_fill_h(c)), a, b)) +#define msa_extq_s32(a, b, c) \ +(__builtin_msa_vshf_w(__builtin_msa_subv_w((v4i32)((v2i64){0x0000000500000004, 0x0000000700000006}), __builtin_msa_fill_w(c)), a, b)) +#define msa_extq_s64(a, b, c) \ +(__builtin_msa_vshf_d(__builtin_msa_subv_d((v2i64){0x0000000000000002, 0x0000000000000003}, __builtin_msa_fill_d(c)), a, b)) +#else +#define msa_extq_s8(a, b, c) \ +(__builtin_msa_vshf_b(__builtin_msa_addv_b((v16i8)((v2i64){0x0706050403020100, 0x0F0E0D0C0B0A0908}), __builtin_msa_fill_b(c)), b, a)) +#define msa_extq_s16(a, b, c) \ +(__builtin_msa_vshf_h(__builtin_msa_addv_h((v8i16)((v2i64){0x0003000200010000, 0x0007000600050004}), __builtin_msa_fill_h(c)), b, a)) +#define msa_extq_s32(a, b, c) \ +(__builtin_msa_vshf_w(__builtin_msa_addv_w((v4i32)((v2i64){0x0000000100000000, 0x0000000300000002}), __builtin_msa_fill_w(c)), b, a)) +#define msa_extq_s64(a, b, c) \ +(__builtin_msa_vshf_d(__builtin_msa_addv_d((v2i64){0x0000000000000000, 0x0000000000000001}, __builtin_msa_fill_d(c)), b, a)) +#endif /* _MIPSEB */ + +/* cvttruncq, cvttintq, cvtrintq */ +#define msa_cvttruncq_u32_f32 __builtin_msa_ftrunc_u_w +#define msa_cvttruncq_s32_f32 __builtin_msa_ftrunc_s_w +#define msa_cvttruncq_u64_f64 __builtin_msa_ftrunc_u_d +#define msa_cvttruncq_s64_f64 __builtin_msa_ftrunc_s_d +#define msa_cvttintq_u32_f32 __builtin_msa_ftint_u_w +#define msa_cvttintq_s32_f32 __builtin_msa_ftint_s_w +#define msa_cvttintq_u64_f64 __builtin_msa_ftint_u_d +#define msa_cvttintq_s64_f64 __builtin_msa_ftint_s_d +#define msa_cvtrintq_f32 __builtin_msa_frint_w +#define msa_cvtrintq_f64 __builtin_msa_frint_d + +/* cvtfintq, cvtfq */ +#define msa_cvtfintq_f32_u32 __builtin_msa_ffint_u_w +#define msa_cvtfintq_f32_s32 __builtin_msa_ffint_s_w +#define msa_cvtfintq_f64_u64 __builtin_msa_ffint_u_d +#define msa_cvtfintq_f64_s64 __builtin_msa_ffint_s_d +#define msa_cvtfq_f32_f64 __builtin_msa_fexdo_w +#define msa_cvtflq_f64_f32 __builtin_msa_fexupr_d +#define msa_cvtfhq_f64_f32 __builtin_msa_fexupl_d + +#define msa_addl_u8(a, b) ((v8u16)__builtin_msa_addv_h((v8i16)V8U8_2_V8I16(a), (v8i16)V8U8_2_V8I16(b))) +#define msa_addl_s8(a, b) (__builtin_msa_addv_h((v8i16)V8I8_2_V8I16(a), (v8i16)V8I8_2_V8I16(b))) +#define msa_addl_u16(a, b) ((v4u32)__builtin_msa_addv_w((v4i32)V4U16_2_V4I32(a), (v4i32)V4U16_2_V4I32(b))) +#define msa_addl_s16(a, b) (__builtin_msa_addv_w((v4i32)V4I16_2_V4I32(a), 
(v4i32)V4I16_2_V4I32(b))) +#define msa_subl_s16(a, b) (__builtin_msa_subv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b))) +#define msa_recpeq_f32 __builtin_msa_frcp_w +#define msa_recpsq_f32(a, b) (__builtin_msa_fsub_w(msa_dupq_n_f32(2.0f), __builtin_msa_fmul_w(a, b))) + +#define MSA_INTERLEAVED_IMPL_LOAD2_STORE2(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld2q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \ + *a = (_Tpv)__builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st2q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b) \ +{ \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df((_Tpvs)b, (_Tpvs)a)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint8_t, v16u8, v16i8, u8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int8_t, v16i8, v16i8, s8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint16_t, v8u16, v8i16, u16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int16_t, v8i16, v8i16, s16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint32_t, v4u32, v4i32, u32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int32_t, v4i32, v4i32, s32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(float, v4f32, v4i32, f32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint64_t, v2u64, v2i64, u64, d, 2) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int64_t, v2i64, v2i64, s64, d, 2) +MSA_INTERLEAVED_IMPL_LOAD2_STORE2(double, v2f64, v2i64, f64, d, 2) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \ + _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0704011F1F1F1F1F, 0x1F1C191613100D0A}), (_Tpvs)v0, (_Tpvs)v1); \ + *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150E0B080502, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0603001F1F1F1F1F, 0x1E1B1815120F0C09}), (_Tpvs)v0, (_Tpvs)v1); \ + *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150D0A070401, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x05021F1F1F1F1F1F, 0x1D1A1714110E0B08}), (_Tpvs)v0, (_Tpvs)v1); \ + *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x17160F0C09060300, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \ +} +#else +#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \ + _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15120F0C09060300, 0x00000000001E1B18}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1D1A1714110A0908}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1613100D0A070401, 0x00000000001F1C19}), (_Tpvs)v1, (_Tpvs)v0); \ + *b 
= (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1E1B1815120A0908}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1714110E0B080502, 0x0000000000001D1A}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1F1C191613100908}), (_Tpvs)v2, v3); \ +} +#endif + +MSA_INTERLEAVED_IMPL_LOAD3_8(uint8_t, v16u8, v16i8, u8) +MSA_INTERLEAVED_IMPL_LOAD3_8(int8_t, v16i8, v16i8, s8) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \ + _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00030000000F000F, 0x000F000C00090006}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000A00050002, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0002000F000F000F, 0x000E000B00080005}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000700040001, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000F000F000F, 0x000D000A00070004}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000600030000, 0x000F000E000D000C}), (_Tpvs)v2, v3); \ +} +#else +#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \ + _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0009000600030000, 0x00000000000F000C}), (_Tpvs)v1, (_Tpvs)v0); \ + *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000D000A00050004}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A000700040001, 0x000000000000000D}), (_Tpvs)v1, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000E000B00080004}), (_Tpvs)v2, v3); \ + v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000800050002, 0x000000000000000E}), (_Tpvs)v1, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000F000C00090004}), (_Tpvs)v2, v3); \ +} +#endif + +MSA_INTERLEAVED_IMPL_LOAD3_16(uint16_t, v8u16, v8i16, u16) +MSA_INTERLEAVED_IMPL_LOAD3_16(int16_t, v8i16, v8i16, s16) + +#define MSA_INTERLEAVED_IMPL_LOAD3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + _Tpv v00 = msa_ld1q_##suffix(ptr); \ + _Tpv v01 = msa_ld1q_##suffix(ptr + 4); \ + _Tpv v02 = msa_ld1q_##suffix(ptr + 8); \ + _Tpvs v10 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v01, (v2i64)v01), (_Tpvs)v00); \ + _Tpvs v11 = __builtin_msa_ilvr_w((_Tpvs)v02, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v00, (v2i64)v00)); \ + _Tpvs v12 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v02, (v2i64)v02), (_Tpvs)v01); \ + *a = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v11, (v2i64)v11), v10); \ + *b = (_Tpv)__builtin_msa_ilvr_w(v12, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v10, (v2i64)v10)); 
\ + *c = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v12, (v2i64)v12), v11); \ +} + +MSA_INTERLEAVED_IMPL_LOAD3_32(uint32_t, v4u32, v4i32, u32) +MSA_INTERLEAVED_IMPL_LOAD3_32(int32_t, v4i32, v4i32, s32) +MSA_INTERLEAVED_IMPL_LOAD3_32(float, v4f32, v4i32, f32) + +#define MSA_INTERLEAVED_IMPL_LOAD3_64(_Tp, _Tpv, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \ +{ \ + *((_Tp*)a) = *ptr; *((_Tp*)b) = *(ptr + 1); *((_Tp*)c) = *(ptr + 2); \ + *((_Tp*)a + 1) = *(ptr + 3); *((_Tp*)b + 1) = *(ptr + 4); *((_Tp*)c + 1) = *(ptr + 5); \ +} + +MSA_INTERLEAVED_IMPL_LOAD3_64(uint64_t, v2u64, u64) +MSA_INTERLEAVED_IMPL_LOAD3_64(int64_t, v2i64, s64) +MSA_INTERLEAVED_IMPL_LOAD3_64(double, v2f64, f64) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0F0E0D0C0B1F1F1F, 0x1F1E1D1C1B1A1F1F}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0D1C140C1B130B1A, 0x1F170F1E160E1D15}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A09080706051F1F, 0x19181716151F1F1F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1D14071C13061B12, 0x170A1F16091E1508}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x04030201001F1F1F, 0x14131211101F1F1F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15021C14011B1300, 0x051F17041E16031D}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000050403020100, 0x0000001413121110}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A02110901100800, 0x05140C04130B0312}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000A09080706, 0x00001A1918171615}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x170A011609001508, 0x0D04190C03180B02}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000F0E0D0C0B, 0x0000001F1E1D1C1B}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x021C09011B08001A, 0x1F0C041E0B031D0A}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_8(uint8_t, v16u8, v16i8, u8) +MSA_INTERLEAVED_IMPL_STORE3_8(int8_t, v16i8, v16i8, s8) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000700060005000F, 0x000F000E000D000F}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A0006000D0009, 
0x000F000B0007000E}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00040003000F000F, 0x000C000B000A000F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000E000A0003000D, 0x0005000F000B0004}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000200010000000F, 0x00090008000F000F}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000E00090000, 0x000B0002000F000A}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000200010000, 0x0000000A00090008}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000800040000, 0x0006000200090005}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000500040003, 0x00000000000C000B}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B00040000000A, 0x0002000C00050001}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000000070006, 0x0000000F000E000D}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00050000000D0004, 0x000F00060001000E}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_16(uint16_t, v8u16, v8i16, u16) +MSA_INTERLEAVED_IMPL_STORE3_16(int16_t, v8i16, v8i16, s16) + +#ifdef _MIPSEB +#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000007, 0x0000000700000006}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000006, 0x0000000700000005}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000001, 0x0000000500000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000700000004, 0x0000000500000002}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000007, 0x0000000400000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000000, 0x0000000100000007}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ +} +#else +#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000100000000, 0x0000000000000004}), (_Tpvs)b, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000000, 0x0000000100000004}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000002, 0x0000000600000005}), (_Tpvs)b, (_Tpvs)a); \ + v1 = 
__builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000002, 0x0000000300000000}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \ + v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000003, 0x0000000000000007}), (_Tpvs)b, (_Tpvs)a); \ + v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000006, 0x0000000700000002}), (_Tpvs)c, (_Tpvs)v0); \ + msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \ +} +#endif + +MSA_INTERLEAVED_IMPL_STORE3_32(uint32_t, v4u32, v4i32, u32) +MSA_INTERLEAVED_IMPL_STORE3_32(int32_t, v4i32, v4i32, s32) +MSA_INTERLEAVED_IMPL_STORE3_32(float, v4f32, v4i32, f32) + +#define MSA_INTERLEAVED_IMPL_STORE3_64(_Tp, _Tpv, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \ +{ \ + *ptr = a[0]; *(ptr + 1) = b[0]; *(ptr + 2) = c[0]; \ + *(ptr + 3) = a[1]; *(ptr + 4) = b[1]; *(ptr + 5) = c[1]; \ +} + +MSA_INTERLEAVED_IMPL_STORE3_64(uint64_t, v2u64, u64) +MSA_INTERLEAVED_IMPL_STORE3_64(int64_t, v2i64, s64) +MSA_INTERLEAVED_IMPL_STORE3_64(double, v2f64, f64) + +#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \ +{ \ + _Tpv v0 = msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + nlanes * 2); \ + _Tpv v3 = msa_ld1q_##suffix(ptr + nlanes * 3); \ + _Tpvs t0 = __builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); \ + _Tpvs t1 = __builtin_msa_pckev_##df((_Tpvs)v3, (_Tpvs)v2); \ + _Tpvs t2 = __builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); \ + _Tpvs t3 = __builtin_msa_pckod_##df((_Tpvs)v3, (_Tpvs)v2); \ + *a = (_Tpv)__builtin_msa_pckev_##df(t1, t0); \ + *b = (_Tpv)__builtin_msa_pckev_##df(t3, t2); \ + *c = (_Tpv)__builtin_msa_pckod_##df(t1, t0); \ + *d = (_Tpv)__builtin_msa_pckod_##df(t3, t2); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \ +{ \ + _Tpvs v0 = __builtin_msa_ilvr_##df((_Tpvs)c, (_Tpvs)a); \ + _Tpvs v1 = __builtin_msa_ilvr_##df((_Tpvs)d, (_Tpvs)b); \ + _Tpvs v2 = __builtin_msa_ilvl_##df((_Tpvs)c, (_Tpvs)a); \ + _Tpvs v3 = __builtin_msa_ilvl_##df((_Tpvs)d, (_Tpvs)b); \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df(v1, v0)); \ + msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df(v1, v0)); \ + msa_st1q_##suffix(ptr + 2 * nlanes, (_Tpv)__builtin_msa_ilvr_##df(v3, v2)); \ + msa_st1q_##suffix(ptr + 3 * nlanes, (_Tpv)__builtin_msa_ilvl_##df(v3, v2)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint8_t, v16u8, v16i8, u8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int8_t, v16i8, v16i8, s8, b, 16) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint16_t, v8u16, v8i16, u16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int16_t, v8i16, v8i16, s16, h, 8) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint32_t, v4u32, v4i32, u32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int32_t, v4i32, v4i32, s32, w, 4) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4(float, v4f32, v4i32, f32, w, 4) + +#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(_Tp, _Tpv, _Tpvs, suffix) \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \ +{ \ + _Tpv v0 = 
msa_ld1q_##suffix(ptr); \ + _Tpv v1 = msa_ld1q_##suffix(ptr + 2); \ + _Tpv v2 = msa_ld1q_##suffix(ptr + 4); \ + _Tpv v3 = msa_ld1q_##suffix(ptr + 6); \ + *a = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v2, (_Tpvs)v0); \ + *b = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v2, (_Tpvs)v0); \ + *c = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v3, (_Tpvs)v1); \ + *d = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v3, (_Tpvs)v1); \ +} \ +__extension__ extern __inline void \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \ +{ \ + msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + 2, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)d, (_Tpvs)c)); \ + msa_st1q_##suffix(ptr + 4, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)b, (_Tpvs)a)); \ + msa_st1q_##suffix(ptr + 6, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)d, (_Tpvs)c)); \ +} + +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(uint64_t, v2u64, v2i64, u64) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(int64_t, v2i64, v2i64, s64) +MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(double, v2f64, v2i64, f64) + +__extension__ extern __inline v8i16 +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +msa_qdmulhq_n_s16(v8i16 a, int16_t b) +{ + v8i16 a_lo, a_hi; + ILVRL_H2_SH(a, msa_dupq_n_s16(0), a_lo, a_hi); + return msa_packr_s32(msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_dupq_n_s32(b)), 1), + msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_hi), msa_dupq_n_s32(b)), 1), 16); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /*__mips_msa*/ +#endif /* OPENCV_CORE_MSA_MACROS_H */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/simd_utils.impl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/simd_utils.impl.hpp new file mode 100644 index 0000000..fff8f94 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/hal/simd_utils.impl.hpp @@ -0,0 +1,146 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This header is not standalone. Don't include directly, use "intrin.hpp" instead. 
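+//
+// Illustrative note (added commentary, not upstream code): the Type2Vec*_Traits
+// specializations below map a scalar type to its universal-intrinsic register
+// type, so generic code can write, for example:
+//
+//   float s = 0.5f;
+//   auto v = cv::vx_setall(s);  // v_float32x4 / x8 / x16 depending on CV_SIMD_WIDTH
+//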
+#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp
+
+
+#if CV_SIMD128 || CV_SIMD128_CPP
+
+template<typename _T> struct Type2Vec128_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec128_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
+#if CV_SIMD128_64F
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);
+
+template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const uchar& a)  { return v_setall_u8(a); }
+template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const schar& a)  { return v_setall_s8(a); }
+template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
+template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const short& a)  { return v_setall_s16(a); }
+template<> inline Type2Vec128_Traits<  uint>::vec_type v_setall<  uint>(const uint& a)   { return v_setall_u32(a); }
+template<> inline Type2Vec128_Traits<   int>::vec_type v_setall<   int>(const int& a)    { return v_setall_s32(a); }
+template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
+template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const int64& a)  { return v_setall_s64(a); }
+template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const float& a)  { return v_setall_f32(a); }
+#if CV_SIMD128_64F
+template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
+#endif
+
+#endif // SIMD128
+
+
+#if CV_SIMD256
+
+template<typename _T> struct Type2Vec256_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec256_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
+#if CV_SIMD256_64F
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);
+
+template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const uchar& a)  { return v256_setall_u8(a); }
+template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const schar& a)  { return v256_setall_s8(a); }
+template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
+template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const short& a)  { return v256_setall_s16(a); }
+template<> inline Type2Vec256_Traits<  uint>::vec_type v256_setall<  uint>(const uint& a)   { return v256_setall_u32(a); }
+template<> inline Type2Vec256_Traits<   int>::vec_type v256_setall<   int>(const int& a)    { return v256_setall_s32(a); }
+template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
+template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const int64& a)  { return v256_setall_s64(a); }
+template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const float& a)  { return v256_setall_f32(a); }
+#if CV_SIMD256_64F
+template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
+#endif
+
+#endif // SIMD256
+
+
+#if CV_SIMD512
+
+template<typename _T> struct Type2Vec512_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec512_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
+#if CV_SIMD512_64F
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);
+
+template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const uchar& a)  { return v512_setall_u8(a); }
+template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const schar& a)  { return v512_setall_s8(a); }
+template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
+template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const short& a)  { return v512_setall_s16(a); }
+template<> inline Type2Vec512_Traits<  uint>::vec_type v512_setall<  uint>(const uint& a)   { return v512_setall_u32(a); }
+template<> inline Type2Vec512_Traits<   int>::vec_type v512_setall<   int>(const int& a)    { return v512_setall_s32(a); }
+template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
+template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const int64& a)  { return v512_setall_s64(a); }
+template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const float& a)  { return v512_setall_f32(a); }
+#if CV_SIMD512_64F
+template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
+#endif
+
+#endif // SIMD512
+
+
+#if CV_SIMD_WIDTH == 16
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
+#elif CV_SIMD_WIDTH == 32
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
+#elif CV_SIMD_WIDTH == 64
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
+#else
+#error "Build configuration error, unsupported CV_SIMD_WIDTH"
+#endif
+
+
+#endif // OPENCV_HAL_INTRIN_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.hpp
new file mode 100644
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.hpp
new file mode 100644
index 0000000..2aba15c
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.hpp
@@ -0,0 +1,3775 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MAT_HPP
+#define OPENCV_CORE_MAT_HPP
+
+#ifndef __cplusplus
+#  error mat.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/matx.hpp"
+#include "opencv2/core/types.hpp"
+
+#include "opencv2/core/bufferpool.hpp"
+
+#include <type_traits>
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+enum AccessFlag { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
+    ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
+CV_ENUM_FLAGS(AccessFlag)
+__CV_ENUM_FLAGS_BITWISE_AND(AccessFlag, int, AccessFlag)
+
+CV__DEBUG_NS_BEGIN
+
+class CV_EXPORTS _OutputArray;
+
+//////////////////////// Input/Output Array Arguments /////////////////////////////////
+
+/** @brief This is the proxy class for passing read-only input arrays into OpenCV functions.
+
+It is defined as:
+@code
+    typedef const _InputArray& InputArray;
+@endcode
+where _InputArray is a class that can be constructed from `Mat`, `Mat_<T>`, `Matx<T, m, n>`,
+`std::vector<T>`, `std::vector<std::vector<T> >`, `std::vector<Mat>`, `std::vector<Mat_<T> >`,
+`UMat`, `std::vector<UMat>` or `double`. It can also be constructed from a matrix expression.
+
+Since this is mostly implementation-level class, and its interface may change in future versions, we
+do not describe it in detail. There are a few key things, though, that should be kept in mind:
+
+-   When you see in the reference manual or in OpenCV source code a function that takes
+    InputArray, it means that you can actually pass `Mat`, `Matx`, `vector<T>` etc. (see above the
+    complete list).
+-   Optional input arguments: If some of the input arrays may be empty, pass cv::noArray() (or
+    simply cv::Mat() as you probably did before).
+-   The class is designed solely for passing parameters. That is, normally you *should not*
+    declare class members, local and global variables of this type.
+-   If you want to design your own function or a class method that can operate on arrays of
+    multiple types, you can use InputArray (or OutputArray) for the respective parameters. Inside
+    a function you should use _InputArray::getMat() method to construct a matrix header for the
+    array (without copying data). _InputArray::kind() can be used to distinguish Mat from
+    `vector<>` etc., but normally it is not needed.
+
+Here is how you can use a function that takes InputArray :
+@code
+    std::vector<Point2f> vec;
+    // points or a circle
+    for( int i = 0; i < 30; i++ )
+        vec.push_back(Point2f((float)(100 + 30*cos(i*CV_PI*2/5)),
+                              (float)(100 - 30*sin(i*CV_PI*2/5))));
+    cv::transform(vec, vec, cv::Matx23f(0.707, -0.707, 10, 0.707, 0.707, 20));
+@endcode
+That is, we form an STL vector containing points, and apply an in-place affine transformation to the
+vector using the 2x3 matrix created inline as a `Matx<float, 2, 3>` instance.
+
+Here is how such a function can be implemented (for simplicity, we implement a very specific case of
+it, according to the assertion statement inside) :
+@code
+    void myAffineTransform(InputArray _src, OutputArray _dst, InputArray _m)
+    {
+        // get Mat headers for input arrays. This is O(1) operation,
+        // unless _src and/or _m are matrix expressions.
+        Mat src = _src.getMat(), m = _m.getMat();
+        CV_Assert( src.type() == CV_32FC2 && m.type() == CV_32F && m.size() == Size(3, 2) );
+
+        // [re]create the output array so that it has the proper size and type.
+        // In case of Mat it calls Mat::create, in case of STL vector it calls vector::resize.
+        _dst.create(src.size(), src.type());
+        Mat dst = _dst.getMat();
+
+        for( int i = 0; i < src.rows; i++ )
+            for( int j = 0; j < src.cols; j++ )
+            {
+                Point2f pt = src.at<Point2f>(i, j);
+                dst.at<Point2f>(i, j) = Point2f(m.at<float>(0, 0)*pt.x +
+                                                m.at<float>(0, 1)*pt.y +
+                                                m.at<float>(0, 2),
+                                                m.at<float>(1, 0)*pt.x +
+                                                m.at<float>(1, 1)*pt.y +
+                                                m.at<float>(1, 2));
+            }
+    }
+@endcode
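A minimal sketch of calling such a function with different array-like arguments (only the `myAffineTransform` sample above is assumed) shows the point of the proxy class: the same signature accepts a Mat, an STL vector, or a Matx without any explicit conversion:
@code
    // CV_32FC2 data can be passed either as std::vector<Point2f> or as Mat.
    std::vector<cv::Point2f> pts = { {0.f, 0.f}, {1.f, 0.f}, {0.f, 1.f} };
    cv::Matx23f m(1, 0, 10,
                  0, 1, 20);               // pure translation by (10, 20)

    std::vector<cv::Point2f> out;
    myAffineTransform(pts, out, m);        // vector in, vector out

    cv::Mat ptsMat(pts), outMat;
    myAffineTransform(ptsMat, outMat, m);  // Mat in, Mat out
@endcode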
+There is another related type, InputArrayOfArrays, which is currently defined as a synonym for
+InputArray:
+@code
+    typedef InputArray InputArrayOfArrays;
+@endcode
+It denotes function arguments that are either vectors of vectors or vectors of matrices. A separate
+synonym is needed to generate Python/Java etc. wrappers properly. At the function implementation
+level their use is similar, but _InputArray::getMat(idx) should be used to get header for the
+idx-th component of the outer vector and _InputArray::size().area() should be used to find the
+number of components (vectors/matrices) of the outer vector.
+
+In general, type support is limited to cv::Mat types. Other types are forbidden.
+But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc.
+This data is not intended to be interpreted as an image data, or processed somehow like regular cv::Mat.
+To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers.
+Custom type is wrapped as Mat-compatible `CV_8UC<N>` values (N = sizeof(T), N <= CV_CN_MAX).
+ */
+class CV_EXPORTS _InputArray
+{
+public:
+    enum KindFlag {
+        KIND_SHIFT = 16,
+        FIXED_TYPE = 0x8000 << KIND_SHIFT,
+        FIXED_SIZE = 0x4000 << KIND_SHIFT,
+        KIND_MASK = 31 << KIND_SHIFT,
+
+        NONE              = 0 << KIND_SHIFT,
+        MAT               = 1 << KIND_SHIFT,
+        MATX              = 2 << KIND_SHIFT,
+        STD_VECTOR        = 3 << KIND_SHIFT,
+        STD_VECTOR_VECTOR = 4 << KIND_SHIFT,
+        STD_VECTOR_MAT    = 5 << KIND_SHIFT,
+#if OPENCV_ABI_COMPATIBILITY < 500
+        EXPR              = 6 << KIND_SHIFT,  //!< removed: https://github.com/opencv/opencv/pull/17046
+#endif
+        OPENGL_BUFFER     = 7 << KIND_SHIFT,
+        CUDA_HOST_MEM     = 8 << KIND_SHIFT,
+        CUDA_GPU_MAT      = 9 << KIND_SHIFT,
+        UMAT              =10 << KIND_SHIFT,
+        STD_VECTOR_UMAT   =11 << KIND_SHIFT,
+        STD_BOOL_VECTOR   =12 << KIND_SHIFT,
+        STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT,
+#if OPENCV_ABI_COMPATIBILITY < 500
+        STD_ARRAY         =14 << KIND_SHIFT,  //!< removed: https://github.com/opencv/opencv/issues/18897
+#endif
+        STD_ARRAY_MAT     =15 << KIND_SHIFT
+    };
+
+    _InputArray();
+    _InputArray(int _flags, void* _obj);
+    _InputArray(const Mat& m);
+    _InputArray(const MatExpr& expr);
+    _InputArray(const std::vector<Mat>& vec);
+    template<typename _Tp> _InputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
+    _InputArray(const std::vector<bool>& vec);
+    template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
+    _InputArray(const std::vector<std::vector<bool> >&) = delete;  // not supported
+    template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
+    _InputArray(const double& val);
+    _InputArray(const cuda::GpuMat& d_mat);
+    _InputArray(const std::vector<cuda::GpuMat>& d_mat_array);
+    _InputArray(const ogl::Buffer& buf);
+    _InputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
+    _InputArray(const UMat& um);
+    _InputArray(const std::vector<UMat>& umv);
+
+    template<typename _Tp, std::size_t _Nm> _InputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _InputArray(const std::array<Mat, _Nm>& arr);
+
+    template<typename _Tp> static _InputArray rawIn(const std::vector<_Tp>& vec);
+    template<typename _Tp, std::size_t _Nm> static _InputArray rawIn(const std::array<_Tp, _Nm>& arr);
+
+    Mat getMat(int idx=-1) const;
+    Mat getMat_(int idx=-1) const;
+    UMat getUMat(int idx=-1) const;
+    void getMatVector(std::vector<Mat>& mv) const;
+    void getUMatVector(std::vector<UMat>& umv) const;
+    void getGpuMatVector(std::vector<cuda::GpuMat>& gpumv) const;
+    cuda::GpuMat getGpuMat() const;
+    ogl::Buffer getOGlBuffer() const;
+
+    int getFlags() const;
+    void* getObj() const;
+    Size getSz() const;
+
+    _InputArray::KindFlag kind() const;
+    int dims(int i=-1) const;
+    int cols(int i=-1) const;
+    int rows(int i=-1) const;
+    Size size(int i=-1) const;
+    int sizend(int* sz, int i=-1) const;
+    bool sameSize(const _InputArray& arr) const;
+    size_t total(int i=-1) const;
+    int type(int i=-1) const;
+    int depth(int i=-1) const;
+    int channels(int i=-1) const;
+    bool isContinuous(int i=-1) const;
+    bool isSubmatrix(int i=-1) const;
+    bool empty() const;
+    void copyTo(const _OutputArray& arr) const;
+    void copyTo(const _OutputArray& arr, const _InputArray & mask) const;
+    size_t offset(int i=-1) const;
+    size_t step(int i=-1) const;
+    bool isMat() const;
+    bool isUMat() const;
+    bool isMatVector() const;
+    bool isUMatVector() const;
+    bool isMatx() const;
+    bool isVector() const;
+    bool isGpuMat() const;
+    bool isGpuMatVector() const;
+    ~_InputArray();
+
+protected:
+    int flags;
+    void* obj;
+    Size sz;
+
+    void init(int _flags, const
void* _obj); + void init(int _flags, const void* _obj, Size _sz); +}; +CV_ENUM_FLAGS(_InputArray::KindFlag) +__CV_ENUM_FLAGS_BITWISE_AND(_InputArray::KindFlag, int, _InputArray::KindFlag) + +/** @brief This type is very similar to InputArray except that it is used for input/output and output function +parameters. + +Just like with InputArray, OpenCV users should not care about OutputArray, they just pass `Mat`, +`vector` etc. to the functions. The same limitation as for `InputArray`: *Do not explicitly +create OutputArray instances* applies here too. + +If you want to make your function polymorphic (i.e. accept different arrays as output parameters), +it is also not very difficult. Take the sample above as the reference. Note that +_OutputArray::create() needs to be called before _OutputArray::getMat(). This way you guarantee +that the output array is properly allocated. + +Optional output parameters. If you do not need certain output array to be computed and returned to +you, pass cv::noArray(), just like you would in the case of optional input array. At the +implementation level, use _OutputArray::needed() to check if certain output array needs to be +computed or not. + +There are several synonyms for OutputArray that are used to assist automatic Python/Java/... wrapper +generators: +@code + typedef OutputArray OutputArrayOfArrays; + typedef OutputArray InputOutputArray; + typedef OutputArray InputOutputArrayOfArrays; +@endcode + */ +class CV_EXPORTS _OutputArray : public _InputArray +{ +public: + enum DepthMask + { + DEPTH_MASK_8U = 1 << CV_8U, + DEPTH_MASK_8S = 1 << CV_8S, + DEPTH_MASK_16U = 1 << CV_16U, + DEPTH_MASK_16S = 1 << CV_16S, + DEPTH_MASK_32S = 1 << CV_32S, + DEPTH_MASK_32F = 1 << CV_32F, + DEPTH_MASK_64F = 1 << CV_64F, + DEPTH_MASK_16F = 1 << CV_16F, + DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1, + DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S, + DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1, + DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F + }; + + _OutputArray(); + _OutputArray(int _flags, void* _obj); + _OutputArray(Mat& m); + _OutputArray(std::vector& vec); + _OutputArray(cuda::GpuMat& d_mat); + _OutputArray(std::vector& d_mat); + _OutputArray(ogl::Buffer& buf); + _OutputArray(cuda::HostMem& cuda_mem); + template _OutputArray(cudev::GpuMat_<_Tp>& m); + template _OutputArray(std::vector<_Tp>& vec); + _OutputArray(std::vector& vec) = delete; // not supported + template _OutputArray(std::vector >& vec); + _OutputArray(std::vector >&) = delete; // not supported + template _OutputArray(std::vector >& vec); + template _OutputArray(Mat_<_Tp>& m); + template _OutputArray(_Tp* vec, int n); + template _OutputArray(Matx<_Tp, m, n>& matx); + _OutputArray(UMat& m); + _OutputArray(std::vector& vec); + + _OutputArray(const Mat& m); + _OutputArray(const std::vector& vec); + _OutputArray(const cuda::GpuMat& d_mat); + _OutputArray(const std::vector& d_mat); + _OutputArray(const ogl::Buffer& buf); + _OutputArray(const cuda::HostMem& cuda_mem); + template _OutputArray(const cudev::GpuMat_<_Tp>& m); + template _OutputArray(const std::vector<_Tp>& vec); + template _OutputArray(const std::vector >& vec); + template _OutputArray(const std::vector >& vec); + template _OutputArray(const Mat_<_Tp>& m); + template _OutputArray(const _Tp* vec, int n); + template _OutputArray(const Matx<_Tp, m, n>& matx); + _OutputArray(const UMat& m); + _OutputArray(const std::vector& vec); + + template _OutputArray(std::array<_Tp, _Nm>& arr); + template _OutputArray(const std::array<_Tp, _Nm>& arr); + template 
_OutputArray(std::array& arr); + template _OutputArray(const std::array& arr); + + template static _OutputArray rawOut(std::vector<_Tp>& vec); + template static _OutputArray rawOut(std::array<_Tp, _Nm>& arr); + + bool fixedSize() const; + bool fixedType() const; + bool needed() const; + Mat& getMatRef(int i=-1) const; + UMat& getUMatRef(int i=-1) const; + cuda::GpuMat& getGpuMatRef() const; + std::vector& getGpuMatVecRef() const; + ogl::Buffer& getOGlBufferRef() const; + cuda::HostMem& getHostMemRef() const; + void create(Size sz, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const; + void createSameSize(const _InputArray& arr, int mtype) const; + void release() const; + void clear() const; + void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const; + + void assign(const UMat& u) const; + void assign(const Mat& m) const; + + void assign(const std::vector& v) const; + void assign(const std::vector& v) const; + + void move(UMat& u) const; + void move(Mat& m) const; +}; + + +class CV_EXPORTS _InputOutputArray : public _OutputArray +{ +public: + _InputOutputArray(); + _InputOutputArray(int _flags, void* _obj); + _InputOutputArray(Mat& m); + _InputOutputArray(std::vector& vec); + _InputOutputArray(cuda::GpuMat& d_mat); + _InputOutputArray(ogl::Buffer& buf); + _InputOutputArray(cuda::HostMem& cuda_mem); + template _InputOutputArray(cudev::GpuMat_<_Tp>& m); + template _InputOutputArray(std::vector<_Tp>& vec); + _InputOutputArray(std::vector& vec) = delete; // not supported + template _InputOutputArray(std::vector >& vec); + template _InputOutputArray(std::vector >& vec); + template _InputOutputArray(Mat_<_Tp>& m); + template _InputOutputArray(_Tp* vec, int n); + template _InputOutputArray(Matx<_Tp, m, n>& matx); + _InputOutputArray(UMat& m); + _InputOutputArray(std::vector& vec); + + _InputOutputArray(const Mat& m); + _InputOutputArray(const std::vector& vec); + _InputOutputArray(const cuda::GpuMat& d_mat); + _InputOutputArray(const std::vector& d_mat); + _InputOutputArray(const ogl::Buffer& buf); + _InputOutputArray(const cuda::HostMem& cuda_mem); + template _InputOutputArray(const cudev::GpuMat_<_Tp>& m); + template _InputOutputArray(const std::vector<_Tp>& vec); + template _InputOutputArray(const std::vector >& vec); + template _InputOutputArray(const std::vector >& vec); + template _InputOutputArray(const Mat_<_Tp>& m); + template _InputOutputArray(const _Tp* vec, int n); + template _InputOutputArray(const Matx<_Tp, m, n>& matx); + _InputOutputArray(const UMat& m); + _InputOutputArray(const std::vector& vec); + + template _InputOutputArray(std::array<_Tp, _Nm>& arr); + template _InputOutputArray(const std::array<_Tp, _Nm>& arr); + template _InputOutputArray(std::array& arr); + template _InputOutputArray(const std::array& arr); + + template static _InputOutputArray rawInOut(std::vector<_Tp>& vec); + template _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr); + +}; + +/** Helper to wrap custom types. @see InputArray */ +template static inline _InputArray rawIn(_Tp& v); +/** Helper to wrap custom types. 
@see InputArray */ +template static inline _OutputArray rawOut(_Tp& v); +/** Helper to wrap custom types. @see InputArray */ +template static inline _InputOutputArray rawInOut(_Tp& v); + +CV__DEBUG_NS_END + +typedef const _InputArray& InputArray; +typedef InputArray InputArrayOfArrays; +typedef const _OutputArray& OutputArray; +typedef OutputArray OutputArrayOfArrays; +typedef const _InputOutputArray& InputOutputArray; +typedef InputOutputArray InputOutputArrayOfArrays; + +CV_EXPORTS InputOutputArray noArray(); + +/////////////////////////////////// MatAllocator ////////////////////////////////////// + +/** @brief Usage flags for allocator + + @warning All flags except `USAGE_DEFAULT` are experimental. + + @warning For the OpenCL allocator, `USAGE_ALLOCATE_SHARED_MEMORY` depends on + OpenCV's optional, experimental integration with OpenCL SVM. To enable this + integration, build OpenCV using the `WITH_OPENCL_SVM=ON` CMake option and, at + runtime, call `cv::ocl::Context::getDefault().setUseSVM(true);` or similar + code. Note that SVM is incompatible with OpenCL 1.x. +*/ +enum UMatUsageFlags +{ + USAGE_DEFAULT = 0, + + // buffer allocation policy is platform and usage specific + USAGE_ALLOCATE_HOST_MEMORY = 1 << 0, + USAGE_ALLOCATE_DEVICE_MEMORY = 1 << 1, + USAGE_ALLOCATE_SHARED_MEMORY = 1 << 2, // It is not equal to: USAGE_ALLOCATE_HOST_MEMORY | USAGE_ALLOCATE_DEVICE_MEMORY + + __UMAT_USAGE_FLAGS_32BIT = 0x7fffffff // Binary compatibility hint +}; + +struct CV_EXPORTS UMatData; + +/** @brief Custom array allocator +*/ +class CV_EXPORTS MatAllocator +{ +public: + MatAllocator() {} + virtual ~MatAllocator() {} + + // let's comment it off for now to detect and fix all the uses of allocator + //virtual void allocate(int dims, const int* sizes, int type, int*& refcount, + // uchar*& datastart, uchar*& data, size_t* step) = 0; + //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0; + virtual UMatData* allocate(int dims, const int* sizes, int type, + void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const = 0; + virtual bool allocate(UMatData* data, AccessFlag accessflags, UMatUsageFlags usageFlags) const = 0; + virtual void deallocate(UMatData* data) const = 0; + virtual void map(UMatData* data, AccessFlag accessflags) const; + virtual void unmap(UMatData* data) const; + virtual void download(UMatData* data, void* dst, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dststep[]) const; + virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[], + const size_t dstofs[], const size_t dststep[], + const size_t srcstep[]) const; + virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dstofs[], const size_t dststep[], bool sync) const; + + // default implementation returns DummyBufferPoolController + virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const; +}; + + +//////////////////////////////// MatCommaInitializer ////////////////////////////////// + +/** @brief Comma-separated Matrix Initializer + + The class instances are usually not created explicitly. + Instead, they are created on "matrix << firstValue" operator. + + The sample below initializes 2x2 rotation matrix: + + \code + double angle = 30, a = cos(angle*CV_PI/180), b = sin(angle*CV_PI/180); + Mat R = (Mat_(2,2) << a, -b, b, a); + \endcode +*/ +template class MatCommaInitializer_ +{ +public: + //! 
the constructor, created by "matrix << firstValue" operator, where matrix is cv::Mat
+    MatCommaInitializer_(Mat_<_Tp>* _m);
+    //! the operator that takes the next value and put it to the matrix
+    template<typename T2> MatCommaInitializer_<_Tp>& operator , (T2 v);
+    //! another form of conversion operator
+    operator Mat_<_Tp>() const;
+protected:
+    MatIterator_<_Tp> it;
+};
+
+
+/////////////////////////////////////// Mat ///////////////////////////////////////////
+
+// note that umatdata might be allocated together
+// with the matrix data, not as a separate object.
+// therefore, it does not have constructor or destructor;
+// it should be explicitly initialized using init().
+struct CV_EXPORTS UMatData
+{
+    enum MemoryFlag { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
+        DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
+        USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64,
+        ASYNC_CLEANUP=128
+    };
+    UMatData(const MatAllocator* allocator);
+    ~UMatData();
+
+    // provide atomic access to the structure
+    void lock();
+    void unlock();
+
+    bool hostCopyObsolete() const;
+    bool deviceCopyObsolete() const;
+    bool deviceMemMapped() const;
+    bool copyOnMap() const;
+    bool tempUMat() const;
+    bool tempCopiedUMat() const;
+    void markHostCopyObsolete(bool flag);
+    void markDeviceCopyObsolete(bool flag);
+    void markDeviceMemMapped(bool flag);
+
+    const MatAllocator* prevAllocator;
+    const MatAllocator* currAllocator;
+    int urefcount;
+    int refcount;
+    uchar* data;
+    uchar* origdata;
+    size_t size;
+
+    UMatData::MemoryFlag flags;
+    void* handle;
+    void* userdata;
+    int allocatorFlags_;
+    int mapcount;
+    UMatData* originalUMatData;
+    std::shared_ptr<void> allocatorContext;
+};
+CV_ENUM_FLAGS(UMatData::MemoryFlag)
+
+
+struct CV_EXPORTS MatSize
+{
+    explicit MatSize(int* _p) CV_NOEXCEPT;
+    int dims() const CV_NOEXCEPT;
+    Size operator()() const;
+    const int& operator[](int i) const;
+    int& operator[](int i);
+    operator const int*() const CV_NOEXCEPT;  // TODO OpenCV 4.0: drop this
+    bool operator == (const MatSize& sz) const CV_NOEXCEPT;
+    bool operator != (const MatSize& sz) const CV_NOEXCEPT;
+
+    int* p;
+};
+
+struct CV_EXPORTS MatStep
+{
+    MatStep() CV_NOEXCEPT;
+    explicit MatStep(size_t s) CV_NOEXCEPT;
+    const size_t& operator[](int i) const CV_NOEXCEPT;
+    size_t& operator[](int i) CV_NOEXCEPT;
+    operator size_t() const;
+    MatStep& operator = (size_t s);
+
+    size_t* p;
+    size_t buf[2];
+protected:
+    MatStep& operator = (const MatStep&);
+};
+
+/** @example samples/cpp/cout_mat.cpp
+An example demonstrating the serial out capabilities of cv::Mat
+*/
+
+ /** @brief n-dimensional dense array class \anchor CVMat_Details
+
+The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It
+can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel
+volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms
+may be better stored in a SparseMat ). The data layout of the array `M` is defined by the array
+`M.step[]`, so that the address of element \f$(i_0,...,i_{M.dims-1})\f$, where \f$0\leq i_k<M.size[k]\f$, is
+computed as:
+\f[addr(M_{i_0,...,i_{M.dims-1}}) = M.data + M.step[0]*i_0 + M.step[1]*i_1 + ... + M.step[M.dims-1]*i_{M.dims-1}\f]
+In case of a 2-dimensional array, the above formula is reduced to:
+\f[addr(M_{i,j}) = M.data + M.step[0]*i + M.step[1]*j\f]
+Note that `M.step[i] >= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[i+1]` ). This means
+that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane,
+and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() .
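As a quick sanity check of the addressing formula above, a minimal sketch using only public Mat members (not part of the original documentation):
@code
    // addr(M_{i,j}) = M.data + M.step[0]*i + M.step[1]*j
    cv::Mat M(4, 5, CV_32FC1);
    int i = 2, j = 3;
    const uchar* byFormula = M.data + M.step[0]*i + M.step[1]*j;
    const uchar* byPtr = reinterpret_cast<const uchar*>(M.ptr<float>(i) + j);
    CV_Assert(byFormula == byPtr);
    // Here M is continuous, so M.step[1] == M.elemSize() == sizeof(float).
@endcode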
+ +So, the data layout in Mat is compatible with the majority of dense array types from the standard +toolkits and SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, +that is, with any array that uses *steps* (or *strides*) to compute the position of a pixel. +Due to this compatibility, it is possible to make a Mat header for user-allocated data and process +it in-place using OpenCV functions. + +There are many different ways to create a Mat object. The most popular options are listed below: + +- Use the create(nrows, ncols, type) method or the similar Mat(nrows, ncols, type[, fillValue]) +constructor. A new array of the specified size and type is allocated. type has the same meaning as +in the cvCreateMat method. For example, CV_8UC1 means a 8-bit single-channel array, CV_32FC2 +means a 2-channel (complex) floating-point array, and so on. +@code + // make a 7x7 complex matrix filled with 1+3j. + Mat M(7,7,CV_32FC2,Scalar(1,3)); + // and now turn M to a 100x60 15-channel 8-bit matrix. + // The old content will be deallocated + M.create(100,60,CV_8UC(15)); +@endcode +As noted in the introduction to this chapter, create() allocates only a new array when the shape +or type of the current array are different from the specified ones. + +- Create a multi-dimensional array: +@code + // create a 100x100x100 8-bit array + int sz[] = {100, 100, 100}; + Mat bigCube(3, sz, CV_8U, Scalar::all(0)); +@endcode +It passes the number of dimensions =1 to the Mat constructor but the created array will be +2-dimensional with the number of columns set to 1. So, Mat::dims is always \>= 2 (can also be 0 +when the array is empty). + +- Use a copy constructor or assignment operator where there can be an array or expression on the +right side (see below). As noted in the introduction, the array assignment is an O(1) operation +because it only copies the header and increases the reference counter. The Mat::clone() method can +be used to get a full (deep) copy of the array when you need it. + +- Construct a header for a part of another array. It can be a single row, single column, several +rows, several columns, rectangular region in the array (called a *minor* in algebra) or a +diagonal. Such operations are also O(1) because the new header references the same data. You can +actually modify a part of the array using this feature, for example: +@code + // add the 5-th row, multiplied by 3 to the 3rd row + M.row(3) = M.row(3) + M.row(5)*3; + // now copy the 7-th column to the 1-st column + // M.col(1) = M.col(7); // this will not work + Mat M1 = M.col(1); + M.col(7).copyTo(M1); + // create a new 320x240 image + Mat img(Size(320,240),CV_8UC3); + // select a ROI + Mat roi(img, Rect(10,10,100,100)); + // fill the ROI with (0,255,0) (which is green in RGB space); + // the original 320x240 image will be modified + roi = Scalar(0,255,0); +@endcode +Due to the additional datastart and dataend members, it is possible to compute a relative +sub-array position in the main *container* array using locateROI(): +@code + Mat A = Mat::eye(10, 10, CV_32S); + // extracts A columns, 1 (inclusive) to 3 (exclusive). + Mat B = A(Range::all(), Range(1, 3)); + // extracts B rows, 5 (inclusive) to 9 (exclusive). 
+ // that is, C \~ A(Range(5, 9), Range(1, 3)) + Mat C = B(Range(5, 9), Range::all()); + Size size; Point ofs; + C.locateROI(size, ofs); + // size will be (width=10,height=10) and the ofs will be (x=1, y=5) +@endcode +As in case of whole matrices, if you need a deep copy, use the `clone()` method of the extracted +sub-matrices. + +- Make a header for user-allocated data. It can be useful to do the following: + -# Process "foreign" data using OpenCV (for example, when you implement a DirectShow\* filter or + a processing module for gstreamer, and so on). For example: + @code + Mat process_video_frame(const unsigned char* pixels, + int width, int height, int step) + { + // wrap input buffer + Mat img(height, width, CV_8UC3, (unsigned char*)pixels, step); + + Mat result; + GaussianBlur(img, result, Size(7, 7), 1.5, 1.5); + + return result; + } + @endcode + -# Quickly initialize small matrices and/or get a super-fast element access. + @code + double m[3][3] = {{a, b, c}, {d, e, f}, {g, h, i}}; + Mat M = Mat(3, 3, CV_64F, m).inv(); + @endcode + . + +- Use MATLAB-style array initializers, zeros(), ones(), eye(), for example: +@code + // create a double-precision identity matrix and add it to M. + M += Mat::eye(M.rows, M.cols, CV_64F); +@endcode + +- Use a comma-separated initializer: +@code + // create a 3x3 double-precision identity matrix + Mat M = (Mat_(3,3) << 1, 0, 0, 0, 1, 0, 0, 0, 1); +@endcode +With this approach, you first call a constructor of the Mat class with the proper parameters, and +then you just put `<< operator` followed by comma-separated values that can be constants, +variables, expressions, and so on. Also, note the extra parentheses required to avoid compilation +errors. + +Once the array is created, it is automatically managed via a reference-counting mechanism. If the +array header is built on top of user-allocated data, you should handle the data by yourself. The +array data is deallocated when no one points to it. If you want to release the data pointed by a +array header before the array destructor is called, use Mat::release(). + +The next important thing to learn about the array class is element access. This manual already +described how to compute an address of each array element. Normally, you are not required to use the +formula directly in the code. If you know the array element type (which can be retrieved using the +method Mat::type() ), you can access the element \f$M_{ij}\f$ of a 2-dimensional array as: +@code + M.at(i,j) += 1.f; +@endcode +assuming that `M` is a double-precision floating-point array. There are several variants of the method +at for a different number of dimensions. + +If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to +the row first, and then just use the plain C operator [] : +@code + // compute sum of positive matrix elements + // (assuming that M is a double-precision matrix) + double sum=0; + for(int i = 0; i < M.rows; i++) + { + const double* Mi = M.ptr(i); + for(int j = 0; j < M.cols; j++) + sum += std::max(Mi[j], 0.); + } +@endcode +Some operations, like the one above, do not actually depend on the array shape. They just process +elements of an array one by one (or elements from multiple arrays that have the same coordinates, +for example, array addition). Such operations are called *element-wise*. It makes sense to check +whether all the input/output arrays are continuous, namely, have no gaps at the end of each row. 
If +yes, process them as a long single row: +@code + // compute the sum of positive matrix elements, optimized variant + double sum=0; + int cols = M.cols, rows = M.rows; + if(M.isContinuous()) + { + cols *= rows; + rows = 1; + } + for(int i = 0; i < rows; i++) + { + const double* Mi = M.ptr(i); + for(int j = 0; j < cols; j++) + sum += std::max(Mi[j], 0.); + } +@endcode +In case of the continuous matrix, the outer loop body is executed just once. So, the overhead is +smaller, which is especially noticeable in case of small matrices. + +Finally, there are STL-style iterators that are smart enough to skip gaps between successive rows: +@code + // compute sum of positive matrix elements, iterator-based variant + double sum=0; + MatConstIterator_ it = M.begin(), it_end = M.end(); + for(; it != it_end; ++it) + sum += std::max(*it, 0.); +@endcode +The matrix iterators are random-access iterators, so they can be passed to any STL algorithm, +including std::sort(). + +@note Matrix Expressions and arithmetic see MatExpr +*/ +class CV_EXPORTS Mat +{ +public: + /** + These are various constructors that form a matrix. As noted in the AutomaticAllocation, often + the default constructor is enough, and the proper matrix will be allocated by an OpenCV function. + The constructed matrix can further be assigned to another matrix or matrix expression or can be + allocated with Mat::create . In the former case, the old content is de-referenced. + */ + Mat() CV_NOEXCEPT; + + /** @overload + @param rows Number of rows in a 2D array. + @param cols Number of columns in a 2D array. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(int rows, int cols, int type); + + /** @overload + @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the + number of columns go in the reverse order. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(Size size, int type); + + /** @overload + @param rows Number of rows in a 2D array. + @param cols Number of columns in a 2D array. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(int rows, int cols, int type, const Scalar& s); + + /** @overload + @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the + number of columns go in the reverse order. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(Size size, int type, const Scalar& s); + + /** @overload + @param ndims Array dimensionality. + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. 
Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(int ndims, const int* sizes, int type); + + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + Mat(const std::vector& sizes, int type); + + /** @overload + @param ndims Array dimensionality. + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(int ndims, const int* sizes, int type, const Scalar& s); + + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param s An optional value to initialize each matrix element with. To set all the matrix elements to + the particular value after the construction, use the assignment operator + Mat::operator=(const Scalar& value) . + */ + Mat(const std::vector& sizes, int type, const Scalar& s); + + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + */ + Mat(const Mat& m); + + /** @overload + @param rows Number of rows in a 2D array. + @param cols Number of columns in a 2D array. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param step Number of bytes each matrix row occupies. The value should include the padding bytes at + the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed + and the actual step is calculated as cols*elemSize(). See Mat::elemSize. + */ + Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP); + + /** @overload + @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the + number of columns go in the reverse order. + @param type Array type. 
Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param step Number of bytes each matrix row occupies. The value should include the padding bytes at + the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed + and the actual step is calculated as cols*elemSize(). See Mat::elemSize. + */ + Mat(Size size, int type, void* data, size_t step=AUTO_STEP); + + /** @overload + @param ndims Array dimensionality. + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always + set to the element size). If not specified, the matrix is assumed to be continuous. + */ + Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0); + + /** @overload + @param sizes Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always + set to the element size). If not specified, the matrix is assumed to be continuous. + */ + Mat(const std::vector& sizes, int type, void* data, const size_t* steps=0); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param rowRange Range of the m rows to take. As usual, the range start is inclusive and the range + end is exclusive. 
Use Range::all() to take all the rows. + @param colRange Range of the m columns to take. Use Range::all() to take all the columns. + */ + Mat(const Mat& m, const Range& rowRange, const Range& colRange=Range::all()); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param roi Region of interest. + */ + Mat(const Mat& m, const Rect& roi); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param ranges Array of selected ranges of m along each dimensionality. + */ + Mat(const Mat& m, const Range* ranges); + + /** @overload + @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied + by these constructors. Instead, the header pointing to m data or its sub-array is constructed and + associated with it. The reference counter, if any, is incremented. So, when you modify the matrix + formed using such a constructor, you also modify the corresponding elements of m . If you want to + have an independent copy of the sub-array, use Mat::clone() . + @param ranges Array of selected ranges of m along each dimensionality. + */ + Mat(const Mat& m, const std::vector& ranges); + + /** @overload + @param vec STL vector whose elements form the matrix. The matrix has a single column and the number + of rows equal to the number of vector elements. Type of the matrix matches the type of vector + elements. The constructor can handle arbitrary types, for which there is a properly declared + DataType . This means that the vector elements must be primitive numbers or uni-type numerical + tuples of numbers. Mixed-type structures are not supported. The corresponding constructor is + explicit. Since STL vectors are not automatically converted to Mat instances, you should write + Mat(vec) explicitly. Unless you copy the data into the matrix ( copyData=true ), no new elements + will be added to the vector because it can potentially yield vector data reallocation, and, thus, + the matrix data pointer will be invalid. + @param copyData Flag to specify whether the underlying data of the STL vector should be copied + to (true) or shared with (false) the newly constructed matrix. When the data is copied, the + allocated buffer is managed using Mat reference counting mechanism. While the data is shared, + the reference counter is NULL, and you should not deallocate the data until the matrix is + destructed. 
+ */ + template explicit Mat(const std::vector<_Tp>& vec, bool copyData=false); + + /** @overload + */ + template::value>::type> + explicit Mat(const std::initializer_list<_Tp> list); + + /** @overload + */ + template explicit Mat(const std::initializer_list sizes, const std::initializer_list<_Tp> list); + + /** @overload + */ + template explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false); + + /** @overload + */ + template explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true); + + /** @overload + */ + template explicit Mat(const Matx<_Tp, m, n>& mtx, bool copyData=true); + + /** @overload + */ + template explicit Mat(const Point_<_Tp>& pt, bool copyData=true); + + /** @overload + */ + template explicit Mat(const Point3_<_Tp>& pt, bool copyData=true); + + /** @overload + */ + template explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer); + + //! download data from GpuMat + explicit Mat(const cuda::GpuMat& m); + + //! destructor - calls release() + ~Mat(); + + /** @brief assignment operators + + These are available assignment operators. Since they all are very different, make sure to read the + operator parameters description. + @param m Assigned, right-hand-side matrix. Matrix assignment is an O(1) operation. This means that + no data is copied but the data is shared and the reference counter, if any, is incremented. Before + assigning new data, the old data is de-referenced via Mat::release . + */ + Mat& operator = (const Mat& m); + + /** @overload + @param expr Assigned matrix expression object. As opposite to the first form of the assignment + operation, the second form can reuse already allocated matrix if it has the right size and type to + fit the matrix expression result. It is automatically handled by the real function that the matrix + expressions is expanded to. For example, C=A+B is expanded to add(A, B, C), and add takes care of + automatic C reallocation. + */ + Mat& operator = (const MatExpr& expr); + + //! retrieve UMat from Mat + UMat getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const; + + /** @brief Creates a matrix header for the specified matrix row. + + The method makes a new header for the specified matrix row and returns it. This is an O(1) + operation, regardless of the matrix size. The underlying data of the new matrix is shared with the + original matrix. Here is the example of one of the classical basic matrix processing operations, + axpy, used by LU and many other algorithms: + @code + inline void matrix_axpy(Mat& A, int i, int j, double alpha) + { + A.row(i) += A.row(j)*alpha; + } + @endcode + @note In the current implementation, the following code does not work as expected: + @code + Mat A; + ... + A.row(i) = A.row(j); // will not work + @endcode + This happens because A.row(i) forms a temporary header that is further assigned to another header. + Remember that each of these operations is O(1), that is, no data is copied. Thus, the above + assignment is not true if you may have expected the j-th row to be copied to the i-th row. To + achieve that, you should either turn this simple assignment into an expression or use the + Mat::copyTo method: + @code + Mat A; + ... + // works, but looks a bit obscure. + A.row(i) = A.row(j) + 0; + // this is a bit longer, but the recommended method. + A.row(j).copyTo(A.row(i)); + @endcode + @param y A 0-based row index. + */ + Mat row(int y) const; + + /** @brief Creates a matrix header for the specified matrix column. 
+ + The method makes a new header for the specified matrix column and returns it. This is an O(1) + operation, regardless of the matrix size. The underlying data of the new matrix is shared with the + original matrix. See also the Mat::row description. + @param x A 0-based column index. + */ + Mat col(int x) const; + + /** @brief Creates a matrix header for the specified row span. + + The method makes a new header for the specified row span of the matrix. Similarly to Mat::row and + Mat::col , this is an O(1) operation. + @param startrow An inclusive 0-based start index of the row span. + @param endrow An exclusive 0-based ending index of the row span. + */ + Mat rowRange(int startrow, int endrow) const; + + /** @overload + @param r Range structure containing both the start and the end indices. + */ + Mat rowRange(const Range& r) const; + + /** @brief Creates a matrix header for the specified column span. + + The method makes a new header for the specified column span of the matrix. Similarly to Mat::row and + Mat::col , this is an O(1) operation. + @param startcol An inclusive 0-based start index of the column span. + @param endcol An exclusive 0-based ending index of the column span. + */ + Mat colRange(int startcol, int endcol) const; + + /** @overload + @param r Range structure containing both the start and the end indices. + */ + Mat colRange(const Range& r) const; + + /** @brief Extracts a diagonal from a matrix + + The method makes a new header for the specified matrix diagonal. The new matrix is represented as a + single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation. + @param d index of the diagonal, with the following values: + - `d=0` is the main diagonal. + - `d<0` is a diagonal from the lower half. For example, d=-1 means the diagonal is set + immediately below the main one. + - `d>0` is a diagonal from the upper half. For example, d=1 means the diagonal is set + immediately above the main one. + For example: + @code + Mat m = (Mat_(3,3) << + 1,2,3, + 4,5,6, + 7,8,9); + Mat d0 = m.diag(0); + Mat d1 = m.diag(1); + Mat d_1 = m.diag(-1); + @endcode + The resulting matrices are + @code + d0 = + [1; + 5; + 9] + d1 = + [2; + 6] + d_1 = + [4; + 8] + @endcode + */ + Mat diag(int d=0) const; + + /** @brief creates a diagonal matrix + + The method creates a square diagonal matrix from specified main diagonal. + @param d One-dimensional matrix that represents the main diagonal. + */ + CV_NODISCARD_STD static Mat diag(const Mat& d); + + /** @brief Creates a full copy of the array and the underlying data. + + The method creates a full copy of the array. The original step[] is not taken into account. So, the + array copy is a continuous array occupying total()*elemSize() bytes. + */ + CV_NODISCARD_STD Mat clone() const; + + /** @brief Copies the matrix to another one. + + The method copies the matrix data to another matrix. Before copying the data, the method invokes : + @code + m.create(this->size(), this->type()); + @endcode + so that the destination matrix is reallocated if needed. While m.copyTo(m); works flawlessly, the + function does not handle the case of a partial overlap between the source and the destination + matrices. + + When the operation mask is specified, if the Mat::create call shown above reallocates the matrix, + the newly allocated matrix is initialized with all zeros before copying the data. + @param m Destination matrix. If it does not have a proper size or type before the operation, it is + reallocated. 
+ */ + void copyTo( OutputArray m ) const; + + /** @overload + @param m Destination matrix. If it does not have a proper size or type before the operation, it is + reallocated. + @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix + elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels. + */ + void copyTo( OutputArray m, InputArray mask ) const; + + /** @brief Converts an array to another data type with optional scaling. + + The method converts source pixel values to the target data type. saturate_cast\<\> is applied at + the end to avoid possible overflows: + + \f[m(x,y) = saturate \_ cast( \alpha (*this)(x,y) + \beta )\f] + @param m output matrix; if it does not have a proper size or type before the operation, it is + reallocated. + @param rtype desired output matrix type or, rather, the depth since the number of channels are the + same as the input has; if rtype is negative, the output matrix will have the same type as the input. + @param alpha optional scale factor. + @param beta optional delta added to the scaled values. + */ + void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const; + + /** @brief Provides a functional form of convertTo. + + This is an internally used method called by the @ref MatrixExpressions engine. + @param m Destination array. + @param type Desired destination array depth (or -1 if it should be the same as the source type). + */ + void assignTo( Mat& m, int type=-1 ) const; + + /** @brief Sets all or some of the array elements to the specified value. + @param s Assigned scalar converted to the actual array type. + */ + Mat& operator = (const Scalar& s); + + /** @brief Sets all or some of the array elements to the specified value. + + This is an advanced variant of the Mat::operator=(const Scalar& s) operator. + @param value Assigned scalar converted to the actual array type. + @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix + elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels + */ + Mat& setTo(InputArray value, InputArray mask=noArray()); + + /** @brief Changes the shape and/or the number of channels of a 2D matrix without copying the data. + + The method makes a new matrix header for \*this elements. The new matrix may have a different size + and/or different number of channels. Any combination is possible if: + - No extra elements are included into the new matrix and no elements are excluded. Consequently, + the product rows\*cols\*channels() must stay the same after the transformation. + - No data is copied. That is, this is an O(1) operation. Consequently, if you change the number of + rows, or the operation changes the indices of elements row in some other way, the matrix must be + continuous. See Mat::isContinuous . + + For example, if there is a set of 3D points stored as an STL vector, and you want to represent the + points as a 3xN matrix, do the following: + @code + std::vector vec; + ... + Mat pointMat = Mat(vec). // convert vector to Mat, O(1) operation + reshape(1). // make Nx3 1-channel matrix out of Nx1 3-channel. + // Also, an O(1) operation + t(); // finally, transpose the Nx3 matrix. + // This involves copying all the elements + @endcode + @param cn New number of channels. If the parameter is 0, the number of channels remains the same. + @param rows New number of rows. If the parameter is 0, the number of rows remains the same. 
+ */ + Mat reshape(int cn, int rows=0) const; + + /** @overload */ + Mat reshape(int cn, int newndims, const int* newsz) const; + + /** @overload */ + Mat reshape(int cn, const std::vector& newshape) const; + + /** @brief Transposes a matrix. + + The method performs matrix transposition by means of matrix expressions. It does not perform the + actual transposition but returns a temporary matrix transposition object that can be further used as + a part of more complex matrix expressions or can be assigned to a matrix: + @code + Mat A1 = A + Mat::eye(A.size(), A.type())*lambda; + Mat C = A1.t()*A1; // compute (A + lambda*I)^t * (A + lamda*I) + @endcode + */ + MatExpr t() const; + + /** @brief Inverses a matrix. + + The method performs a matrix inversion by means of matrix expressions. This means that a temporary + matrix inversion object is returned by the method and can be used further as a part of more complex + matrix expressions or can be assigned to a matrix. + @param method Matrix inversion method. One of cv::DecompTypes + */ + MatExpr inv(int method=DECOMP_LU) const; + + /** @brief Performs an element-wise multiplication or division of the two matrices. + + The method returns a temporary object encoding per-element array multiplication, with optional + scale. Note that this is not a matrix multiplication that corresponds to a simpler "\*" operator. + + Example: + @code + Mat C = A.mul(5/B); // equivalent to divide(A, B, C, 5) + @endcode + @param m Another array of the same type and the same size as \*this, or a matrix expression. + @param scale Optional scale factor. + */ + MatExpr mul(InputArray m, double scale=1) const; + + /** @brief Computes a cross-product of two 3-element vectors. + + The method computes a cross-product of two 3-element vectors. The vectors must be 3-element + floating-point vectors of the same shape and size. The result is another 3-element vector of the + same shape and type as operands. + @param m Another cross-product operand. + */ + Mat cross(InputArray m) const; + + /** @brief Computes a dot-product of two vectors. + + The method computes a dot-product of two matrices. If the matrices are not single-column or + single-row vectors, the top-to-bottom left-to-right scan ordering is used to treat them as 1D + vectors. The vectors must have the same size and type. If the matrices have more than one channel, + the dot products from all the channels are summed together. + @param m another dot-product operand. + */ + double dot(InputArray m) const; + + /** @brief Returns a zero array of the specified size and type. + + The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant + array as a function parameter, part of a matrix expression, or as a matrix initializer: + @code + Mat A; + A = Mat::zeros(3, 3, CV_32F); + @endcode + In the example above, a new matrix is allocated only if A is not a 3x3 floating-point matrix. + Otherwise, the existing matrix A is filled with zeros. + @param rows Number of rows. + @param cols Number of columns. + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr zeros(int rows, int cols, int type); + + /** @overload + @param size Alternative to the matrix size specification Size(cols, rows) . + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr zeros(Size size, int type); + + /** @overload + @param ndims Array dimensionality. + @param sz Array of integers specifying the array shape. + @param type Created matrix type. 
+ */ + CV_NODISCARD_STD static MatExpr zeros(int ndims, const int* sz, int type); + + /** @brief Returns an array of all 1's of the specified size and type. + + The method returns a Matlab-style 1's array initializer, similarly to Mat::zeros. Note that using + this method you can initialize an array with an arbitrary value, using the following Matlab idiom: + @code + Mat A = Mat::ones(100, 100, CV_8U)*3; // make 100x100 matrix filled with 3. + @endcode + The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it + just remembers the scale factor (3 in this case) and use it when actually invoking the matrix + initializer. + @note In case of multi-channels type, only the first channel will be initialized with 1's, the + others will be set to 0's. + @param rows Number of rows. + @param cols Number of columns. + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr ones(int rows, int cols, int type); + + /** @overload + @param size Alternative to the matrix size specification Size(cols, rows) . + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr ones(Size size, int type); + + /** @overload + @param ndims Array dimensionality. + @param sz Array of integers specifying the array shape. + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr ones(int ndims, const int* sz, int type); + + /** @brief Returns an identity matrix of the specified size and type. + + The method returns a Matlab-style identity matrix initializer, similarly to Mat::zeros. Similarly to + Mat::ones, you can use a scale operation to create a scaled identity matrix efficiently: + @code + // make a 4x4 diagonal matrix with 0.1's on the diagonal. + Mat A = Mat::eye(4, 4, CV_32F)*0.1; + @endcode + @note In case of multi-channels type, identity matrix will be initialized only for the first channel, + the others will be set to 0's + @param rows Number of rows. + @param cols Number of columns. + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr eye(int rows, int cols, int type); + + /** @overload + @param size Alternative matrix size specification as Size(cols, rows) . + @param type Created matrix type. + */ + CV_NODISCARD_STD static MatExpr eye(Size size, int type); + + /** @brief Allocates new array data if needed. + + This is one of the key Mat methods. Most new-style OpenCV functions and methods that produce arrays + call this method for each output array. The method uses the following algorithm: + + -# If the current array shape and the type match the new ones, return immediately. Otherwise, + de-reference the previous data by calling Mat::release. + -# Initialize the new header. + -# Allocate the new data of total()\*elemSize() bytes. + -# Allocate the new, associated with the data, reference counter and set it to 1. + + Such a scheme makes the memory management robust and efficient at the same time and helps avoid + extra typing for you. This means that usually there is no need to explicitly allocate output arrays. + That is, instead of writing: + @code + Mat color; + ... + Mat gray(color.rows, color.cols, color.depth()); + cvtColor(color, gray, COLOR_BGR2GRAY); + @endcode + you can simply write: + @code + Mat color; + ... + Mat gray; + cvtColor(color, gray, COLOR_BGR2GRAY); + @endcode + because cvtColor, as well as the most of OpenCV functions, calls Mat::create() for the output array + internally. + @param rows New number of rows. + @param cols New number of columns. + @param type New matrix type. 
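+
+    One way to see the rule from step 1 of the algorithm above (a minimal sketch,
+    the sizes are illustrative only):
+    @code
+    Mat m;
+    m.create(10, 10, CV_8UC1);
+    uchar* p = m.data;
+    m.create(10, 10, CV_8UC1);  // same shape and type: returns immediately
+    CV_Assert(p == m.data);     // no reallocation happened
+    m.create(20, 20, CV_8UC1);  // different shape: the data is reallocated
+    @endcode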
+ */ + void create(int rows, int cols, int type); + + /** @overload + @param size Alternative new matrix size specification: Size(cols, rows) + @param type New matrix type. + */ + void create(Size size, int type); + + /** @overload + @param ndims New array dimensionality. + @param sizes Array of integers specifying a new array shape. + @param type New matrix type. + */ + void create(int ndims, const int* sizes, int type); + + /** @overload + @param sizes Array of integers specifying a new array shape. + @param type New matrix type. + */ + void create(const std::vector& sizes, int type); + + /** @brief Increments the reference counter. + + The method increments the reference counter associated with the matrix data. If the matrix header + points to an external data set (see Mat::Mat ), the reference counter is NULL, and the method has no + effect in this case. Normally, to avoid memory leaks, the method should not be called explicitly. It + is called implicitly by the matrix assignment operator. The reference counter increment is an atomic + operation on the platforms that support it. Thus, it is safe to operate on the same matrices + asynchronously in different threads. + */ + void addref(); + + /** @brief Decrements the reference counter and deallocates the matrix if needed. + + The method decrements the reference counter associated with the matrix data. When the reference + counter reaches 0, the matrix data is deallocated and the data and the reference counter pointers + are set to NULL's. If the matrix header points to an external data set (see Mat::Mat ), the + reference counter is NULL, and the method has no effect in this case. + + This method can be called manually to force the matrix data deallocation. But since this method is + automatically called in the destructor, or by any other method that changes the data pointer, it is + usually not needed. The reference counter decrement and check for 0 is an atomic operation on the + platforms that support it. Thus, it is safe to operate on the same matrices asynchronously in + different threads. + */ + void release(); + + //! internal use function, consider to use 'release' method instead; deallocates the matrix data + void deallocate(); + //! internal use function; properly re-allocates _size, _step arrays + void copySize(const Mat& m); + + /** @brief Reserves space for the certain number of rows. + + The method reserves space for sz rows. If the matrix already has enough space to store sz rows, + nothing happens. If the matrix is reallocated, the first Mat::rows rows are preserved. The method + emulates the corresponding method of the STL vector class. + @param sz Number of rows. + */ + void reserve(size_t sz); + + /** @brief Reserves space for the certain number of bytes. + + The method reserves space for sz bytes. If the matrix already has enough space to store sz bytes, + nothing happens. If matrix has to be reallocated its previous content could be lost. + @param sz Number of bytes. + */ + void reserveBuffer(size_t sz); + + /** @brief Changes the number of matrix rows. + + The methods change the number of matrix rows. If the matrix is reallocated, the first + min(Mat::rows, sz) rows are preserved. The methods emulate the corresponding methods of the STL + vector class. + @param sz New number of rows. + */ + void resize(size_t sz); + + /** @overload + @param sz New number of rows. + @param s Value assigned to the newly added elements. + */ + void resize(size_t sz, const Scalar& s); + + //! 
internal function + void push_back_(const void* elem); + + /** @brief Adds elements to the bottom of the matrix. + + The methods add one or more elements to the bottom of the matrix. They emulate the corresponding + method of the STL vector class. When elem is Mat , its type and the number of columns must be the + same as in the container matrix. + @param elem Added element(s). + */ + template void push_back(const _Tp& elem); + + /** @overload + @param elem Added element(s). + */ + template void push_back(const Mat_<_Tp>& elem); + + /** @overload + @param elem Added element(s). + */ + template void push_back(const std::vector<_Tp>& elem); + + /** @overload + @param m Added line(s). + */ + void push_back(const Mat& m); + + /** @brief Removes elements from the bottom of the matrix. + + The method removes one or more rows from the bottom of the matrix. + @param nelems Number of removed rows. If it is greater than the total number of rows, an exception + is thrown. + */ + void pop_back(size_t nelems=1); + + /** @brief Locates the matrix header within a parent matrix. + + After you extracted a submatrix from a matrix using Mat::row, Mat::col, Mat::rowRange, + Mat::colRange, and others, the resultant submatrix points just to the part of the original big + matrix. However, each submatrix contains information (represented by datastart and dataend + fields) that helps reconstruct the original matrix size and the position of the extracted + submatrix within the original matrix. The method locateROI does exactly that. + @param wholeSize Output parameter that contains the size of the whole matrix containing *this* + as a part. + @param ofs Output parameter that contains an offset of *this* inside the whole matrix. + */ + void locateROI( Size& wholeSize, Point& ofs ) const; + + /** @brief Adjusts a submatrix size and position within the parent matrix. + + The method is complimentary to Mat::locateROI . The typical use of these functions is to determine + the submatrix position within the parent matrix and then shift the position somehow. Typically, it + can be required for filtering operations when pixels outside of the ROI should be taken into + account. When all the method parameters are positive, the ROI needs to grow in all directions by the + specified amount, for example: + @code + A.adjustROI(2, 2, 2, 2); + @endcode + In this example, the matrix size is increased by 4 elements in each direction. The matrix is shifted + by 2 elements to the left and 2 elements up, which brings in all the necessary pixels for the + filtering with the 5x5 kernel. + + adjustROI forces the adjusted ROI to be inside of the parent matrix that is boundaries of the + adjusted ROI are constrained by boundaries of the parent matrix. For example, if the submatrix A is + located in the first row of a parent matrix and you called A.adjustROI(2, 2, 2, 2) then A will not + be increased in the upward direction. + + The function is used internally by the OpenCV filtering functions, like filter2D , morphological + operations, and so on. + @param dtop Shift of the top submatrix boundary upwards. + @param dbottom Shift of the bottom submatrix boundary downwards. + @param dleft Shift of the left submatrix boundary to the left. + @param dright Shift of the right submatrix boundary to the right. + @sa copyMakeBorder + */ + Mat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + + /** @brief Extracts a rectangular submatrix. + + The operators make a new header for the specified sub-array of \*this . 
They are the most + generalized forms of Mat::row, Mat::col, Mat::rowRange, and Mat::colRange . For example, + `A(Range(0, 10), Range::all())` is equivalent to `A.rowRange(0, 10)`. Similarly to all of the above, + the operators are O(1) operations, that is, no matrix data is copied. + @param rowRange Start and end row of the extracted submatrix. The upper boundary is not included. To + select all the rows, use Range::all(). + @param colRange Start and end column of the extracted submatrix. The upper boundary is not included. + To select all the columns, use Range::all(). + */ + Mat operator()( Range rowRange, Range colRange ) const; + + /** @overload + @param roi Extracted submatrix specified as a rectangle. + */ + Mat operator()( const Rect& roi ) const; + + /** @overload + @param ranges Array of selected ranges along each array dimension. + */ + Mat operator()( const Range* ranges ) const; + + /** @overload + @param ranges Array of selected ranges along each array dimension. + */ + Mat operator()(const std::vector& ranges) const; + + template operator std::vector<_Tp>() const; + template operator Vec<_Tp, n>() const; + template operator Matx<_Tp, m, n>() const; + + template operator std::array<_Tp, _Nm>() const; + + /** @brief Reports whether the matrix is continuous or not. + + The method returns true if the matrix elements are stored continuously without gaps at the end of + each row. Otherwise, it returns false. Obviously, 1x1 or 1xN matrices are always continuous. + Matrices created with Mat::create are always continuous. But if you extract a part of the matrix + using Mat::col, Mat::diag, and so on, or constructed a matrix header for externally allocated data, + such matrices may no longer have this property. + + The continuity flag is stored as a bit in the Mat::flags field and is computed automatically when + you construct a matrix header. Thus, the continuity check is a very fast operation, though + theoretically it could be done as follows: + @code + // alternative implementation of Mat::isContinuous() + bool myCheckMatContinuity(const Mat& m) + { + //return (m.flags & Mat::CONTINUOUS_FLAG) != 0; + return m.rows == 1 || m.step == m.cols*m.elemSize(); + } + @endcode + The method is used in quite a few of OpenCV functions. The point is that element-wise operations + (such as arithmetic and logical operations, math functions, alpha blending, color space + transformations, and others) do not depend on the image geometry. Thus, if all the input and output + arrays are continuous, the functions can process them as very long single-row vectors. 
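+    A quick way to see when the continuity property is lost (a minimal sketch,
+    sizes are illustrative only):
+    @code
+    Mat m(4, 4, CV_8UC1);
+    CV_Assert(m.isContinuous());    // matrices created with Mat::create are continuous
+    Mat c = m.col(1);
+    CV_Assert(!c.isContinuous());   // a column view leaves gaps at the end of each row
+    @endcode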
The example below illustrates how an alpha-blending function can be implemented:
+    @code
+    template<typename T>
+    void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+    {
+        const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                    inv_scale = 1.f/alpha_scale;
+
+        CV_Assert( src1.type() == src2.type() &&
+                   src1.type() == CV_MAKETYPE(traits::Depth<T>::value, 4) &&
+                   src1.size() == src2.size());
+        Size size = src1.size();
+        dst.create(size, src1.type());
+
+        // here is the idiom: check the arrays for continuity and,
+        // if this is the case,
+        // treat the arrays as 1D vectors
+        if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
+        {
+            size.width *= size.height;
+            size.height = 1;
+        }
+        size.width *= 4;
+
+        for( int i = 0; i < size.height; i++ )
+        {
+            // when the arrays are continuous,
+            // the outer loop is executed only once
+            const T* ptr1 = src1.ptr<T>(i);
+            const T* ptr2 = src2.ptr<T>(i);
+            T* dptr = dst.ptr<T>(i);
+
+            for( int j = 0; j < size.width; j += 4 )
+            {
+                float alpha = ptr1[j+3]*inv_scale, beta = ptr2[j+3]*inv_scale;
+                dptr[j] = saturate_cast<T>(ptr1[j]*alpha + ptr2[j]*beta);
+                dptr[j+1] = saturate_cast<T>(ptr1[j+1]*alpha + ptr2[j+1]*beta);
+                dptr[j+2] = saturate_cast<T>(ptr1[j+2]*alpha + ptr2[j+2]*beta);
+                dptr[j+3] = saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale);
+            }
+        }
+    }
+    @endcode
+    This approach, while being very simple, can boost the performance of a simple element-operation by
+    10-20 percent, especially if the image is rather small and the operation is quite simple.
+
+    Another OpenCV idiom in this function is the call of Mat::create for the destination array, which
+    allocates the destination array unless it already has the proper size and type. And while the newly
+    allocated arrays are always continuous, you still need to check the destination array because
+    Mat::create does not always allocate a new matrix.
+     */
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a submatrix of another matrix
+    bool isSubmatrix() const;
+
+    /** @brief Returns the matrix element size in bytes.
+
+    The method returns the matrix element size in bytes. For example, if the matrix type is CV_16SC3 ,
+    the method returns 3\*sizeof(short) or 6.
+     */
+    size_t elemSize() const;
+
+    /** @brief Returns the size of each matrix element channel in bytes.
+
+    The method returns the matrix element channel size in bytes, that is, it ignores the number of
+    channels. For example, if the matrix type is CV_16SC3 , the method returns sizeof(short) or 2.
+     */
+    size_t elemSize1() const;
+
+    /** @brief Returns the type of a matrix element.
+
+    The method returns a matrix element type. This is an identifier compatible with the CvMat type
+    system, like CV_16SC3 or 16-bit signed 3-channel array, and so on.
+     */
+    int type() const;
+
+    /** @brief Returns the depth of a matrix element.
+
+    The method returns the identifier of the matrix element depth (the type of each individual channel).
+    For example, for a 16-bit signed element array, the method returns CV_16S .
A complete list of + matrix types contains the following values: + - CV_8U - 8-bit unsigned integers ( 0..255 ) + - CV_8S - 8-bit signed integers ( -128..127 ) + - CV_16U - 16-bit unsigned integers ( 0..65535 ) + - CV_16S - 16-bit signed integers ( -32768..32767 ) + - CV_32S - 32-bit signed integers ( -2147483648..2147483647 ) + - CV_32F - 32-bit floating-point numbers ( -FLT_MAX..FLT_MAX, INF, NAN ) + - CV_64F - 64-bit floating-point numbers ( -DBL_MAX..DBL_MAX, INF, NAN ) + */ + int depth() const; + + /** @brief Returns the number of matrix channels. + + The method returns the number of matrix channels. + */ + int channels() const; + + /** @brief Returns a normalized step. + + The method returns a matrix step divided by Mat::elemSize1() . It can be useful to quickly access an + arbitrary matrix element. + */ + size_t step1(int i=0) const; + + /** @brief Returns true if the array has no elements. + + The method returns true if Mat::total() is 0 or if Mat::data is NULL. Because of pop_back() and + resize() methods `M.total() == 0` does not imply that `M.data == NULL`. + */ + bool empty() const; + + /** @brief Returns the total number of array elements. + + The method returns the number of array elements (a number of pixels if the array represents an + image). + */ + size_t total() const; + + /** @brief Returns the total number of array elements. + + The method returns the number of elements within a certain sub-array slice with startDim <= dim < endDim + */ + size_t total(int startDim, int endDim=INT_MAX) const; + + /** + * @param elemChannels Number of channels or number of columns the matrix should have. + * For a 2-D matrix, when the matrix has only 1 column, then it should have + * elemChannels channels; When the matrix has only 1 channel, + * then it should have elemChannels columns. + * For a 3-D matrix, it should have only one channel. Furthermore, + * if the number of planes is not one, then the number of rows + * within every plane has to be 1; if the number of rows within + * every plane is not 1, then the number of planes has to be 1. + * @param depth The depth the matrix should have. Set it to -1 when any depth is fine. + * @param requireContinuous Set it to true to require the matrix to be continuous + * @return -1 if the requirement is not satisfied. + * Otherwise, it returns the number of elements in the matrix. Note + * that an element may have multiple channels. + * + * The following code demonstrates its usage for a 2-d matrix: + * @snippet snippets/core_mat_checkVector.cpp example-2d + * + * The following code demonstrates its usage for a 3-d matrix: + * @snippet snippets/core_mat_checkVector.cpp example-3d + */ + int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const; + + /** @brief Returns a pointer to the specified matrix row. + + The methods return `uchar*` or typed pointer to the specified matrix row. See the sample in + Mat::isContinuous to know how to use these methods. + @param i0 A 0-based row index. 
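+
+    A minimal row-scan sketch (assumes an 8-bit single-channel matrix; the values
+    are illustrative only):
+    @code
+    Mat img(4, 5, CV_8UC1);
+    for(int y = 0; y < img.rows; y++)
+    {
+        uchar* row = img.ptr<uchar>(y);
+        for(int x = 0; x < img.cols; x++)
+            row[x] = (uchar)(x + y);
+    }
+    @endcode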
+     */
+    uchar* ptr(int i0=0);
+    /** @overload */
+    const uchar* ptr(int i0=0) const;
+
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    uchar* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    const uchar* ptr(int row, int col) const;
+
+    /** @overload */
+    uchar* ptr(int i0, int i1, int i2);
+    /** @overload */
+    const uchar* ptr(int i0, int i1, int i2) const;
+
+    /** @overload */
+    uchar* ptr(const int* idx);
+    /** @overload */
+    const uchar* ptr(const int* idx) const;
+    /** @overload */
+    template<int n> uchar* ptr(const Vec<int, n>& idx);
+    /** @overload */
+    template<int n> const uchar* ptr(const Vec<int, n>& idx) const;
+
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(int i0=0);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(int i0=0) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp* ptr(int row, int col) const;
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(int i0, int i1, int i2);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(int i0, int i1, int i2) const;
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(const int* idx);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(const int* idx) const;
+    /** @overload */
+    template<typename _Tp, int n> _Tp* ptr(const Vec<int, n>& idx);
+    /** @overload */
+    template<typename _Tp, int n> const _Tp* ptr(const Vec<int, n>& idx) const;
+
+    /** @brief Returns a reference to the specified array element.
+
+    The template methods return a reference to the specified array element. For the sake of higher
+    performance, the index range checks are only performed in the Debug configuration.
+
+    Note that the variants with a single index (i) can be used to access elements of single-row or
+    single-column 2-dimensional arrays. That is, if, for example, A is a 1 x N floating-point matrix and
+    B is an M x 1 integer matrix, you can simply write `A.at<float>(k+4)` and `B.at<int>(2*i+1)`
+    instead of `A.at<float>(0,k+4)` and `B.at<int>(2*i+1,0)`, respectively.
+
+    The example below initializes a Hilbert matrix:
+    @code
+    Mat H(100, 100, CV_64F);
+    for(int i = 0; i < H.rows; i++)
+        for(int j = 0; j < H.cols; j++)
+            H.at<double>(i,j)=1./(i+j+1);
+    @endcode
+
+    Keep in mind that the type identifier used in the at operator cannot be chosen at random. It depends
+    on the image from which you are trying to retrieve the data. The table below gives a better insight into this:
+     - If matrix is of type `CV_8U` then use `Mat.at<uchar>(y,x)`.
+     - If matrix is of type `CV_8S` then use `Mat.at<schar>(y,x)`.
+     - If matrix is of type `CV_16U` then use `Mat.at<ushort>(y,x)`.
+     - If matrix is of type `CV_16S` then use `Mat.at<short>(y,x)`.
+     - If matrix is of type `CV_32S` then use `Mat.at<int>(y,x)`.
+     - If matrix is of type `CV_32F` then use `Mat.at<float>(y,x)`.
+     - If matrix is of type `CV_64F` then use `Mat.at<double>(y,x)`.
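+
+    A minimal typed-access sketch (the element type CV_32F and the values are
+    illustrative only):
+    @code
+    Mat m = Mat::zeros(3, 3, CV_32F);
+    m.at<float>(1, 2) = 7.5f;                 // at(row, col)
+    float v = m.at<float>(Point(2, 1));       // Point(x, y): the same element
+    CV_Assert(v == 7.5f);
+    @endcode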
+
+    @param i0 Index along the dimension 0
+     */
+    template<typename _Tp> _Tp& at(int i0=0);
+    /** @overload
+    @param i0 Index along the dimension 0
+    */
+    template<typename _Tp> const _Tp& at(int i0=0) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp& at(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp& at(int row, int col) const;
+
+    /** @overload
+    @param i0 Index along the dimension 0
+    @param i1 Index along the dimension 1
+    @param i2 Index along the dimension 2
+    */
+    template<typename _Tp> _Tp& at(int i0, int i1, int i2);
+    /** @overload
+    @param i0 Index along the dimension 0
+    @param i1 Index along the dimension 1
+    @param i2 Index along the dimension 2
+    */
+    template<typename _Tp> const _Tp& at(int i0, int i1, int i2) const;
+
+    /** @overload
+    @param idx Array of Mat::dims indices.
+    */
+    template<typename _Tp> _Tp& at(const int* idx);
+    /** @overload
+    @param idx Array of Mat::dims indices.
+    */
+    template<typename _Tp> const _Tp& at(const int* idx) const;
+
+    /** @overload */
+    template<typename _Tp, int n> _Tp& at(const Vec<int, n>& idx);
+    /** @overload */
+    template<typename _Tp, int n> const _Tp& at(const Vec<int, n>& idx) const;
+
+    /** @overload
+    special versions for 2D arrays (especially convenient for referencing image pixels)
+    @param pt Element position specified as Point(j,i) .
+    */
+    template<typename _Tp> _Tp& at(Point pt);
+    /** @overload
+    special versions for 2D arrays (especially convenient for referencing image pixels)
+    @param pt Element position specified as Point(j,i) .
+    */
+    template<typename _Tp> const _Tp& at(Point pt) const;
+
+    /** @brief Returns the matrix iterator and sets it to the first matrix element.
+
+    The methods return the matrix read-only or read-write iterators. The use of matrix iterators is very
+    similar to the use of bi-directional STL iterators. In the example below, the alpha blending
+    function is rewritten using the matrix iterators:
+    @code
+    template<typename T>
+    void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+    {
+        typedef Vec<T, 4> VT;
+
+        const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                    inv_scale = 1.f/alpha_scale;
+
+        CV_Assert( src1.type() == src2.type() &&
+                   src1.type() == traits::Type<VT>::value &&
+                   src1.size() == src2.size());
+        Size size = src1.size();
+        dst.create(size, src1.type());
+
+        MatConstIterator_<VT> it1 = src1.begin<VT>(), it1_end = src1.end<VT>();
+        MatConstIterator_<VT> it2 = src2.begin<VT>();
+        MatIterator_<VT> dst_it = dst.begin<VT>();
+
+        for( ; it1 != it1_end; ++it1, ++it2, ++dst_it )
+        {
+            VT pix1 = *it1, pix2 = *it2;
+            float alpha = pix1[3]*inv_scale, beta = pix2[3]*inv_scale;
+            *dst_it = VT(saturate_cast<T>(pix1[0]*alpha + pix2[0]*beta),
+                         saturate_cast<T>(pix1[1]*alpha + pix2[1]*beta),
+                         saturate_cast<T>(pix1[2]*alpha + pix2[2]*beta),
+                         saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale));
+        }
+    }
+    @endcode
+     */
+    template<typename _Tp> MatIterator_<_Tp> begin();
+    template<typename _Tp> MatConstIterator_<_Tp> begin() const;
+
+    /** @brief Same as begin() but for inverse traversal
+     */
+    template<typename _Tp> std::reverse_iterator<MatIterator_<_Tp>> rbegin();
+    template<typename _Tp> std::reverse_iterator<MatConstIterator_<_Tp>> rbegin() const;
+
+    /** @brief Returns the matrix iterator and sets it to the after-last matrix element.
+
+    The methods return the matrix read-only or read-write iterators, set to the point following the last
+    matrix element.
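+
+    A minimal begin()/end() traversal sketch (the matrix contents are illustrative
+    only):
+    @code
+    Mat m = Mat::ones(2, 3, CV_64F);
+    double s = 0;
+    for(MatConstIterator_<double> it = m.begin<double>(); it != m.end<double>(); ++it)
+        s += *it;
+    CV_Assert(s == 6);
+    @endcode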
+     */
+    template<typename _Tp> MatIterator_<_Tp> end();
+    template<typename _Tp> MatConstIterator_<_Tp> end() const;
+
+    /** @brief Same as end() but for inverse traversal
+     */
+    template<typename _Tp> std::reverse_iterator<MatIterator_<_Tp>> rend();
+    template<typename _Tp> std::reverse_iterator<MatConstIterator_<_Tp>> rend() const;
+
+
+    /** @brief Runs the given functor over all matrix elements in parallel.
+
+    The operation passed as argument has to be a function pointer, a function object or a lambda (C++11).
+
+    Example 1. All of the operations below set the first channel of every matrix element to 0xFF:
+    @code
+    Mat image(1920, 1080, CV_8UC3);
+    typedef cv::Point3_<uint8_t> Pixel;
+
+    // first. raw pointer access.
+    for (int r = 0; r < image.rows; ++r) {
+        Pixel* ptr = image.ptr<Pixel>(r, 0);
+        const Pixel* ptr_end = ptr + image.cols;
+        for (; ptr != ptr_end; ++ptr) {
+            ptr->x = 255;
+        }
+    }
+
+    // Using MatIterator. (Simple, but it adds iterator overhead.)
+    for (Pixel &p : cv::Mat_<Pixel>(image)) {
+        p.x = 255;
+    }
+
+    // Parallel execution with function object.
+    struct Operator {
+        void operator ()(Pixel &pixel, const int * position) {
+            pixel.x = 255;
+        }
+    };
+    image.forEach<Pixel>(Operator());
+
+    // Parallel execution using C++11 lambda.
+    image.forEach<Pixel>([](Pixel &p, const int * position) -> void {
+        p.x = 255;
+    });
+    @endcode
+    Example 2. Using the pixel's position:
+    @code
+    // Creating 3D matrix (255 x 255 x 255) typed uint8_t
+    // and initialize all elements with a value equal to the element's position.
+    // i.e. the pixel at (x,y,z) = (1,2,3) gets the value (b,g,r) = (1,2,3).
+
+    int sizes[] = { 255, 255, 255 };
+    typedef cv::Point3_<uint8_t> Pixel;
+
+    Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
+
+    image.forEach<Pixel>([](Pixel& pixel, const int position[]) -> void {
+        pixel.x = position[0];
+        pixel.y = position[1];
+        pixel.z = position[2];
+    });
+    @endcode
+     */
+    template<typename _Tp, typename Functor> void forEach(const Functor& operation);
+    /** @overload */
+    template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
+
+    Mat(Mat&& m);
+    Mat& operator = (Mat&& m);
+
+    enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+    /*! includes several bit-fields:
+         - the magic signature
+         - continuity flag
+         - depth
+         - number of channels
+     */
+    int flags;
+    //! the matrix dimensionality, >= 2
+    int dims;
+    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+    int rows, cols;
+    //! pointer to the data
+    uchar* data;
+
+    //! helper fields used in locateROI and adjustROI
+    const uchar* datastart;
+    const uchar* dataend;
+    const uchar* datalimit;
+
+    //! custom allocator
+    MatAllocator* allocator;
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
+    static MatAllocator* getDefaultAllocator();
+    static void setDefaultAllocator(MatAllocator* allocator);
+
+    //! internal use method: updates the continuity flag
+    void updateContinuityFlag();
+
+    //! interaction with UMat
+    UMatData* u;
+
+    MatSize size;
+    MatStep step;
+
+protected:
+    template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
+};
+
+
+///////////////////////////////// Mat_<_Tp> ////////////////////////////////////
+
+/** @brief Template matrix class derived from Mat
+
+@code{.cpp}
+    template<typename _Tp> class Mat_ : public Mat
+    {
+    public:
+        // ... some specific methods
+        // and
+        // no new extra fields
+    };
+@endcode
+The class `Mat_<_Tp>` is a *thin* template wrapper on top of the Mat class. It does not have any
+extra data fields. Neither this class nor Mat has any virtual methods.
Thus, references or pointers to these two classes can be freely but carefully converted one to another. For example:
+@code{.cpp}
+    // create a 100x100 8-bit matrix
+    Mat M(100,100,CV_8U);
+    // this will be compiled fine. no data conversion will be done.
+    Mat_<float>& M1 = (Mat_<float>&)M;
+    // the program is likely to crash at the statement below
+    M1(99,99) = 1.f;
+@endcode
+While Mat is sufficient in most cases, Mat_ can be more convenient if you use a lot of element
+access operations and if you know the matrix type at compile time. Note that
+`Mat::at<float>(int y,int x)` and `Mat_<float>::operator()(int y,int x)` do absolutely the same
+and run at the same speed, but the latter is certainly shorter:
+@code{.cpp}
+    Mat_<double> M(20,20);
+    for(int i = 0; i < M.rows; i++)
+        for(int j = 0; j < M.cols; j++)
+            M(i,j) = 1./(i+j+1);
+    Mat E, V;
+    eigen(M,E,V);
+    cout << E.at<double>(0,0)/E.at<double>(M.rows-1,0);
+@endcode
+To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter:
+@code{.cpp}
+    // allocate a 320x240 color image and fill it with green (in RGB space)
+    Mat_<Vec3b> img(240, 320, Vec3b(0,255,0));
+    // now draw a diagonal white line
+    for(int i = 0; i < 100; i++)
+        img(i,i)=Vec3b(255,255,255);
+    // and now scramble the 2nd (red) channel of each pixel
+    for(int i = 0; i < img.rows; i++)
+        for(int j = 0; j < img.cols; j++)
+            img(i,j)[2] ^= (uchar)(i ^ j);
+@endcode
+Mat_ is fully compatible with C++11 range-based for loops. For example, such a loop
+can be used to safely apply a look-up table:
+@code{.cpp}
+void applyTable(Mat_<uchar>& I, const uchar* const table)
+{
+    for(auto& pixel : I)
+    {
+        pixel = table[pixel];
+    }
+}
+@endcode
+ */
+template<typename _Tp> class Mat_ : public Mat
+{
+public:
+    typedef _Tp value_type;
+    typedef typename DataType<_Tp>::channel_type channel_type;
+    typedef MatIterator_<_Tp> iterator;
+    typedef MatConstIterator_<_Tp> const_iterator;
+
+    //! default constructor
+    Mat_() CV_NOEXCEPT;
+    //! equivalent to Mat(_rows, _cols, DataType<_Tp>::type)
+    Mat_(int _rows, int _cols);
+    //! constructor that sets each matrix element to specified value
+    Mat_(int _rows, int _cols, const _Tp& value);
+    //! equivalent to Mat(_size, DataType<_Tp>::type)
+    explicit Mat_(Size _size);
+    //! constructor that sets each matrix element to specified value
+    Mat_(Size _size, const _Tp& value);
+    //! n-dim array constructor
+    Mat_(int _ndims, const int* _sizes);
+    //! n-dim array constructor that sets each matrix element to specified value
+    Mat_(int _ndims, const int* _sizes, const _Tp& value);
+    //! copy/conversion constructor. If m is of different type, it's converted
+    Mat_(const Mat& m);
+    //! copy constructor
+    Mat_(const Mat_& m);
+    //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type
+    Mat_(int _rows, int _cols, _Tp* _data, size_t _step=AUTO_STEP);
+    //! constructs n-dim matrix on top of user-allocated data. steps are in bytes(!!!), regardless of the type
+    Mat_(int _ndims, const int* _sizes, _Tp* _data, const size_t* _steps=0);
+    //! selects a submatrix
+    Mat_(const Mat_& m, const Range& rowRange, const Range& colRange=Range::all());
+    //! selects a submatrix
+    Mat_(const Mat_& m, const Rect& roi);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const Range* ranges);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const std::vector<Range>& ranges);
+    //! from a matrix expression
+    explicit Mat_(const MatExpr& e);
+    //! makes a matrix out of Vec, std::vector, Point_ or Point3_.
The matrix will have a single column
+    explicit Mat_(const std::vector<_Tp>& vec, bool copyData=false);
+    template<int n> explicit Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData=true);
+    template<int m, int n> explicit Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& mtx, bool copyData=true);
+    explicit Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+    explicit Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+    explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    Mat_(std::initializer_list<_Tp> values);
+    explicit Mat_(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> values);
+
+    template<std::size_t _Nm> explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false);
+
+    Mat_& operator = (const Mat& m);
+    Mat_& operator = (const Mat_& m);
+    //! set all the elements to s.
+    Mat_& operator = (const _Tp& s);
+    //! assign a matrix expression
+    Mat_& operator = (const MatExpr& e);
+
+    //! iterators; they are smart enough to skip gaps in the end of rows
+    iterator begin();
+    iterator end();
+    const_iterator begin() const;
+    const_iterator end() const;
+
+    //reverse iterators
+    std::reverse_iterator<iterator> rbegin();
+    std::reverse_iterator<iterator> rend();
+    std::reverse_iterator<const_iterator> rbegin() const;
+    std::reverse_iterator<const_iterator> rend() const;
+
+    //! template methods for operation over all matrix elements.
+    // the operations take care of skipping gaps in the end of rows (if any)
+    template<typename Functor> void forEach(const Functor& operation);
+    template<typename Functor> void forEach(const Functor& operation) const;
+
+    //! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
+    void create(int _rows, int _cols);
+    //! equivalent to Mat::create(_size, DataType<_Tp>::type)
+    void create(Size _size);
+    //! equivalent to Mat::create(_ndims, _sizes, DataType<_Tp>::type)
+    void create(int _ndims, const int* _sizes);
+    //! equivalent to Mat::release()
+    void release();
+    //! cross-product
+    Mat_ cross(const Mat_& m) const;
+    //! data type conversion
+    template<typename T2> operator Mat_<T2>() const;
+    //! overridden forms of Mat::row() etc.
+    Mat_ row(int y) const;
+    Mat_ col(int x) const;
+    Mat_ diag(int d=0) const;
+    CV_NODISCARD_STD Mat_ clone() const;
+
+    //! overridden forms of Mat::elemSize() etc.
+    size_t elemSize() const;
+    size_t elemSize1() const;
+    int type() const;
+    int depth() const;
+    int channels() const;
+    size_t step1(int i=0) const;
+    //! returns step()/sizeof(_Tp)
+    size_t stepT(int i=0) const;
+
+    //! overridden forms of Mat::zeros() etc. Data type is omitted, of course
+    CV_NODISCARD_STD static MatExpr zeros(int rows, int cols);
+    CV_NODISCARD_STD static MatExpr zeros(Size size);
+    CV_NODISCARD_STD static MatExpr zeros(int _ndims, const int* _sizes);
+    CV_NODISCARD_STD static MatExpr ones(int rows, int cols);
+    CV_NODISCARD_STD static MatExpr ones(Size size);
+    CV_NODISCARD_STD static MatExpr ones(int _ndims, const int* _sizes);
+    CV_NODISCARD_STD static MatExpr eye(int rows, int cols);
+    CV_NODISCARD_STD static MatExpr eye(Size size);
+
+    //! some more overridden methods
+    Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright );
+    Mat_ operator()( const Range& rowRange, const Range& colRange ) const;
+    Mat_ operator()( const Rect& roi ) const;
+    Mat_ operator()( const Range* ranges ) const;
+    Mat_ operator()(const std::vector<Range>& ranges) const;
+
+    //! more convenient forms of row and element access operators
+    _Tp* operator [](int y);
+    const _Tp* operator [](int y) const;
+
+    //! returns reference to the specified element
+    _Tp& operator ()(const int* idx);
+    //!
returns read-only reference to the specified element
+    const _Tp& operator ()(const int* idx) const;
+
+    //! returns reference to the specified element
+    template<int n> _Tp& operator ()(const Vec<int, n>& idx);
+    //! returns read-only reference to the specified element
+    template<int n> const _Tp& operator ()(const Vec<int, n>& idx) const;
+
+    //! returns reference to the specified element (1D case)
+    _Tp& operator ()(int idx0);
+    //! returns read-only reference to the specified element (1D case)
+    const _Tp& operator ()(int idx0) const;
+    //! returns reference to the specified element (2D case)
+    _Tp& operator ()(int row, int col);
+    //! returns read-only reference to the specified element (2D case)
+    const _Tp& operator ()(int row, int col) const;
+    //! returns reference to the specified element (3D case)
+    _Tp& operator ()(int idx0, int idx1, int idx2);
+    //! returns read-only reference to the specified element (3D case)
+    const _Tp& operator ()(int idx0, int idx1, int idx2) const;
+
+    _Tp& operator ()(Point pt);
+    const _Tp& operator ()(Point pt) const;
+
+    //! conversion to vector.
+    operator std::vector<_Tp>() const;
+
+    //! conversion to array.
+    template<std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
+
+    //! conversion to Vec
+    template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
+    //! conversion to Matx
+    template<int m, int n> operator Matx<typename DataType<_Tp>::channel_type, m, n>() const;
+
+    Mat_(Mat_&& m);
+    Mat_& operator = (Mat_&& m);
+
+    Mat_(Mat&& m);
+    Mat_& operator = (Mat&& m);
+
+    Mat_(MatExpr&& e);
+};
+
+typedef Mat_<uchar> Mat1b;
+typedef Mat_<Vec2b> Mat2b;
+typedef Mat_<Vec3b> Mat3b;
+typedef Mat_<Vec4b> Mat4b;
+
+typedef Mat_<short> Mat1s;
+typedef Mat_<Vec2s> Mat2s;
+typedef Mat_<Vec3s> Mat3s;
+typedef Mat_<Vec4s> Mat4s;
+
+typedef Mat_<ushort> Mat1w;
+typedef Mat_<Vec2w> Mat2w;
+typedef Mat_<Vec3w> Mat3w;
+typedef Mat_<Vec4w> Mat4w;
+
+typedef Mat_<int>   Mat1i;
+typedef Mat_<Vec2i> Mat2i;
+typedef Mat_<Vec3i> Mat3i;
+typedef Mat_<Vec4i> Mat4i;
+
+typedef Mat_<float> Mat1f;
+typedef Mat_<Vec2f> Mat2f;
+typedef Mat_<Vec3f> Mat3f;
+typedef Mat_<Vec4f> Mat4f;
+
+typedef Mat_<double> Mat1d;
+typedef Mat_<Vec2d> Mat2d;
+typedef Mat_<Vec3d> Mat3d;
+typedef Mat_<Vec4d> Mat4d;
+
+/** @todo document */
+class CV_EXPORTS UMat
+{
+public:
+    //! default constructor
+    UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT) CV_NOEXCEPT;
+    //! constructs 2D matrix of the specified size and type
+    // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+    UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    //! constructs 2D matrix and fills it with the specified value _s.
+    UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! constructs n-dimensional matrix
+    UMat(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(int ndims, const int* sizes, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! copy constructor
+    UMat(const UMat& m);
+
+    //! creates a matrix header for a part of the bigger matrix
+    UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all());
+    UMat(const UMat& m, const Rect& roi);
+    UMat(const UMat& m, const Range* ranges);
+    UMat(const UMat& m, const std::vector<Range>& ranges);
+
+    // FIXIT copyData=false is not implemented, drop this in favor of cv::Mat (OpenCV 5.0)
+    //! builds matrix from std::vector with or without copying the data
+    template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);
+
+    //! destructor - calls release()
+    ~UMat();
+    //!
assignment operators + UMat& operator = (const UMat& m); + + Mat getMat(AccessFlag flags) const; + + //! returns a new matrix header for the specified row + UMat row(int y) const; + //! returns a new matrix header for the specified column + UMat col(int x) const; + //! ... for the specified row span + UMat rowRange(int startrow, int endrow) const; + UMat rowRange(const Range& r) const; + //! ... for the specified column span + UMat colRange(int startcol, int endcol) const; + UMat colRange(const Range& r) const; + //! ... for the specified diagonal + //! (d=0 - the main diagonal, + //! >0 - a diagonal from the upper half, + //! <0 - a diagonal from the lower half) + UMat diag(int d=0) const; + //! constructs a square diagonal matrix which main diagonal is vector "d" + CV_NODISCARD_STD static UMat diag(const UMat& d, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat diag(const UMat& d) { return diag(d, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + + //! returns deep copy of the matrix, i.e. the data is copied + CV_NODISCARD_STD UMat clone() const; + //! copies the matrix content to "m". + // It calls m.create(this->size(), this->type()). + void copyTo( OutputArray m ) const; + //! copies those matrix elements to "m" that are marked with non-zero mask elements. + void copyTo( OutputArray m, InputArray mask ) const; + //! converts matrix to another datatype with optional scaling. See cvConvertScale. + void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const; + + void assignTo( UMat& m, int type=-1 ) const; + + //! sets every matrix element to s + UMat& operator = (const Scalar& s); + //! sets some of the matrix elements to s, according to the mask + UMat& setTo(InputArray value, InputArray mask=noArray()); + //! creates alternative matrix header for the same data, with different + // number of channels and/or different number of rows. see cvReshape. + UMat reshape(int cn, int rows=0) const; + UMat reshape(int cn, int newndims, const int* newsz) const; + + //! matrix transposition by means of matrix expressions + UMat t() const; + //! matrix inversion by means of matrix expressions + UMat inv(int method=DECOMP_LU) const; + //! per-element matrix multiplication by means of matrix expressions + UMat mul(InputArray m, double scale=1) const; + + //! computes dot-product + double dot(InputArray m) const; + + //! 
Matlab-style matrix initialization + CV_NODISCARD_STD static UMat zeros(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat zeros(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat zeros(int ndims, const int* sz, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat zeros(int rows, int cols, int type) { return zeros(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat zeros(Size size, int type) { return zeros(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat zeros(int ndims, const int* sz, int type) { return zeros(ndims, sz, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat ones(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat ones(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat ones(int ndims, const int* sz, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat ones(int rows, int cols, int type) { return ones(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat ones(Size size, int type) { return ones(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat ones(int ndims, const int* sz, int type) { return ones(ndims, sz, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat eye(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat eye(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + CV_NODISCARD_STD static UMat eye(int rows, int cols, int type) { return eye(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + CV_NODISCARD_STD static UMat eye(Size size, int type) { return eye(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + + //! allocates new matrix data unless the matrix already has specified size and type. + // previous data is unreferenced if needed. + void create(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + void create(const std::vector& sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); + + //! increases the reference counter; use with care to avoid memleaks + void addref(); + //! decreases reference counter; + // deallocates the data when reference counter reaches 0. + void release(); + + //! deallocates the matrix data + void deallocate(); + //! internal use function; properly re-allocates _size, _step arrays + void copySize(const UMat& m); + + //! locates matrix header within a parent matrix. See below + void locateROI( Size& wholeSize, Point& ofs ) const; + //! moves/resizes the current matrix ROI inside the parent matrix. + UMat& adjustROI( int dtop, int dbottom, int dleft, int dright ); + //! extracts a rectangular sub-matrix + // (this is a generalized form of row, rowRange etc.) 
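+    // A minimal ROI sketch (sizes are illustrative only):
+    //     UMat big(100, 100, CV_8UC1);
+    //     UMat roi = big(Range(10, 20), Range(30, 40)); // 10x10 view, no data copied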
+ UMat operator()( Range rowRange, Range colRange ) const; + UMat operator()( const Rect& roi ) const; + UMat operator()( const Range* ranges ) const; + UMat operator()(const std::vector& ranges) const; + + //! returns true iff the matrix data is continuous + // (i.e. when there are no gaps between successive rows). + // similar to CV_IS_MAT_CONT(cvmat->type) + bool isContinuous() const; + + //! returns true if the matrix is a submatrix of another matrix + bool isSubmatrix() const; + + //! returns element size in bytes, + // similar to CV_ELEM_SIZE(cvmat->type) + size_t elemSize() const; + //! returns the size of element channel in bytes. + size_t elemSize1() const; + //! returns element type, similar to CV_MAT_TYPE(cvmat->type) + int type() const; + //! returns element type, similar to CV_MAT_DEPTH(cvmat->type) + int depth() const; + //! returns element type, similar to CV_MAT_CN(cvmat->type) + int channels() const; + //! returns step/elemSize1() + size_t step1(int i=0) const; + //! returns true if matrix data is NULL + bool empty() const; + //! returns the total number of matrix elements + size_t total() const; + + //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise + int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const; + + UMat(UMat&& m); + UMat& operator = (UMat&& m); + + /*! Returns the OpenCL buffer handle on which UMat operates on. + The UMat instance should be kept alive during the use of the handle to prevent the buffer to be + returned to the OpenCV buffer pool. + */ + void* handle(AccessFlag accessFlags) const; + void ndoffset(size_t* ofs) const; + + enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG }; + enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 }; + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + + //! the matrix dimensionality, >= 2 + int dims; + + //! number of rows in the matrix; -1 when the matrix has more than 2 dimensions + int rows; + + //! number of columns in the matrix; -1 when the matrix has more than 2 dimensions + int cols; + + //! custom allocator + MatAllocator* allocator; + + //! usage flags for allocator; recommend do not set directly, instead set during construct/create/getUMat + UMatUsageFlags usageFlags; + + //! and the standard allocator + static MatAllocator* getStdAllocator(); + + //! internal use method: updates the continuity flag + void updateContinuityFlag(); + + //! black-box container of UMat data + UMatData* u; + + //! offset of the submatrix (or 0) + size_t offset; + + //! dimensional size of the matrix; accessible in various formats + MatSize size; + + //! number of bytes each matrix element/row/plane/dimension occupies + MatStep step; + +protected: +}; + + +/////////////////////////// multi-dimensional sparse matrix ////////////////////////// + +/** @brief The class SparseMat represents multi-dimensional sparse numerical arrays. + +Such a sparse array can store elements of any type that Mat can store. *Sparse* means that only +non-zero elements are stored (though, as a result of operations on a sparse matrix, some of its +stored elements can actually become 0. It is up to you to detect such elements and delete them +using SparseMat::erase ). 
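+A minimal create/ref/erase round trip (a sketch; the sizes and type are
+illustrative only):
+@code
+    int sz[] = {8, 8};
+    SparseMat S(2, sz, CV_32F);
+    S.ref<float>(3, 4) = 1.f;   // creates the element on first access
+    S.erase(3, 4);              // removes it again
+    CV_Assert(S.nzcount() == 0);
+@endcode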
The non-zero elements are stored in a hash table that grows when it is filled so that the search time is O(1) on average (regardless of whether the element is there or not).
+Elements can be accessed using the following methods:
+-   Query operations (SparseMat::ptr and the higher-level SparseMat::ref, SparseMat::value and
+    SparseMat::find), for example:
+    @code
+        const int dims = 5;
+        int size[5] = {10, 10, 10, 10, 10};
+        SparseMat sparse_mat(dims, size, CV_32F);
+        for(int i = 0; i < 1000; i++)
+        {
+            int idx[dims];
+            for(int k = 0; k < dims; k++)
+                idx[k] = rand() % size[k];
+            sparse_mat.ref<float>(idx) += 1.f;
+        }
+        cout << "nnz = " << sparse_mat.nzcount() << endl;
+    @endcode
+-   Sparse matrix iterators. They are similar to MatIterator but different from NAryMatIterator.
+    That is, the iteration loop is familiar to STL users:
+    @code
+        // prints elements of a sparse floating-point matrix
+        // and the sum of elements.
+        SparseMatConstIterator_<float>
+            it = sparse_mat.begin<float>(),
+            it_end = sparse_mat.end<float>();
+        double s = 0;
+        int dims = sparse_mat.dims();
+        for(; it != it_end; ++it)
+        {
+            // print element indices and the element value
+            const SparseMat::Node* n = it.node();
+            printf("(");
+            for(int i = 0; i < dims; i++)
+                printf("%d%s", n->idx[i], i < dims-1 ? ", " : ")");
+            printf(": %g\n", it.value<float>());
+            s += *it;
+        }
+        printf("Element sum is %g\n", s);
+    @endcode
+    If you run this loop, you will notice that elements are not enumerated in a logical order
+    (lexicographical, and so on). They come in the same order as they are stored in the hash table
+    (semi-randomly). You may collect pointers to the nodes and sort them to get the proper ordering.
+    Note, however, that pointers to the nodes may become invalid when you add more elements to the
+    matrix. This may happen due to possible buffer reallocation.
+-   Combination of the above 2 methods when you need to process 2 or more sparse matrices
+    simultaneously. For example, this is how you can compute unnormalized cross-correlation of the 2
+    floating-point sparse matrices:
+    @code
+        double cross_corr(const SparseMat& a, const SparseMat& b)
+        {
+            const SparseMat *_a = &a, *_b = &b;
+            // if b contains fewer elements than a,
+            // it is faster to iterate through b
+            if(_a->nzcount() > _b->nzcount())
+                std::swap(_a, _b);
+            SparseMatConstIterator_<float> it = _a->begin<float>(),
+                                           it_end = _a->end<float>();
+            double ccorr = 0;
+            for(; it != it_end; ++it)
+            {
+                // take the next element from the first matrix
+                float avalue = *it;
+                const Node* anode = it.node();
+                // and try to find an element with the same index in the second matrix.
+                // since the hash value depends only on the element index,
+                // reuse the hash value stored in the node
+                float bvalue = _b->value<float>(anode->idx,&anode->hashval);
+                ccorr += avalue*bvalue;
+            }
+            return ccorr;
+        }
+    @endcode
+ */
+class CV_EXPORTS SparseMat
+{
+public:
+    typedef SparseMatIterator iterator;
+    typedef SparseMatConstIterator const_iterator;
+
+    enum { MAGIC_VAL=0x42FD0000, MAX_DIM=32, HASH_SCALE=0x5bd1e995, HASH_BIT=0x80000000 };
+
+    //! the sparse matrix header
+    struct CV_EXPORTS Hdr
+    {
+        Hdr(int _dims, const int* _sizes, int _type);
+        void clear();
+        int refcount;
+        int dims;
+        int valueOffset;
+        size_t nodeSize;
+        size_t nodeCount;
+        size_t freeList;
+        std::vector<uchar> pool;
+        std::vector<size_t> hashtab;
+        int size[MAX_DIM];
+    };
+
+    //! sparse matrix node - element of a hash table
+    struct CV_EXPORTS Node
+    {
+        //! hash value
+        size_t hashval;
+        //! index of the next node in the same hash table entry
+        size_t next;
+        //!
index of the matrix element
+        int idx[MAX_DIM];
+    };
+
+    /** @brief Various SparseMat constructors.
+     */
+    SparseMat();
+
+    /** @overload
+    @param dims Array dimensionality.
+    @param _sizes Sparse matrix size in each dimension.
+    @param _type Sparse matrix data type.
+    */
+    SparseMat(int dims, const int* _sizes, int _type);
+
+    /** @overload
+    @param m Source matrix for copy constructor. If m is a dense matrix (cv::Mat) then it will be converted
+    to sparse representation.
+    */
+    SparseMat(const SparseMat& m);
+
+    /** @overload
+    @param m Source matrix for copy constructor. If m is a dense matrix (cv::Mat) then it will be converted
+    to sparse representation.
+    */
+    explicit SparseMat(const Mat& m);
+
+    //! the destructor
+    ~SparseMat();
+
+    //! assignment operator. This is O(1) operation, i.e. no data is copied
+    SparseMat& operator = (const SparseMat& m);
+    //! equivalent to the corresponding constructor
+    SparseMat& operator = (const Mat& m);
+
+    //! creates full copy of the matrix
+    CV_NODISCARD_STD SparseMat clone() const;
+
+    //! copies all the data to the destination matrix. All the previous content of m is erased
+    void copyTo( SparseMat& m ) const;
+    //! converts sparse matrix to dense matrix.
+    void copyTo( Mat& m ) const;
+    //! multiplies all the matrix elements by the specified scale factor alpha and converts the results to the specified data type
+    void convertTo( SparseMat& m, int rtype, double alpha=1 ) const;
+    //! converts sparse matrix to dense n-dim matrix with optional type conversion and scaling.
+    /*!
+        @param [out] m - output matrix; if it does not have a proper size or type before the operation,
+            it is reallocated
+        @param [in] rtype - desired output matrix type or, rather, the depth since the number of channels
+            are the same as the input has; if rtype is negative, the output matrix will have the
+            same type as the input.
+        @param [in] alpha - optional scale factor
+        @param [in] beta - optional delta added to the scaled values
+    */
+    void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+    // not used now
+    void assignTo( SparseMat& m, int type=-1 ) const;
+
+    //! reallocates sparse matrix.
+    /*!
+        If the matrix already had the proper size and type,
+        it is simply cleared with clear(), otherwise,
+        the old matrix is released (using release()) and the new one is allocated.
+    */
+    void create(int dims, const int* _sizes, int _type);
+    //! sets all the sparse matrix elements to 0, which means clearing the hash table.
+    void clear();
+    //! manually increments the reference counter to the header.
+    void addref();
+    // decrements the header reference counter. When the counter reaches 0, the header and all the underlying data are deallocated.
+    void release();
+
+    //! converts sparse matrix to the old-style representation; all the elements are copied.
+    //operator CvSparseMat*() const;
+
+    //! returns the size of each element in bytes (not including the overhead - the space occupied by SparseMat::Node elements)
+    size_t elemSize() const;
+    //! returns elemSize()/channels()
+    size_t elemSize1() const;
+
+    //! returns type of sparse matrix elements
+    int type() const;
+    //! returns the depth of sparse matrix elements
+    int depth() const;
+    //! returns the number of channels
+    int channels() const;
+
+    //! returns the array of sizes, or NULL if the matrix is not allocated
+    const int* size() const;
+    //! returns the size of i-th matrix dimension (or 0)
+    int size(int i) const;
+    //! returns the matrix dimensionality
+    int dims() const;
+    //!
returns the number of non-zero elements (=the number of hash table nodes)
+    size_t nzcount() const;
+
+    //! computes the element hash value (1D case)
+    size_t hash(int i0) const;
+    //! computes the element hash value (2D case)
+    size_t hash(int i0, int i1) const;
+    //! computes the element hash value (3D case)
+    size_t hash(int i0, int i1, int i2) const;
+    //! computes the element hash value (nD case)
+    size_t hash(const int* idx) const;
+
+    //!@{
+    /*!
+     specialized variants for 1D, 2D, 3D cases, and the generic one for n-D case.
+     return pointer to the matrix element.
+      - if the element is there (it's non-zero), the pointer to it is returned
+      - if it's not there and createMissing=false, NULL pointer is returned
+      - if it's not there and createMissing=true, then the new element
+        is created and initialized with 0. Pointer to it is returned
+      - if the optional hashval pointer is not NULL, the element hash value is
+        not computed, but *hashval is taken instead.
+    */
+    //! returns pointer to the specified element (1D case)
+    uchar* ptr(int i0, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (2D case)
+    uchar* ptr(int i0, int i1, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (3D case)
+    uchar* ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (nD case)
+    uchar* ptr(const int* idx, bool createMissing, size_t* hashval=0);
+    //!@}
+
+    //!@{
+    /*!
+     return read-write reference to the specified sparse matrix element.
+
+     `ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`.
+     The methods always return a valid reference.
+     If the element did not exist, it is created and initialized with 0.
+    */
+    //! returns reference to the specified element (1D case)
+    template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
+    //! returns reference to the specified element (2D case)
+    template<typename _Tp> _Tp& ref(int i0, int i1, size_t* hashval=0);
+    //! returns reference to the specified element (3D case)
+    template<typename _Tp> _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+    //! returns reference to the specified element (nD case)
+    template<typename _Tp> _Tp& ref(const int* idx, size_t* hashval=0);
+    //!@}
+
+    //!@{
+    /*!
+     return value of the specified sparse matrix element.
+
+     `value<_Tp>(i0,...[,hashval])` is equivalent to
+     @code
+     { const _Tp* p = find<_Tp>(i0,...[,hashval]); return p ? *p : _Tp(); }
+     @endcode
+
+     That is, if the element did not exist, the methods return 0.
+    */
+    //! returns value of the specified element (1D case)
+    template<typename _Tp> _Tp value(int i0, size_t* hashval=0) const;
+    //! returns value of the specified element (2D case)
+    template<typename _Tp> _Tp value(int i0, int i1, size_t* hashval=0) const;
+    //! returns value of the specified element (3D case)
+    template<typename _Tp> _Tp value(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! returns value of the specified element (nD case)
+    template<typename _Tp> _Tp value(const int* idx, size_t* hashval=0) const;
+    //!@}
+
+    //!@{
+    /*!
+     Return pointer to the specified sparse matrix element if it exists
+
+     `find<_Tp>(i0,...[,hashval])` is equivalent to `(const _Tp*)ptr(i0,...,false[,hashval])`.
+
+     If the specified element does not exist, the methods return NULL.
+    */
+    //! returns pointer to the specified element (1D case)
+    template<typename _Tp> const _Tp* find(int i0, size_t* hashval=0) const;
+    //! returns pointer to the specified element (2D case)
+    template<typename _Tp> const _Tp* find(int i0, int i1, size_t* hashval=0) const;
+    //!
+    //! returns pointer to the specified element (3D case)
+    template<typename _Tp> const _Tp* find(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! returns pointer to the specified element (nD case)
+    template<typename _Tp> const _Tp* find(const int* idx, size_t* hashval=0) const;
+    //!@}
+
+    //! erases the specified element (2D case)
+    void erase(int i0, int i1, size_t* hashval=0);
+    //! erases the specified element (3D case)
+    void erase(int i0, int i1, int i2, size_t* hashval=0);
+    //! erases the specified element (nD case)
+    void erase(const int* idx, size_t* hashval=0);
+
+    //!@{
+    /*!
+       return the sparse matrix iterator pointing to the first sparse matrix element
+    */
+    //! returns the sparse matrix iterator at the matrix beginning
+    SparseMatIterator begin();
+    //! returns the sparse matrix iterator at the matrix beginning
+    template<typename _Tp> SparseMatIterator_<_Tp> begin();
+    //! returns the read-only sparse matrix iterator at the matrix beginning
+    SparseMatConstIterator begin() const;
+    //! returns the read-only sparse matrix iterator at the matrix beginning
+    template<typename _Tp> SparseMatConstIterator_<_Tp> begin() const;
+    //!@}
+    /*!
+       return the sparse matrix iterator pointing to the element following the last sparse matrix element
+    */
+    //! returns the sparse matrix iterator at the matrix end
+    SparseMatIterator end();
+    //! returns the read-only sparse matrix iterator at the matrix end
+    SparseMatConstIterator end() const;
+    //! returns the typed sparse matrix iterator at the matrix end
+    template<typename _Tp> SparseMatIterator_<_Tp> end();
+    //! returns the typed read-only sparse matrix iterator at the matrix end
+    template<typename _Tp> SparseMatConstIterator_<_Tp> end() const;
+
+    //! returns the value stored in the sparse matrix node
+    template<typename _Tp> _Tp& value(Node* n);
+    //! returns the value stored in the sparse matrix node
+    template<typename _Tp> const _Tp& value(const Node* n) const;
+
+    ////////////// some internal-use methods ///////////////
+    Node* node(size_t nidx);
+    const Node* node(size_t nidx) const;
+
+    uchar* newNode(const int* idx, size_t hashval);
+    void removeNode(size_t hidx, size_t nidx, size_t previdx);
+    void resizeHashTab(size_t newsize);
+
+    int flags;
+    Hdr* hdr;
+};
+
+
+
+///////////////////////////////// SparseMat_<_Tp> ////////////////////////////////////
+
+/** @brief Template sparse n-dimensional array class derived from SparseMat
+
+SparseMat_ is a thin wrapper on top of SparseMat created in the same way as Mat_ . It simplifies
+notation of some operations:
+@code
+    int sz[] = {10, 20, 30};
+    SparseMat_<double> M(3, sz);
+    ...
+    M.ref(1, 2, 3) = M(4, 5, 6) + M(7, 8, 9);
+@endcode
+ */
+template<typename _Tp> class SparseMat_ : public SparseMat
+{
+public:
+    typedef SparseMatIterator_<_Tp> iterator;
+    typedef SparseMatConstIterator_<_Tp> const_iterator;
+
+    //! the default constructor
+    SparseMat_();
+    //! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type)
+    SparseMat_(int dims, const int* _sizes);
+    //! the copy constructor. If DataType<_Tp>.type != m.type(), the m elements are converted
+    SparseMat_(const SparseMat& m);
+    //! the copy constructor. This is O(1) operation - no data is copied
+    SparseMat_(const SparseMat_& m);
+    //! converts dense matrix to the sparse form
+    SparseMat_(const Mat& m);
+    //! converts the old-style sparse matrix to the C++ class. All the elements are copied
+    //SparseMat_(const CvSparseMat* m);
+    //! the assignment operator. If DataType<_Tp>.type != m.type(), the m elements are converted
+    SparseMat_& operator = (const SparseMat& m);
+    //! the assignment operator.
This is O(1) operation - no data is copied + SparseMat_& operator = (const SparseMat_& m); + //! converts dense matrix to the sparse form + SparseMat_& operator = (const Mat& m); + + //! makes full copy of the matrix. All the elements are duplicated + CV_NODISCARD_STD SparseMat_ clone() const; + //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type) + void create(int dims, const int* _sizes); + //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied + //operator CvSparseMat*() const; + + //! returns type of the matrix elements + int type() const; + //! returns depth of the matrix elements + int depth() const; + //! returns the number of channels in each matrix element + int channels() const; + + //! equivalent to SparseMat::ref<_Tp>(i0, hashval) + _Tp& ref(int i0, size_t* hashval=0); + //! equivalent to SparseMat::ref<_Tp>(i0, i1, hashval) + _Tp& ref(int i0, int i1, size_t* hashval=0); + //! equivalent to SparseMat::ref<_Tp>(i0, i1, i2, hashval) + _Tp& ref(int i0, int i1, int i2, size_t* hashval=0); + //! equivalent to SparseMat::ref<_Tp>(idx, hashval) + _Tp& ref(const int* idx, size_t* hashval=0); + + //! equivalent to SparseMat::value<_Tp>(i0, hashval) + _Tp operator()(int i0, size_t* hashval=0) const; + //! equivalent to SparseMat::value<_Tp>(i0, i1, hashval) + _Tp operator()(int i0, int i1, size_t* hashval=0) const; + //! equivalent to SparseMat::value<_Tp>(i0, i1, i2, hashval) + _Tp operator()(int i0, int i1, int i2, size_t* hashval=0) const; + //! equivalent to SparseMat::value<_Tp>(idx, hashval) + _Tp operator()(const int* idx, size_t* hashval=0) const; + + //! returns sparse matrix iterator pointing to the first sparse matrix element + SparseMatIterator_<_Tp> begin(); + //! returns read-only sparse matrix iterator pointing to the first sparse matrix element + SparseMatConstIterator_<_Tp> begin() const; + //! returns sparse matrix iterator pointing to the element following the last sparse matrix element + SparseMatIterator_<_Tp> end(); + //! returns read-only sparse matrix iterator pointing to the element following the last sparse matrix element + SparseMatConstIterator_<_Tp> end() const; +}; + + + +////////////////////////////////// MatConstIterator ////////////////////////////////// + +class CV_EXPORTS MatConstIterator +{ +public: + typedef uchar* value_type; + typedef ptrdiff_t difference_type; + typedef const uchar** pointer; + typedef uchar* reference; + + typedef std::random_access_iterator_tag iterator_category; + + //! default constructor + MatConstIterator(); + //! constructor that sets the iterator to the beginning of the matrix + MatConstIterator(const Mat* _m); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator(const Mat* _m, int _row, int _col=0); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator(const Mat* _m, Point _pt); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator(const Mat* _m, const int* _idx); + //! copy constructor + MatConstIterator(const MatConstIterator& it); + + //! copy operator + MatConstIterator& operator = (const MatConstIterator& it); + //! returns the current matrix element + const uchar* operator *() const; + //! returns the i-th matrix element, relative to the current + const uchar* operator [](ptrdiff_t i) const; + + //! shifts the iterator forward by the specified number of elements + MatConstIterator& operator += (ptrdiff_t ofs); + //! 
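/*
   Editorial sketch of the typed wrapper in use (not part of the upstream
   header), complementing the @code fragment in the SparseMat_ description
   above:
   @code
       int sz[] = {10, 20, 30};
       cv::SparseMat_<double> M(3, sz);
       M.ref(1, 2, 3) = 7.0;       // typed ref(): no explicit template argument
       double v = M(4, 5, 6);      // typed read access; 0.0 for an absent element
   @endcode
*/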
shifts the iterator backward by the specified number of elements + MatConstIterator& operator -= (ptrdiff_t ofs); + //! decrements the iterator + MatConstIterator& operator --(); + //! decrements the iterator + MatConstIterator operator --(int); + //! increments the iterator + MatConstIterator& operator ++(); + //! increments the iterator + MatConstIterator operator ++(int); + //! returns the current iterator position + Point pos() const; + //! returns the current iterator position + void pos(int* _idx) const; + + ptrdiff_t lpos() const; + void seek(ptrdiff_t ofs, bool relative = false); + void seek(const int* _idx, bool relative = false); + + const Mat* m; + size_t elemSize; + const uchar* ptr; + const uchar* sliceStart; + const uchar* sliceEnd; +}; + + + +////////////////////////////////// MatConstIterator_ ///////////////////////////////// + +/** @brief Matrix read-only iterator + */ +template +class MatConstIterator_ : public MatConstIterator +{ +public: + typedef _Tp value_type; + typedef ptrdiff_t difference_type; + typedef const _Tp* pointer; + typedef const _Tp& reference; + + typedef std::random_access_iterator_tag iterator_category; + + //! default constructor + MatConstIterator_(); + //! constructor that sets the iterator to the beginning of the matrix + MatConstIterator_(const Mat_<_Tp>* _m); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col=0); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator_(const Mat_<_Tp>* _m, Point _pt); + //! constructor that sets the iterator to the specified element of the matrix + MatConstIterator_(const Mat_<_Tp>* _m, const int* _idx); + //! copy constructor + MatConstIterator_(const MatConstIterator_& it); + + //! copy operator + MatConstIterator_& operator = (const MatConstIterator_& it); + //! returns the current matrix element + const _Tp& operator *() const; + //! returns the i-th matrix element, relative to the current + const _Tp& operator [](ptrdiff_t i) const; + + //! shifts the iterator forward by the specified number of elements + MatConstIterator_& operator += (ptrdiff_t ofs); + //! shifts the iterator backward by the specified number of elements + MatConstIterator_& operator -= (ptrdiff_t ofs); + //! decrements the iterator + MatConstIterator_& operator --(); + //! decrements the iterator + MatConstIterator_ operator --(int); + //! increments the iterator + MatConstIterator_& operator ++(); + //! increments the iterator + MatConstIterator_ operator ++(int); + //! returns the current iterator position + Point pos() const; +}; + + + +//////////////////////////////////// MatIterator_ //////////////////////////////////// + +/** @brief Matrix read-write iterator +*/ +template +class MatIterator_ : public MatConstIterator_<_Tp> +{ +public: + typedef _Tp* pointer; + typedef _Tp& reference; + + typedef std::random_access_iterator_tag iterator_category; + + //! the default constructor + MatIterator_(); + //! constructor that sets the iterator to the beginning of the matrix + MatIterator_(Mat_<_Tp>* _m); + //! constructor that sets the iterator to the specified element of the matrix + MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0); + //! constructor that sets the iterator to the specified element of the matrix + MatIterator_(Mat_<_Tp>* _m, Point _pt); + //! constructor that sets the iterator to the specified element of the matrix + MatIterator_(Mat_<_Tp>* _m, const int* _idx); + //! 
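/*
   Editorial sketch (not part of the upstream header): summing a matrix with
   the read-only typed iterator declared above:
   @code
       const cv::Mat_<float> m = cv::Mat_<float>::ones(3, 3);
       double s = 0;
       for( cv::MatConstIterator_<float> it = m.begin(); it != m.end(); ++it )
           s += *it;               // s == 9
   @endcode
*/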
copy constructor + MatIterator_(const MatIterator_& it); + //! copy operator + MatIterator_& operator = (const MatIterator_<_Tp>& it ); + + //! returns the current matrix element + _Tp& operator *() const; + //! returns the i-th matrix element, relative to the current + _Tp& operator [](ptrdiff_t i) const; + + //! shifts the iterator forward by the specified number of elements + MatIterator_& operator += (ptrdiff_t ofs); + //! shifts the iterator backward by the specified number of elements + MatIterator_& operator -= (ptrdiff_t ofs); + //! decrements the iterator + MatIterator_& operator --(); + //! decrements the iterator + MatIterator_ operator --(int); + //! increments the iterator + MatIterator_& operator ++(); + //! increments the iterator + MatIterator_ operator ++(int); +}; + + + +/////////////////////////////// SparseMatConstIterator /////////////////////////////// + +/** @brief Read-Only Sparse Matrix Iterator. + + Here is how to use the iterator to compute the sum of floating-point sparse matrix elements: + + \code + SparseMatConstIterator it = m.begin(), it_end = m.end(); + double s = 0; + CV_Assert( m.type() == CV_32F ); + for( ; it != it_end; ++it ) + s += it.value(); + \endcode +*/ +class CV_EXPORTS SparseMatConstIterator +{ +public: + //! the default constructor + SparseMatConstIterator(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatConstIterator(const SparseMat* _m); + //! the copy constructor + SparseMatConstIterator(const SparseMatConstIterator& it); + + //! the assignment operator + SparseMatConstIterator& operator = (const SparseMatConstIterator& it); + + //! template method returning the current matrix element + template const _Tp& value() const; + //! returns the current node of the sparse matrix. it.node->idx is the current element index + const SparseMat::Node* node() const; + + //! moves iterator to the previous element + SparseMatConstIterator& operator --(); + //! moves iterator to the previous element + SparseMatConstIterator operator --(int); + //! moves iterator to the next element + SparseMatConstIterator& operator ++(); + //! moves iterator to the next element + SparseMatConstIterator operator ++(int); + + //! moves iterator to the element after the last element + void seekEnd(); + + const SparseMat* m; + size_t hashidx; + uchar* ptr; +}; + + + +////////////////////////////////// SparseMatIterator ///////////////////////////////// + +/** @brief Read-write Sparse Matrix Iterator + + The class is similar to cv::SparseMatConstIterator, + but can be used for in-place modification of the matrix elements. +*/ +class CV_EXPORTS SparseMatIterator : public SparseMatConstIterator +{ +public: + //! the default constructor + SparseMatIterator(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatIterator(SparseMat* _m); + //! the full constructor setting the iterator to the specified sparse matrix element + SparseMatIterator(SparseMat* _m, const int* idx); + //! the copy constructor + SparseMatIterator(const SparseMatIterator& it); + + //! the assignment operator + SparseMatIterator& operator = (const SparseMatIterator& it); + //! returns read-write reference to the current sparse matrix element + template _Tp& value() const; + //! returns pointer to the current sparse matrix node. it.node->idx is the index of the current element (do not modify it!) + SparseMat::Node* node() const; + + //! moves iterator to the next element + SparseMatIterator& operator ++(); + //! 
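/*
   Editorial sketch (not part of the upstream header): in-place scaling of
   every stored element via the read-write sparse iterator, assuming `sm` is
   a CV_32F cv::SparseMat:
   @code
       for( cv::SparseMatIterator it = sm.begin(); it != sm.end(); ++it )
           it.value<float>() *= 0.5f;   // touches only the non-zero entries
   @endcode
*/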
moves iterator to the next element + SparseMatIterator operator ++(int); +}; + + + +/////////////////////////////// SparseMatConstIterator_ ////////////////////////////// + +/** @brief Template Read-Only Sparse Matrix Iterator Class. + + This is the derived from SparseMatConstIterator class that + introduces more convenient operator *() for accessing the current element. +*/ +template class SparseMatConstIterator_ : public SparseMatConstIterator +{ +public: + + typedef std::forward_iterator_tag iterator_category; + + //! the default constructor + SparseMatConstIterator_(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatConstIterator_(const SparseMat_<_Tp>* _m); + SparseMatConstIterator_(const SparseMat* _m); + //! the copy constructor + SparseMatConstIterator_(const SparseMatConstIterator_& it); + + //! the assignment operator + SparseMatConstIterator_& operator = (const SparseMatConstIterator_& it); + //! the element access operator + const _Tp& operator *() const; + + //! moves iterator to the next element + SparseMatConstIterator_& operator ++(); + //! moves iterator to the next element + SparseMatConstIterator_ operator ++(int); +}; + + + +///////////////////////////////// SparseMatIterator_ ///////////////////////////////// + +/** @brief Template Read-Write Sparse Matrix Iterator Class. + + This is the derived from cv::SparseMatConstIterator_ class that + introduces more convenient operator *() for accessing the current element. +*/ +template class SparseMatIterator_ : public SparseMatConstIterator_<_Tp> +{ +public: + + typedef std::forward_iterator_tag iterator_category; + + //! the default constructor + SparseMatIterator_(); + //! the full constructor setting the iterator to the first sparse matrix element + SparseMatIterator_(SparseMat_<_Tp>* _m); + SparseMatIterator_(SparseMat* _m); + //! the copy constructor + SparseMatIterator_(const SparseMatIterator_& it); + + //! the assignment operator + SparseMatIterator_& operator = (const SparseMatIterator_& it); + //! returns the reference to the current element + _Tp& operator *() const; + + //! moves the iterator to the next element + SparseMatIterator_& operator ++(); + //! moves the iterator to the next element + SparseMatIterator_ operator ++(int); +}; + + + +/////////////////////////////////// NAryMatIterator ////////////////////////////////// + +/** @brief n-ary multi-dimensional array iterator. + +Use the class to implement unary, binary, and, generally, n-ary element-wise operations on +multi-dimensional arrays. Some of the arguments of an n-ary function may be continuous arrays, some +may be not. It is possible to use conventional MatIterator 's for each array but incrementing all of +the iterators after each small operations may be a big overhead. In this case consider using +NAryMatIterator to iterate through several matrices simultaneously as long as they have the same +geometry (dimensionality and all the dimension sizes are the same). On each iteration `it.planes[0]`, +`it.planes[1]`,... will be the slices of the corresponding matrices. + +The example below illustrates how you can compute a normalized and threshold 3D color histogram: +@code + void computeNormalizedColorHist(const Mat& image, Mat& hist, int N, double minProb) + { + const int histSize[] = {N, N, N}; + + // make sure that the histogram has a proper size and type + hist.create(3, histSize, CV_32F); + + // and clear it + hist = Scalar(0); + + // the loop below assumes that the image + // is a 8-bit 3-channel. 
check it.
+        CV_Assert(image.type() == CV_8UC3);
+        MatConstIterator_<Vec3b> it = image.begin<Vec3b>(),
+                                 it_end = image.end<Vec3b>();
+        for( ; it != it_end; ++it )
+        {
+            const Vec3b& pix = *it;
+            hist.at<float>(pix[0]*N/256, pix[1]*N/256, pix[2]*N/256) += 1.f;
+        }
+
+        minProb *= image.rows*image.cols;
+
+        // initialize iterator (the style is different from STL).
+        // after initialization the iterator will contain
+        // the number of slices or planes the iterator will go through.
+        // it simultaneously increments iterators for several matrices
+        // supplied as a null terminated list of pointers
+        const Mat* arrays[] = {&hist, 0};
+        Mat planes[1];
+        NAryMatIterator itNAry(arrays, planes, 1);
+        double s = 0;
+        // iterate through the matrix. on each iteration
+        // itNAry.planes[i] (of type Mat) will be set to the current plane
+        // of the i-th n-dim matrix passed to the iterator constructor.
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+        {
+            threshold(itNAry.planes[0], itNAry.planes[0], minProb, 0, THRESH_TOZERO);
+            s += sum(itNAry.planes[0])[0];
+        }
+
+        s = 1./s;
+        itNAry = NAryMatIterator(arrays, planes, 1);
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+            itNAry.planes[0] *= s;
+    }
+@endcode
+ */
+class CV_EXPORTS NAryMatIterator
+{
+public:
+    //! the default constructor
+    NAryMatIterator();
+    //! the full constructor taking arbitrary number of n-dim matrices
+    NAryMatIterator(const Mat** arrays, uchar** ptrs, int narrays=-1);
+    //! the full constructor taking arbitrary number of n-dim matrices
+    NAryMatIterator(const Mat** arrays, Mat* planes, int narrays=-1);
+    //! the separate iterator initialization method
+    void init(const Mat** arrays, Mat* planes, uchar** ptrs, int narrays=-1);
+
+    //! proceeds to the next plane of every iterated matrix
+    NAryMatIterator& operator ++();
+    //! proceeds to the next plane of every iterated matrix (postfix increment operator)
+    NAryMatIterator operator ++(int);
+
+    //! the iterated arrays
+    const Mat** arrays;
+    //! the current planes
+    Mat* planes;
+    //! data pointers
+    uchar** ptrs;
+    //! the number of arrays
+    int narrays;
+    //! the number of hyper-planes that the iterator steps through
+    size_t nplanes;
+    //!
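/*
   Editorial sketch (not part of the upstream header): a smaller
   NAryMatIterator example than the histogram above - summing a 3-D matrix
   plane by plane:
   @code
       int sz[] = {4, 4, 4};
       cv::Mat A(3, sz, CV_32F, cv::Scalar(1));
       const cv::Mat* arrays[] = { &A, 0 };      // null-terminated list of inputs
       cv::Mat planes[1];
       cv::NAryMatIterator it(arrays, planes, 1);
       double total = 0;
       for( size_t p = 0; p < it.nplanes; p++, ++it )
           total += cv::sum(it.planes[0])[0];    // total == 64
   @endcode
*/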
the size of each segment (in elements) + size_t size; +protected: + int iterdepth; + size_t idx; +}; + + + +///////////////////////////////// Matrix Expressions ///////////////////////////////// + +class CV_EXPORTS MatOp +{ +public: + MatOp(); + virtual ~MatOp(); + + virtual bool elementWise(const MatExpr& expr) const; + virtual void assign(const MatExpr& expr, Mat& m, int type=-1) const = 0; + virtual void roi(const MatExpr& expr, const Range& rowRange, + const Range& colRange, MatExpr& res) const; + virtual void diag(const MatExpr& expr, int d, MatExpr& res) const; + virtual void augAssignAdd(const MatExpr& expr, Mat& m) const; + virtual void augAssignSubtract(const MatExpr& expr, Mat& m) const; + virtual void augAssignMultiply(const MatExpr& expr, Mat& m) const; + virtual void augAssignDivide(const MatExpr& expr, Mat& m) const; + virtual void augAssignAnd(const MatExpr& expr, Mat& m) const; + virtual void augAssignOr(const MatExpr& expr, Mat& m) const; + virtual void augAssignXor(const MatExpr& expr, Mat& m) const; + + virtual void add(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const; + virtual void add(const MatExpr& expr1, const Scalar& s, MatExpr& res) const; + + virtual void subtract(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const; + virtual void subtract(const Scalar& s, const MatExpr& expr, MatExpr& res) const; + + virtual void multiply(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const; + virtual void multiply(const MatExpr& expr1, double s, MatExpr& res) const; + + virtual void divide(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const; + virtual void divide(double s, const MatExpr& expr, MatExpr& res) const; + + virtual void abs(const MatExpr& expr, MatExpr& res) const; + + virtual void transpose(const MatExpr& expr, MatExpr& res) const; + virtual void matmul(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const; + virtual void invert(const MatExpr& expr, int method, MatExpr& res) const; + + virtual Size size(const MatExpr& expr) const; + virtual int type(const MatExpr& expr) const; +}; + +/** @brief Matrix expression representation +@anchor MatrixExpressions +This is a list of implemented matrix operations that can be combined in arbitrary complex +expressions (here A, B stand for matrices ( Mat ), s for a scalar ( Scalar ), alpha for a +real-valued scalar ( double )): +- Addition, subtraction, negation: `A+B`, `A-B`, `A+s`, `A-s`, `s+A`, `s-A`, `-A` +- Scaling: `A*alpha` +- Per-element multiplication and division: `A.mul(B)`, `A/B`, `alpha/A` +- Matrix multiplication: `A*B` +- Transposition: `A.t()` (means AT) +- Matrix inversion and pseudo-inversion, solving linear systems and least-squares problems: + `A.inv([method]) (~ A-1)`, `A.inv([method])*B (~ X: AX=B)` +- Comparison: `A cmpop B`, `A cmpop alpha`, `alpha cmpop A`, where *cmpop* is one of + `>`, `>=`, `==`, `!=`, `<=`, `<`. The result of comparison is an 8-bit single channel mask whose + elements are set to 255 (if the particular element or pair of elements satisfy the condition) or + 0. +- Bitwise logical operations: `A logicop B`, `A logicop s`, `s logicop A`, `~A`, where *logicop* is one of + `&`, `|`, `^`. 
+- Element-wise minimum and maximum: `min(A, B)`, `min(A, alpha)`, `max(A, B)`, `max(A, alpha)` +- Element-wise absolute value: `abs(A)` +- Cross-product, dot-product: `A.cross(B)`, `A.dot(B)` +- Any function of matrix or matrices and scalars that returns a matrix or a scalar, such as norm, + mean, sum, countNonZero, trace, determinant, repeat, and others. +- Matrix initializers ( Mat::eye(), Mat::zeros(), Mat::ones() ), matrix comma-separated + initializers, matrix constructors and operators that extract sub-matrices (see Mat description). +- Mat_() constructors to cast the result to the proper type. +@note Comma-separated initializers and probably some other operations may require additional +explicit Mat() or Mat_() constructor calls to resolve a possible ambiguity. + +Here are examples of matrix expressions: +@code + // compute pseudo-inverse of A, equivalent to A.inv(DECOMP_SVD) + SVD svd(A); + Mat pinvA = svd.vt.t()*Mat::diag(1./svd.w)*svd.u.t(); + + // compute the new vector of parameters in the Levenberg-Marquardt algorithm + x -= (A.t()*A + lambda*Mat::eye(A.cols,A.cols,A.type())).inv(DECOMP_CHOLESKY)*(A.t()*err); + + // sharpen image using "unsharp mask" algorithm + Mat blurred; double sigma = 1, threshold = 5, amount = 1; + GaussianBlur(img, blurred, Size(), sigma, sigma); + Mat lowContrastMask = abs(img - blurred) < threshold; + Mat sharpened = img*(1+amount) + blurred*(-amount); + img.copyTo(sharpened, lowContrastMask); +@endcode +*/ +class CV_EXPORTS MatExpr +{ +public: + MatExpr(); + explicit MatExpr(const Mat& m); + + MatExpr(const MatOp* _op, int _flags, const Mat& _a = Mat(), const Mat& _b = Mat(), + const Mat& _c = Mat(), double _alpha = 1, double _beta = 1, const Scalar& _s = Scalar()); + + operator Mat() const; + template operator Mat_<_Tp>() const; + + Size size() const; + int type() const; + + MatExpr row(int y) const; + MatExpr col(int x) const; + MatExpr diag(int d = 0) const; + MatExpr operator()( const Range& rowRange, const Range& colRange ) const; + MatExpr operator()( const Rect& roi ) const; + + MatExpr t() const; + MatExpr inv(int method = DECOMP_LU) const; + MatExpr mul(const MatExpr& e, double scale=1) const; + MatExpr mul(const Mat& m, double scale=1) const; + + Mat cross(const Mat& m) const; + double dot(const Mat& m) const; + + void swap(MatExpr& b); + + const MatOp* op; + int flags; + + Mat a, b, c; + double alpha, beta; + Scalar s; +}; + +//! @} core_basic + +//! @relates cv::MatExpr +//! 
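/*
   Editorial sketch (not part of the upstream header): the expression tree is
   built lazily and evaluated when assigned to a cv::Mat, as in the
   normal-equations solve below:
   @code
       cv::Mat A = cv::Mat::eye(3, 3, CV_64F);
       cv::Mat B = cv::Mat::ones(3, 1, CV_64F);
       cv::Mat X = (A.t() * A).inv(cv::DECOMP_CHOLESKY) * (A.t() * B);
       cv::Mat C = cv::abs(A * 2.0 - cv::Scalar::all(1.0));  // scale, subtract, abs in one expression
   @endcode
*/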
@{ +CV_EXPORTS MatExpr operator + (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator + (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator + (const Scalar& s, const Mat& a); +CV_EXPORTS MatExpr operator + (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s); +CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e); +CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator + (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); } +template static inline +MatExpr operator + (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; } + +CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator - (const Scalar& s, const Mat& a); +CV_EXPORTS MatExpr operator - (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s); +CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e); +CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator - (const Mat& a, const Matx<_Tp, m, n>& b) { return a - Mat(b); } +template static inline +MatExpr operator - (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) - b; } + +CV_EXPORTS MatExpr operator - (const Mat& m); +CV_EXPORTS MatExpr operator - (const MatExpr& e); + +CV_EXPORTS MatExpr operator * (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator * (const Mat& a, double s); +CV_EXPORTS MatExpr operator * (double s, const Mat& a); +CV_EXPORTS MatExpr operator * (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator * (const MatExpr& e, double s); +CV_EXPORTS MatExpr operator * (double s, const MatExpr& e); +CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator * (const Mat& a, const Matx<_Tp, m, n>& b) { return a * Mat(b); } +template static inline +MatExpr operator * (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) * b; } + +CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator / (const Mat& a, double s); +CV_EXPORTS MatExpr operator / (double s, const Mat& a); +CV_EXPORTS MatExpr operator / (const MatExpr& e, const Mat& m); +CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e); +CV_EXPORTS MatExpr operator / (const MatExpr& e, double s); +CV_EXPORTS MatExpr operator / (double s, const MatExpr& e); +CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2); +template static inline +MatExpr operator / (const Mat& a, const Matx<_Tp, m, n>& b) { return a / Mat(b); } +template static inline +MatExpr operator / (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) / b; } + +CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator < (const Mat& a, double s); +CV_EXPORTS MatExpr operator < (double s, const Mat& a); +template static inline +MatExpr operator < (const Mat& a, const Matx<_Tp, m, n>& b) { return a < Mat(b); } +template static inline +MatExpr operator < (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) < b; } + +CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator <= (const Mat& a, double s); +CV_EXPORTS 
MatExpr operator <= (double s, const Mat& a); +template static inline +MatExpr operator <= (const Mat& a, const Matx<_Tp, m, n>& b) { return a <= Mat(b); } +template static inline +MatExpr operator <= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) <= b; } + +CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator == (const Mat& a, double s); +CV_EXPORTS MatExpr operator == (double s, const Mat& a); +template static inline +MatExpr operator == (const Mat& a, const Matx<_Tp, m, n>& b) { return a == Mat(b); } +template static inline +MatExpr operator == (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) == b; } + +CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator != (const Mat& a, double s); +CV_EXPORTS MatExpr operator != (double s, const Mat& a); +template static inline +MatExpr operator != (const Mat& a, const Matx<_Tp, m, n>& b) { return a != Mat(b); } +template static inline +MatExpr operator != (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) != b; } + +CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator >= (const Mat& a, double s); +CV_EXPORTS MatExpr operator >= (double s, const Mat& a); +template static inline +MatExpr operator >= (const Mat& a, const Matx<_Tp, m, n>& b) { return a >= Mat(b); } +template static inline +MatExpr operator >= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) >= b; } + +CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator > (const Mat& a, double s); +CV_EXPORTS MatExpr operator > (double s, const Mat& a); +template static inline +MatExpr operator > (const Mat& a, const Matx<_Tp, m, n>& b) { return a > Mat(b); } +template static inline +MatExpr operator > (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) > b; } + +CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a); +template static inline +MatExpr operator & (const Mat& a, const Matx<_Tp, m, n>& b) { return a & Mat(b); } +template static inline +MatExpr operator & (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) & b; } + +CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a); +template static inline +MatExpr operator | (const Mat& a, const Matx<_Tp, m, n>& b) { return a | Mat(b); } +template static inline +MatExpr operator | (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) | b; } + +CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b); +CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s); +CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a); +template static inline +MatExpr operator ^ (const Mat& a, const Matx<_Tp, m, n>& b) { return a ^ Mat(b); } +template static inline +MatExpr operator ^ (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) ^ b; } + +CV_EXPORTS MatExpr operator ~(const Mat& m); + +CV_EXPORTS MatExpr min(const Mat& a, const Mat& b); +CV_EXPORTS MatExpr min(const Mat& a, double s); +CV_EXPORTS MatExpr min(double s, const Mat& a); +template static inline +MatExpr min (const Mat& a, const Matx<_Tp, m, n>& b) { return min(a, Mat(b)); } +template static inline +MatExpr min (const Matx<_Tp, m, n>& a, const Mat& b) { return min(Mat(a), b); } + +CV_EXPORTS MatExpr max(const Mat& a, const Mat& b); 
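/*
   Editorial sketch (not part of the upstream header): the comparison
   operators declared above yield 8-bit 0/255 masks, and min/max clamp
   element-wise:
   @code
       cv::Mat a = (cv::Mat_<uchar>(1, 4) << 10, 200, 30, 250);
       cv::Mat mask    = a > 100;            // [0, 255, 0, 255]
       cv::Mat clipped = cv::min(a, 128.0);  // [10, 128, 30, 128]
   @endcode
*/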
+CV_EXPORTS MatExpr max(const Mat& a, double s);
+CV_EXPORTS MatExpr max(double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr max (const Mat& a, const Matx<_Tp, m, n>& b) { return max(a, Mat(b)); }
+template<typename _Tp, int m, int n> static inline
+MatExpr max (const Matx<_Tp, m, n>& a, const Mat& b) { return max(Mat(a), b); }
+
+/** @brief Calculates an absolute value of each matrix element.
+
+abs is a meta-function that is expanded to one of absdiff or convertScaleAbs forms:
+- C = abs(A-B) is equivalent to `absdiff(A, B, C)`
+- C = abs(A) is equivalent to `absdiff(A, Scalar::all(0), C)`
+- C = `Mat_<Vec<uchar,n> >(abs(A*alpha + beta))` is equivalent to `convertScaleAbs(A, C, alpha,
+beta)`
+
+The output matrix has the same size and the same type as the input one except for the last case,
+where C is depth=CV_8U .
+@param m matrix.
+@sa @ref MatrixExpressions, absdiff, convertScaleAbs
+ */
+CV_EXPORTS MatExpr abs(const Mat& m);
+/** @overload
+@param e matrix expression.
+*/
+CV_EXPORTS MatExpr abs(const MatExpr& e);
+//! @} relates cv::MatExpr
+
+} // cv
+
+#include "opencv2/core/mat.inl.hpp"
+
+#endif // OPENCV_CORE_MAT_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.inl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.inl.hpp
new file mode 100644
index 0000000..886b82c
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/mat.inl.hpp
@@ -0,0 +1,3422 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_MATRIX_OPERATIONS_HPP +#define OPENCV_CORE_MATRIX_OPERATIONS_HPP + +#ifndef __cplusplus +# error mat.inl.hpp header must be compiled as C++ +#endif + +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + +#if defined(CV_SKIP_DISABLE_CLANG_ENUM_WARNINGS) + // nothing +#elif defined(CV_FORCE_DISABLE_CLANG_ENUM_WARNINGS) + #define CV_DISABLE_CLANG_ENUM_WARNINGS +#elif defined(__clang__) && defined(__has_warning) + #if __has_warning("-Wdeprecated-enum-enum-conversion") && __has_warning("-Wdeprecated-anon-enum-enum-conversion") + #define CV_DISABLE_CLANG_ENUM_WARNINGS + #endif +#endif +#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion" +#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion" +#endif + +namespace cv +{ +CV__DEBUG_NS_BEGIN + + +//! @cond IGNORED + +////////////////////////// Custom (raw) type wrapper ////////////////////////// + +template static inline +int rawType() +{ + CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large"); + const int elemSize = sizeof(_Tp); + return (int)CV_MAKETYPE(CV_8U, elemSize); +} + +//////////////////////// Input/Output Arrays //////////////////////// + +inline void _InputArray::init(int _flags, const void* _obj) +{ flags = _flags; obj = (void*)_obj; } + +inline void _InputArray::init(int _flags, const void* _obj, Size _sz) +{ flags = _flags; obj = (void*)_obj; sz = _sz; } + +inline void* _InputArray::getObj() const { return obj; } +inline int _InputArray::getFlags() const { return flags; } +inline Size _InputArray::getSz() const { return sz; } + +inline _InputArray::_InputArray() { init(0 + NONE, 0); } +inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); } +inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); } +inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); } +inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); } +inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::vector<_Tp>& vec) +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); } + +template inline +_InputArray::_InputArray(const std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); } + +inline +_InputArray::_InputArray(const std::vector& vec) +{ init(FIXED_TYPE + STD_BOOL_VECTOR + traits::Type::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline 
+_InputArray::_InputArray(const std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_READ, &vec); } + +template inline +_InputArray::_InputArray(const Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, &mtx, Size(n, m)); } + +template inline +_InputArray::_InputArray(const _Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, vec, Size(n, 1)); } + +template inline +_InputArray::_InputArray(const Mat_<_Tp>& m) +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_READ, &m); } + +inline _InputArray::_InputArray(const double& val) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); } + +inline _InputArray::_InputArray(const cuda::GpuMat& d_mat) +{ init(CUDA_GPU_MAT + ACCESS_READ, &d_mat); } + +inline _InputArray::_InputArray(const std::vector& d_mat) +{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_READ, &d_mat);} + +inline _InputArray::_InputArray(const ogl::Buffer& buf) +{ init(OPENGL_BUFFER + ACCESS_READ, &buf); } + +inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); } + +template inline +_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec) +{ + _InputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ; + v.obj = (void*)&vec; + return v; +} + +template inline +_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr) +{ + _InputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + +inline _InputArray::~_InputArray() {} + +inline Mat _InputArray::getMat(int i) const +{ + if( kind() == MAT && i < 0 ) + return *(const Mat*)obj; + return getMat_(i); +} + +inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; } +inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; } +inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; } +inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; } +inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } +inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || + kind() == _InputArray::STD_BOOL_VECTOR || + (kind() == _InputArray::MATX && (sz.width <= 1 || sz.height <= 1)); } +inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; } +inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; } + +//////////////////////////////////////////////////////////////////////////////////////// + +inline _OutputArray::_OutputArray() { init(NONE + ACCESS_WRITE, 0); } +inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags + ACCESS_WRITE, _obj); } +inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_MAT + ACCESS_WRITE, &vec); } +inline _OutputArray::_OutputArray(UMat& m) { init(UMAT + ACCESS_WRITE, &m); } +inline _OutputArray::_OutputArray(std::vector& vec) { init(STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(std::vector<_Tp>& vec) +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(std::array<_Tp, _Nm>& 
arr) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(Mat_<_Tp>& m) +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); } + +template inline +_OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); } + +template inline +_OutputArray::_OutputArray(_Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } + +template inline +_OutputArray::_OutputArray(const std::vector<_Tp>& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(const std::array& arr) +{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } + +template inline +_OutputArray::_OutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); } + +template inline +_OutputArray::_OutputArray(const Mat_<_Tp>& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); } + +template inline +_OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); } + +template inline +_OutputArray::_OutputArray(const _Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); } + +inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat) +{ init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } + +inline _OutputArray::_OutputArray(std::vector& d_mat) +{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_WRITE, &d_mat);} + +inline _OutputArray::_OutputArray(ogl::Buffer& buf) +{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); } + +inline _OutputArray::_OutputArray(cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } + +inline _OutputArray::_OutputArray(const Mat& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); } + +inline _OutputArray::_OutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); } + +inline _OutputArray::_OutputArray(const UMat& m) +{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_WRITE, &m); } + +inline _OutputArray::_OutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_WRITE, &vec); } + +inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); } + + +inline _OutputArray::_OutputArray(const ogl::Buffer& buf) +{ init(FIXED_TYPE + FIXED_SIZE + 
OPENGL_BUFFER + ACCESS_WRITE, &buf); } + +inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); } + +template inline +_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec) +{ + _OutputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE; + v.obj = (void*)&vec; + return v; +} + +template inline +_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr) +{ + _OutputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + +/////////////////////////////////////////////////////////////////////////////////////////// + +inline _InputOutputArray::_InputOutputArray() { init(0+ACCESS_RW, 0); } +inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags+ACCESS_RW, _obj); } +inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); } +inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); } +inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); } +inline _InputOutputArray::_InputOutputArray(std::vector& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec) +{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(std::array& arr) +{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(std::vector >& vec) +{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(Mat_<_Tp>& m) +{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); } + +template inline +_InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); } + +template inline +_InputOutputArray::_InputOutputArray(_Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::array& arr) +{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); } + +template inline +_InputOutputArray::_InputOutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline +_InputOutputArray::_InputOutputArray(const std::vector >& vec) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); } + +template inline 
+_InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); } + +template inline +_InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); } + +template inline +_InputOutputArray::_InputOutputArray(const _Tp* vec, int n) +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); } + +inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat) +{ init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); } + +inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf) +{ init(OPENGL_BUFFER + ACCESS_RW, &buf); } + +inline _InputOutputArray::_InputOutputArray(cuda::HostMem& cuda_mem) +{ init(CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } + +inline _InputOutputArray::_InputOutputArray(const Mat& m) +{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); } + +inline _InputOutputArray::_InputOutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); } + +inline _InputOutputArray::_InputOutputArray(const UMat& m) +{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_RW, &m); } + +inline _InputOutputArray::_InputOutputArray(const std::vector& vec) +{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); } + +inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); } + +inline _InputOutputArray::_InputOutputArray(const std::vector& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} + +template<> inline _InputOutputArray::_InputOutputArray(std::vector& d_mat) +{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);} + +inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf) +{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); } + +inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem) +{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); } + +template inline +_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec) +{ + _InputOutputArray v; + v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW; + v.obj = (void*)&vec; + return v; +} + +template inline +_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr) +{ + _InputOutputArray v; + v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW; + v.obj = (void*)arr.data(); + v.sz = Size(1, _Nm); + return v; +} + + +template static inline _InputArray rawIn(_Tp& v) { return _InputArray::rawIn(v); } +template static inline _OutputArray rawOut(_Tp& v) { return _OutputArray::rawOut(v); } +template static inline _InputOutputArray rawInOut(_Tp& v) { return _InputOutputArray::rawInOut(v); } + +CV__DEBUG_NS_END + +//////////////////////////////////////////// Mat ////////////////////////////////////////// + +template inline +Mat::Mat(const std::vector<_Tp>& vec, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), + cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if(vec.empty()) + return; + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)&vec[0]; + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); 
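/* Editorial note on the copyData flag handled above (not part of the
   upstream header):
   @code
       std::vector<float> v = {1.f, 2.f, 3.f};
       cv::Mat header(v);        // copyData=false: wraps v's buffer, v must outlive header
       cv::Mat owned(v, true);   // copyData=true: deep copy, independent of v
   @endcode
*/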
+} + +template inline +Mat::Mat(const std::initializer_list<_Tp> list) + : Mat() +{ + CV_Assert(list.size() != 0); + Mat((int)list.size(), 1, traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this); +} + +template inline +Mat::Mat(const std::initializer_list sizes, const std::initializer_list<_Tp> list) + : Mat() +{ + size_t size_total = 1; + for(auto s : sizes) + size_total *= s; + CV_Assert(list.size() != 0); + CV_Assert(size_total == list.size()); + Mat((int)sizes.size(), (int*)sizes.begin(), traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this); +} + +template inline +Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()), + cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if(arr.empty()) + return; + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)arr.data(); + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat((int)arr.size(), 1, traits::Type<_Tp>::value, (uchar*)arr.data()).copyTo(*this); +} + +template inline +Mat::Mat(const Vec<_Tp, n>& vec, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)vec.val; + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat(n, 1, traits::Type<_Tp>::value, (void*)vec.val).copyTo(*this); +} + + +template inline +Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = cols * sizeof(_Tp); + step[1] = sizeof(_Tp); + datastart = data = (uchar*)M.val; + datalimit = dataend = datastart + rows * step[0]; + } + else + Mat(m, n, traits::Type<_Tp>::value, (uchar*)M.val).copyTo(*this); +} + +template inline +Mat::Mat(const Point_<_Tp>& pt, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)&pt.x; + datalimit = dataend = datastart + rows * step[0]; + } + else + { + create(2, 1, traits::Type<_Tp>::value); + ((_Tp*)data)[0] = pt.x; + ((_Tp*)data)[1] = pt.y; + } +} + +template inline +Mat::Mat(const Point3_<_Tp>& pt, bool copyData) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0), + datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) +{ + if( !copyData ) + { + step[0] = step[1] = sizeof(_Tp); + datastart = data = (uchar*)&pt.x; + datalimit = dataend = datastart + rows * step[0]; + } + else + { + create(3, 1, traits::Type<_Tp>::value); + ((_Tp*)data)[0] = pt.x; + ((_Tp*)data)[1] = pt.y; + ((_Tp*)data)[2] = pt.z; + } +} + +template inline +Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer) + : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0), + datastart(0), dataend(0), allocator(0), u(0), size(&rows) +{ + *this = commaInitializer.operator Mat_<_Tp>(); +} + +inline +Mat Mat::row(int y) const +{ + return Mat(*this, 
Range(y, y + 1), Range::all()); +} + +inline +Mat Mat::col(int x) const +{ + return Mat(*this, Range::all(), Range(x, x + 1)); +} + +inline +Mat Mat::rowRange(int startrow, int endrow) const +{ + return Mat(*this, Range(startrow, endrow), Range::all()); +} + +inline +Mat Mat::rowRange(const Range& r) const +{ + return Mat(*this, r, Range::all()); +} + +inline +Mat Mat::colRange(int startcol, int endcol) const +{ + return Mat(*this, Range::all(), Range(startcol, endcol)); +} + +inline +Mat Mat::colRange(const Range& r) const +{ + return Mat(*this, Range::all(), r); +} + +inline +Mat Mat::operator()( Range _rowRange, Range _colRange ) const +{ + return Mat(*this, _rowRange, _colRange); +} + +inline +Mat Mat::operator()( const Rect& roi ) const +{ + return Mat(*this, roi); +} + +inline +Mat Mat::operator()(const Range* ranges) const +{ + return Mat(*this, ranges); +} + +inline +Mat Mat::operator()(const std::vector& ranges) const +{ + return Mat(*this, ranges); +} + +inline +bool Mat::isContinuous() const +{ + return (flags & CONTINUOUS_FLAG) != 0; +} + +inline +bool Mat::isSubmatrix() const +{ + return (flags & SUBMATRIX_FLAG) != 0; +} + +inline +size_t Mat::elemSize() const +{ + size_t res = dims > 0 ? step.p[dims - 1] : 0; + CV_DbgAssert(res != 0); + return res; +} + +inline +size_t Mat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int Mat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int Mat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int Mat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +uchar* Mat::ptr(int y) +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return data + step.p[0] * y; +} + +inline +const uchar* Mat::ptr(int y) const +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return data + step.p[0] * y; +} + +template inline +_Tp* Mat::ptr(int y) +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return (_Tp*)(data + step.p[0] * y); +} + +template inline +const _Tp* Mat::ptr(int y) const +{ + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); + return (const _Tp*)(data + step.p[0] * y); +} + +inline +uchar* Mat::ptr(int i0, int i1) +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return data + i0 * step.p[0] + i1 * step.p[1]; +} + +inline +const uchar* Mat::ptr(int i0, int i1) const +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return data + i0 * step.p[0] + i1 * step.p[1]; +} + +template inline +_Tp* Mat::ptr(int i0, int i1) +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1]); +} + +template inline +const _Tp* Mat::ptr(int i0, int i1) const +{ + CV_DbgAssert(dims >= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1]); +} + +inline +uchar* Mat::ptr(int i0, int i1, int i2) +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + 
CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]; +} + +inline +const uchar* Mat::ptr(int i0, int i1, int i2) const +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]; +} + +template inline +_Tp* Mat::ptr(int i0, int i1, int i2) +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]); +} + +template inline +const _Tp* Mat::ptr(int i0, int i1, int i2) const +{ + CV_DbgAssert(dims >= 3); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]); + return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]); +} + +inline +uchar* Mat::ptr(const int* idx) +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return p; +} + +inline +const uchar* Mat::ptr(const int* idx) const +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return p; +} + +template inline +_Tp* Mat::ptr(const int* idx) +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return (_Tp*)p; +} + +template inline +const _Tp* Mat::ptr(const int* idx) const +{ + int i, d = dims; + uchar* p = data; + CV_DbgAssert( d >= 1 && p ); + for( i = 0; i < d; i++ ) + { + CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] ); + p += idx[i] * step.p[i]; + } + return (const _Tp*)p; +} + +template inline +uchar* Mat::ptr(const Vec& idx) +{ + return Mat::ptr(idx.val); +} + +template inline +const uchar* Mat::ptr(const Vec& idx) const +{ + return Mat::ptr(idx.val); +} + +template inline +_Tp* Mat::ptr(const Vec& idx) +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return Mat::ptr<_Tp>(idx.val); +} + +template inline +const _Tp* Mat::ptr(const Vec& idx) const +{ + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return Mat::ptr<_Tp>(idx.val); +} + + +template inline +_Tp& Mat::at(int i0, int i1) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); + return ((_Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +const _Tp& Mat::at(int i0, int i1) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels())); + CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1()); + return ((const _Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +_Tp& Mat::at(Point pt) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + 
CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
+    return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(Point pt) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
+    return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
+    CV_DbgAssert(elemSize() == sizeof(_Tp));
+    if( isContinuous() || size.p[0] == 1 )
+        return ((_Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(_Tp*)(data + step.p[0] * i0);
+    int i = i0 / cols, j = i0 - i * cols;
+    return ((_Tp*)(data + step.p[0] * i))[j];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
+    CV_DbgAssert(elemSize() == sizeof(_Tp));
+    if( isContinuous() || size.p[0] == 1 )
+        return ((const _Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(const _Tp*)(data + step.p[0] * i0);
+    int i = i0 / cols, j = i0 - i * cols;
+    return ((const _Tp*)(data + step.p[0] * i))[j];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0, int i1, int i2)
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(_Tp*)ptr(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0, int i1, int i2) const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(const _Tp*)ptr(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(const int* idx)
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(_Tp*)ptr(idx);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(const int* idx) const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(const _Tp*)ptr(idx);
+}
+
+template<typename _Tp, int n> inline
+_Tp& Mat::at(const Vec<int, n>& idx)
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(_Tp*)ptr(idx.val);
+}
+
+template<typename _Tp, int n> inline
+const _Tp& Mat::at(const Vec<int, n>& idx) const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(const _Tp*)ptr(idx.val);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat::begin() const
+{
+    if (empty())
+        return MatConstIterator_<_Tp>();
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+std::reverse_iterator<MatConstIterator_<_Tp>> Mat::rbegin() const
+{
+    if (empty())
+        return std::reverse_iterator<MatConstIterator_<_Tp>>();
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
+    it += total();
+    return std::reverse_iterator<MatConstIterator_<_Tp>> (it);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat::end() const
+{
+    if (empty())
+        return MatConstIterator_<_Tp>();
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
+    it += total();
+    return it;
+}
+
+template<typename _Tp> inline
+std::reverse_iterator<MatConstIterator_<_Tp>> Mat::rend() const
+{
+    if (empty())
+        return std::reverse_iterator<MatConstIterator_<_Tp>>();
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return std::reverse_iterator<MatConstIterator_<_Tp>>((const Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat::begin()
+{
+    if (empty())
+        return MatIterator_<_Tp>();
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return MatIterator_<_Tp>((Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+std::reverse_iterator> Mat::rbegin() +{ + if (empty()) + return std::reverse_iterator>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + MatIterator_<_Tp> it((Mat_<_Tp>*)this); + it += total(); + return std::reverse_iterator>(it); +} + +template inline +MatIterator_<_Tp> Mat::end() +{ + if (empty()) + return MatIterator_<_Tp>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + MatIterator_<_Tp> it((Mat_<_Tp>*)this); + it += total(); + return it; +} + +template inline +std::reverse_iterator> Mat::rend() +{ + if (empty()) + return std::reverse_iterator>(); + CV_DbgAssert( elemSize() == sizeof(_Tp) ); + return std::reverse_iterator>(MatIterator_<_Tp>((Mat_<_Tp>*)this)); +} + +template inline +void Mat::forEach(const Functor& operation) { + this->forEach_impl<_Tp>(operation); +} + +template inline +void Mat::forEach(const Functor& operation) const { + // call as not const + (const_cast(this))->forEach<_Tp>(operation); +} + +template inline +Mat::operator std::vector<_Tp>() const +{ + std::vector<_Tp> v; + copyTo(v); + return v; +} + +template inline +Mat::operator std::array<_Tp, _Nm>() const +{ + std::array<_Tp, _Nm> v; + copyTo(v); + return v; +} + +template inline +Mat::operator Vec<_Tp, n>() const +{ + CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) && + rows + cols - 1 == n && channels() == 1 ); + + if( isContinuous() && type() == traits::Type<_Tp>::value ) + return Vec<_Tp, n>((_Tp*)data); + Vec<_Tp, n> v; + Mat tmp(rows, cols, traits::Type<_Tp>::value, v.val); + convertTo(tmp, tmp.type()); + return v; +} + +template inline +Mat::operator Matx<_Tp, m, n>() const +{ + CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 ); + + if( isContinuous() && type() == traits::Type<_Tp>::value ) + return Matx<_Tp, m, n>((_Tp*)data); + Matx<_Tp, m, n> mtx; + Mat tmp(rows, cols, traits::Type<_Tp>::value, mtx.val); + convertTo(tmp, tmp.type()); + return mtx; +} + +template inline +void Mat::push_back(const _Tp& elem) +{ + if( !data ) + { + *this = Mat(1, 1, traits::Type<_Tp>::value, (void*)&elem).clone(); + return; + } + CV_Assert(traits::Type<_Tp>::value == type() && cols == 1 + /* && dims == 2 (cols == 1 implies dims == 2) */); + const uchar* tmp = dataend + step[0]; + if( !isSubmatrix() && isContinuous() && tmp <= datalimit ) + { + *(_Tp*)(data + (size.p[0]++) * step.p[0]) = elem; + dataend = tmp; + } + else + push_back_(&elem); +} + +template inline +void Mat::push_back(const Mat_<_Tp>& m) +{ + push_back((const Mat&)m); +} + +template<> inline +void Mat::push_back(const MatExpr& expr) +{ + push_back(static_cast(expr)); +} + + +template inline +void Mat::push_back(const std::vector<_Tp>& v) +{ + push_back(Mat(v)); +} + + +///////////////////////////// MatSize //////////////////////////// + +inline +MatSize::MatSize(int* _p) CV_NOEXCEPT + : p(_p) {} + +inline +int MatSize::dims() const CV_NOEXCEPT +{ + return (p - 1)[0]; +} + +inline +Size MatSize::operator()() const +{ + CV_DbgAssert(dims() <= 2); + return Size(p[1], p[0]); +} + +inline +const int& MatSize::operator[](int i) const +{ + CV_DbgAssert(i < dims()); +#ifdef __OPENCV_BUILD + CV_DbgAssert(i >= 0); +#endif + return p[i]; +} + +inline +int& MatSize::operator[](int i) +{ + CV_DbgAssert(i < dims()); +#ifdef __OPENCV_BUILD + CV_DbgAssert(i >= 0); +#endif + return p[i]; +} + +inline +MatSize::operator const int*() const CV_NOEXCEPT +{ + return p; +} + +inline +bool MatSize::operator != (const MatSize& sz) const CV_NOEXCEPT +{ + return !(*this == sz); +} + + + +///////////////////////////// MatStep 
////////////////////////////
+
+inline
+MatStep::MatStep() CV_NOEXCEPT
+{
+    p = buf; p[0] = p[1] = 0;
+}
+
+inline
+MatStep::MatStep(size_t s) CV_NOEXCEPT
+{
+    p = buf; p[0] = s; p[1] = 0;
+}
+
+inline
+const size_t& MatStep::operator[](int i) const CV_NOEXCEPT
+{
+    return p[i];
+}
+
+inline
+size_t& MatStep::operator[](int i) CV_NOEXCEPT
+{
+    return p[i];
+}
+
+inline MatStep::operator size_t() const
+{
+    CV_DbgAssert( p == buf );
+    return buf[0];
+}
+
+inline MatStep& MatStep::operator = (size_t s)
+{
+    CV_DbgAssert( p == buf );
+    buf[0] = s;
+    return *this;
+}
+
+
+
+////////////////////////////// Mat_<_Tp> ////////////////////////////
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_() CV_NOEXCEPT
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value)
+{
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value)
+{
+    *this = value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Size _sz)
+    : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Size _sz, const _Tp& value)
+    : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value)
+{
+    *this = value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value, Scalar(_s))
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz, _Tp* _data, const size_t* _steps)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value, _data, _steps)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges)
+    : Mat(m, ranges)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const std::vector<Range>& ranges)
+    : Mat(m, ranges)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat& m)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
+    *this = m;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m)
+    : Mat(m)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value, _data, steps)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m, const Range& _rowRange, const Range& _colRange)
+    : Mat(m, _rowRange, _colRange)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi)
+    : Mat(m, roi)
+{}
+
+template<typename _Tp> template<int n> inline
+Mat_<_Tp>::Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData)
+    : Mat(n / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&vec)
+{
+    CV_Assert(n%DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> template<int m, int n> inline
+Mat_<_Tp>::Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& M, bool copyData)
+    : Mat(m, n / DataType<_Tp>::channels, traits::Type<_Tp>::value, (void*)&M)
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
+    : Mat(2 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt)
+{
+    CV_Assert(2 % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
+    : Mat(3 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt)
+{
+    CV_Assert(3 % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const MatCommaInitializer_<_Tp>& commaInitializer) + : Mat(commaInitializer) +{} + +template inline +Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData) + : Mat(vec, copyData) +{} + +template inline +Mat_<_Tp>::Mat_(std::initializer_list<_Tp> list) + : Mat(list) +{} + +template inline +Mat_<_Tp>::Mat_(const std::initializer_list sizes, std::initializer_list<_Tp> list) + : Mat(sizes, list) +{} + +template template inline +Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData) + : Mat(arr, copyData) +{} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m) +{ + if (m.empty()) + { + release(); + return *this; + } + if( traits::Type<_Tp>::value == m.type() ) + { + Mat::operator = (m); + return *this; + } + if( traits::Depth<_Tp>::value == m.depth() ) + { + return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0)); + } + CV_Assert(DataType<_Tp>::channels == m.channels() || m.empty()); + m.convertTo(*this, type()); + return *this; +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat_& m) +{ + Mat::operator=(m); + return *this; +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s) +{ + typedef typename DataType<_Tp>::vec_type VT; + Mat::operator=(Scalar((const VT&)s)); + return *this; +} + +template inline +void Mat_<_Tp>::create(int _rows, int _cols) +{ + Mat::create(_rows, _cols, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::create(Size _sz) +{ + Mat::create(_sz, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::create(int _dims, const int* _sz) +{ + Mat::create(_dims, _sz, traits::Type<_Tp>::value); +} + +template inline +void Mat_<_Tp>::release() +{ + Mat::release(); + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; +} + +template inline +Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const +{ + return Mat_<_Tp>(Mat::cross(m)); +} + +template template inline +Mat_<_Tp>::operator Mat_() const +{ + return Mat_(static_cast(*this)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::row(int y) const +{ + return Mat_(*this, Range(y, y+1), Range::all()); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::col(int x) const +{ + return Mat_(*this, Range::all(), Range(x, x+1)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::diag(int d) const +{ + return Mat_(Mat::diag(d)); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::clone() const +{ + return Mat_(Mat::clone()); +} + +template inline +size_t Mat_<_Tp>::elemSize() const +{ + CV_DbgAssert( Mat::elemSize() == sizeof(_Tp) ); + return sizeof(_Tp); +} + +template inline +size_t Mat_<_Tp>::elemSize1() const +{ + CV_DbgAssert( Mat::elemSize1() == sizeof(_Tp) / DataType<_Tp>::channels ); + return sizeof(_Tp) / DataType<_Tp>::channels; +} + +template inline +int Mat_<_Tp>::type() const +{ + CV_DbgAssert( Mat::type() == traits::Type<_Tp>::value ); + return traits::Type<_Tp>::value; +} + +template inline +int Mat_<_Tp>::depth() const +{ + CV_DbgAssert( Mat::depth() == traits::Depth<_Tp>::value ); + return traits::Depth<_Tp>::value; +} + +template inline +int Mat_<_Tp>::channels() const +{ + CV_DbgAssert( Mat::channels() == DataType<_Tp>::channels ); + return DataType<_Tp>::channels; +} + +template inline +size_t Mat_<_Tp>::stepT(int i) const +{ + return step.p[i] / elemSize(); +} + +template inline +size_t Mat_<_Tp>::step1(int i) const +{ + return step.p[i] / elemSize1(); +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::adjustROI( int dtop, int dbottom, int dleft, int dright ) +{ + return (Mat_<_Tp>&)(Mat::adjustROI(dtop, dbottom, dleft, dright)); +} + 
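+/* Usage sketch (illustrative only, not part of the upstream header; the variable
+   names below are placeholders). Mat_ fixes the element type at compile time, so
+   operator() needs no template argument, unlike Mat::at<_Tp>():
+@code{.cpp}
+    cv::Mat_<float> m(3, 3, 0.f);                  // 3x3 float matrix, zero-initialized
+    m(1, 2) = 5.f;                                 // typed access, no .at<float>() needed
+    cv::Mat_<float> roi = m(cv::Rect(0, 0, 2, 2)); // a view: shares data with m
+    float v = roi(1, 1);                           // reads m(1, 1)
+@endcode
+*/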
+template inline +Mat_<_Tp> Mat_<_Tp>::operator()( const Range& _rowRange, const Range& _colRange ) const +{ + return Mat_<_Tp>(*this, _rowRange, _colRange); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()( const Rect& roi ) const +{ + return Mat_<_Tp>(*this, roi); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const +{ + return Mat_<_Tp>(*this, ranges); +} + +template inline +Mat_<_Tp> Mat_<_Tp>::operator()(const std::vector& ranges) const +{ + return Mat_<_Tp>(*this, ranges); +} + +template inline +_Tp* Mat_<_Tp>::operator [](int y) +{ + CV_DbgAssert( 0 <= y && y < size.p[0] ); + return (_Tp*)(data + y*step.p[0]); +} + +template inline +const _Tp* Mat_<_Tp>::operator [](int y) const +{ + CV_DbgAssert( 0 <= y && y < size.p[0] ); + return (const _Tp*)(data + y*step.p[0]); +} + +template inline +_Tp& Mat_<_Tp>::operator ()(int i0, int i1) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((_Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((const _Tp*)(data + step.p[0] * i0))[i1]; +} + +template inline +_Tp& Mat_<_Tp>::operator ()(Point pt) +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((_Tp*)(data + step.p[0] * pt.y))[pt.x]; +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(Point pt) const +{ + CV_DbgAssert(dims <= 2); + CV_DbgAssert(data); + CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]); + CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]); + CV_DbgAssert(type() == traits::Type<_Tp>::value); + return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x]; +} + +template inline +_Tp& Mat_<_Tp>::operator ()(const int* idx) +{ + return Mat::at<_Tp>(idx); +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(const int* idx) const +{ + return Mat::at<_Tp>(idx); +} + +template template inline +_Tp& Mat_<_Tp>::operator ()(const Vec& idx) +{ + return Mat::at<_Tp>(idx); +} + +template template inline +const _Tp& Mat_<_Tp>::operator ()(const Vec& idx) const +{ + return Mat::at<_Tp>(idx); +} + +template inline +_Tp& Mat_<_Tp>::operator ()(int i0) +{ + return this->at<_Tp>(i0); +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(int i0) const +{ + return this->at<_Tp>(i0); +} + +template inline +_Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) +{ + return this->at<_Tp>(i0, i1, i2); +} + +template inline +const _Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) const +{ + return this->at<_Tp>(i0, i1, i2); +} + +template inline +Mat_<_Tp>::operator std::vector<_Tp>() const +{ + std::vector<_Tp> v; + copyTo(v); + return v; +} + +template template inline +Mat_<_Tp>::operator std::array<_Tp, _Nm>() const +{ + std::array<_Tp, _Nm> a; + copyTo(a); + return a; +} + +template template inline +Mat_<_Tp>::operator Vec::channel_type, n>() const +{ + CV_Assert(n % DataType<_Tp>::channels == 0); + +#if defined _MSC_VER + const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround) + return 
pMat->operator Vec::channel_type, n>(); +#else + return this->Mat::operator Vec::channel_type, n>(); +#endif +} + +template template inline +Mat_<_Tp>::operator Matx::channel_type, m, n>() const +{ + CV_Assert(n % DataType<_Tp>::channels == 0); + +#if defined _MSC_VER + const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround) + Matx::channel_type, m, n> res = pMat->operator Matx::channel_type, m, n>(); + return res; +#else + Matx::channel_type, m, n> res = this->Mat::operator Matx::channel_type, m, n>(); + return res; +#endif +} + +template inline +MatConstIterator_<_Tp> Mat_<_Tp>::begin() const +{ + return Mat::begin<_Tp>(); +} + +template inline +std::reverse_iterator> Mat_<_Tp>::rbegin() const +{ + return Mat::rbegin<_Tp>(); +} + +template inline +MatConstIterator_<_Tp> Mat_<_Tp>::end() const +{ + return Mat::end<_Tp>(); +} + +template inline +std::reverse_iterator> Mat_<_Tp>::rend() const +{ + return Mat::rend<_Tp>(); +} + +template inline +MatIterator_<_Tp> Mat_<_Tp>::begin() +{ + return Mat::begin<_Tp>(); +} + +template inline +std::reverse_iterator> Mat_<_Tp>::rbegin() +{ + return Mat::rbegin<_Tp>(); +} + +template inline +MatIterator_<_Tp> Mat_<_Tp>::end() +{ + return Mat::end<_Tp>(); +} + +template inline +std::reverse_iterator> Mat_<_Tp>::rend() +{ + return Mat::rend<_Tp>(); +} + +template template inline +void Mat_<_Tp>::forEach(const Functor& operation) { + Mat::forEach<_Tp, Functor>(operation); +} + +template template inline +void Mat_<_Tp>::forEach(const Functor& operation) const { + Mat::forEach<_Tp, Functor>(operation); +} + +template inline +Mat_<_Tp>::Mat_(Mat_&& m) + : Mat(std::move(m)) +{ +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (Mat_&& m) +{ + Mat::operator = (std::move(m)); + return *this; +} + +template inline +Mat_<_Tp>::Mat_(Mat&& m) + : Mat() +{ + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; + *this = std::move(m); +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m) +{ + if (m.empty()) + { + release(); + return *this; + } + if( traits::Type<_Tp>::value == m.type() ) + { + Mat::operator = ((Mat&&)m); + return *this; + } + if( traits::Depth<_Tp>::value == m.depth() ) + { + Mat::operator = ((Mat&&)m.reshape(DataType<_Tp>::channels, m.dims, 0)); + return *this; + } + CV_DbgAssert(DataType<_Tp>::channels == m.channels()); + m.convertTo(*this, type()); + return *this; +} + +template inline +Mat_<_Tp>::Mat_(MatExpr&& e) + : Mat() +{ + flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; + *this = Mat(e); +} + + +///////////////////////////// SparseMat ///////////////////////////// + +inline +SparseMat SparseMat::clone() const +{ + SparseMat temp; + this->copyTo(temp); + return temp; +} + +inline +size_t SparseMat::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t SparseMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int SparseMat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int SparseMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int SparseMat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +const int* SparseMat::size() const +{ + return hdr ? hdr->size : 0; +} + +inline +int SparseMat::size(int i) const +{ + if( hdr ) + { + CV_DbgAssert((unsigned)i < (unsigned)hdr->dims); + return hdr->size[i]; + } + return 0; +} + +inline +int SparseMat::dims() const +{ + return hdr ? hdr->dims : 0; +} + +inline +size_t SparseMat::nzcount() const +{ + return hdr ? 
hdr->nodeCount : 0;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, int i1, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, int i1, int i2, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, i2, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(const int* idx, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(idx, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, int i1, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, int i1, int i2, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(const int* idx, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, int i1, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, int i1, int i2, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(const int* idx, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::value(Node* n)
+{
+    return *(_Tp*)((uchar*)n + hdr->valueOffset);
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMat::value(const Node* n) const
+{
+    return *(const _Tp*)((const uchar*)n + hdr->valueOffset);
+}
+
+inline
+SparseMat::Node* SparseMat::node(size_t nidx)
+{
+    return (Node*)(void*)&hdr->pool[nidx];
+}
+
+inline
+const SparseMat::Node* SparseMat::node(size_t nidx) const
+{
+    return (const Node*)(const void*)&hdr->pool[nidx];
+}
+
+inline
+SparseMatIterator SparseMat::begin()
+{
+    return SparseMatIterator(this);
+}
+
+inline
+SparseMatConstIterator SparseMat::begin() const
+{
+    return SparseMatConstIterator(this);
+}
+
+inline
+SparseMatIterator SparseMat::end()
+{
+    SparseMatIterator it(this);
+    it.seekEnd();
+    return it;
+}
+
+inline
+SparseMatConstIterator SparseMat::end() const
+{
+    SparseMatConstIterator it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat::begin()
+{
+    return SparseMatIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat::begin() const
+{
+    return SparseMatConstIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat::end()
+{
+    SparseMatIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat::end() const
+{
+    SparseMatConstIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+
+
+///////////////////////////// SparseMat_ ////////////////////////////
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_()
+{
+    flags =
MAGIC_VAL + traits::Type<_Tp>::value; +} + +template inline +SparseMat_<_Tp>::SparseMat_(int _dims, const int* _sizes) + : SparseMat(_dims, _sizes, traits::Type<_Tp>::value) +{} + +template inline +SparseMat_<_Tp>::SparseMat_(const SparseMat& m) +{ + if( m.type() == traits::Type<_Tp>::value ) + *this = (const SparseMat_<_Tp>&)m; + else + m.convertTo(*this, traits::Type<_Tp>::value); +} + +template inline +SparseMat_<_Tp>::SparseMat_(const SparseMat_<_Tp>& m) +{ + this->flags = m.flags; + this->hdr = m.hdr; + if( this->hdr ) + CV_XADD(&this->hdr->refcount, 1); +} + +template inline +SparseMat_<_Tp>::SparseMat_(const Mat& m) +{ + SparseMat sm(m); + *this = sm; +} + +template inline +SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat_<_Tp>& m) +{ + if( this != &m ) + { + if( m.hdr ) CV_XADD(&m.hdr->refcount, 1); + release(); + flags = m.flags; + hdr = m.hdr; + } + return *this; +} + +template inline +SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat& m) +{ + if( m.type() == traits::Type<_Tp>::value ) + return (*this = (const SparseMat_<_Tp>&)m); + m.convertTo(*this, traits::Type<_Tp>::value); + return *this; +} + +template inline +SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const Mat& m) +{ + return (*this = SparseMat(m)); +} + +template inline +SparseMat_<_Tp> SparseMat_<_Tp>::clone() const +{ + SparseMat_<_Tp> m; + this->copyTo(m); + return m; +} + +template inline +void SparseMat_<_Tp>::create(int _dims, const int* _sizes) +{ + SparseMat::create(_dims, _sizes, traits::Type<_Tp>::value); +} + +template inline +int SparseMat_<_Tp>::type() const +{ + return traits::Type<_Tp>::value; +} + +template inline +int SparseMat_<_Tp>::depth() const +{ + return traits::Depth<_Tp>::value; +} + +template inline +int SparseMat_<_Tp>::channels() const +{ + return DataType<_Tp>::channels; +} + +template inline +_Tp& SparseMat_<_Tp>::ref(int i0, size_t* hashval) +{ + return SparseMat::ref<_Tp>(i0, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(int i0, size_t* hashval) const +{ + return SparseMat::value<_Tp>(i0, hashval); +} + +template inline +_Tp& SparseMat_<_Tp>::ref(int i0, int i1, size_t* hashval) +{ + return SparseMat::ref<_Tp>(i0, i1, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(int i0, int i1, size_t* hashval) const +{ + return SparseMat::value<_Tp>(i0, i1, hashval); +} + +template inline +_Tp& SparseMat_<_Tp>::ref(int i0, int i1, int i2, size_t* hashval) +{ + return SparseMat::ref<_Tp>(i0, i1, i2, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(int i0, int i1, int i2, size_t* hashval) const +{ + return SparseMat::value<_Tp>(i0, i1, i2, hashval); +} + +template inline +_Tp& SparseMat_<_Tp>::ref(const int* idx, size_t* hashval) +{ + return SparseMat::ref<_Tp>(idx, hashval); +} + +template inline +_Tp SparseMat_<_Tp>::operator()(const int* idx, size_t* hashval) const +{ + return SparseMat::value<_Tp>(idx, hashval); +} + +template inline +SparseMatIterator_<_Tp> SparseMat_<_Tp>::begin() +{ + return SparseMatIterator_<_Tp>(this); +} + +template inline +SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::begin() const +{ + return SparseMatConstIterator_<_Tp>(this); +} + +template inline +SparseMatIterator_<_Tp> SparseMat_<_Tp>::end() +{ + SparseMatIterator_<_Tp> it(this); + it.seekEnd(); + return it; +} + +template inline +SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::end() const +{ + SparseMatConstIterator_<_Tp> it(this); + it.seekEnd(); + return it; +} + + + +////////////////////////// MatConstIterator ///////////////////////// + 
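+/* Usage sketch (illustrative only, not part of the upstream header; `img` is a
+   placeholder name). MatConstIterator walks a Mat element-wise in row-major order
+   and stays correct on non-continuous (ROI) data by jumping between row slices.
+   User code normally reaches it through the typed wrappers via Mat::begin<_Tp>():
+@code{.cpp}
+    cv::Mat img(4, 4, CV_8UC1, cv::Scalar(1));
+    int sum = 0;
+    for (cv::MatConstIterator_<uchar> it = img.begin<uchar>(); it != img.end<uchar>(); ++it)
+        sum += *it; // visits every element exactly once, ROI-safe
+@endcode
+*/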
+inline +MatConstIterator::MatConstIterator() + : m(0), elemSize(0), ptr(0), sliceStart(0), sliceEnd(0) +{} + +inline +MatConstIterator::MatConstIterator(const Mat* _m) + : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0) +{ + if( m && m->isContinuous() ) + { + CV_Assert(!m->empty()); + sliceStart = m->ptr(); + sliceEnd = sliceStart + m->total()*elemSize; + } + seek((const int*)0); +} + +inline +MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col) + : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0) +{ + CV_Assert(m && m->dims <= 2); + if( m->isContinuous() ) + { + CV_Assert(!m->empty()); + sliceStart = m->ptr(); + sliceEnd = sliceStart + m->total()*elemSize; + } + int idx[] = {_row, _col}; + seek(idx); +} + +inline +MatConstIterator::MatConstIterator(const Mat* _m, Point _pt) + : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0) +{ + CV_Assert(m && m->dims <= 2); + if( m->isContinuous() ) + { + CV_Assert(!m->empty()); + sliceStart = m->ptr(); + sliceEnd = sliceStart + m->total()*elemSize; + } + int idx[] = {_pt.y, _pt.x}; + seek(idx); +} + +inline +MatConstIterator::MatConstIterator(const MatConstIterator& it) + : m(it.m), elemSize(it.elemSize), ptr(it.ptr), sliceStart(it.sliceStart), sliceEnd(it.sliceEnd) +{} + +inline +MatConstIterator& MatConstIterator::operator = (const MatConstIterator& it ) +{ + m = it.m; elemSize = it.elemSize; ptr = it.ptr; + sliceStart = it.sliceStart; sliceEnd = it.sliceEnd; + return *this; +} + +inline +const uchar* MatConstIterator::operator *() const +{ + return ptr; +} + +inline MatConstIterator& MatConstIterator::operator += (ptrdiff_t ofs) +{ + if( !m || ofs == 0 ) + return *this; + ptrdiff_t ofsb = ofs*elemSize; + ptr += ofsb; + if( ptr < sliceStart || sliceEnd <= ptr ) + { + ptr -= ofsb; + seek(ofs, true); + } + return *this; +} + +inline +MatConstIterator& MatConstIterator::operator -= (ptrdiff_t ofs) +{ + return (*this += -ofs); +} + +inline +MatConstIterator& MatConstIterator::operator --() +{ + if( m && (ptr -= elemSize) < sliceStart ) + { + ptr += elemSize; + seek(-1, true); + } + return *this; +} + +inline +MatConstIterator MatConstIterator::operator --(int) +{ + MatConstIterator b = *this; + *this += -1; + return b; +} + +inline +MatConstIterator& MatConstIterator::operator ++() +{ + if( m && (ptr += elemSize) >= sliceEnd ) + { + ptr -= elemSize; + seek(1, true); + } + return *this; +} + +inline MatConstIterator MatConstIterator::operator ++(int) +{ + MatConstIterator b = *this; + *this += 1; + return b; +} + + +static inline +bool operator == (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.m == b.m && a.ptr == b.ptr; +} + +static inline +bool operator != (const MatConstIterator& a, const MatConstIterator& b) +{ + return !(a == b); +} + +static inline +bool operator < (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr < b.ptr; +} + +static inline +bool operator > (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr > b.ptr; +} + +static inline +bool operator <= (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr <= b.ptr; +} + +static inline +bool operator >= (const MatConstIterator& a, const MatConstIterator& b) +{ + return a.ptr >= b.ptr; +} + +static inline +ptrdiff_t operator - (const MatConstIterator& b, const MatConstIterator& a) +{ + if( a.m != b.m ) + return ((size_t)(-1) >> 1); + if( a.sliceEnd == b.sliceEnd ) + return (b.ptr - a.ptr)/static_cast(b.elemSize); + + return b.lpos() - 
a.lpos(); +} + +static inline +MatConstIterator operator + (const MatConstIterator& a, ptrdiff_t ofs) +{ + MatConstIterator b = a; + return b += ofs; +} + +static inline +MatConstIterator operator + (ptrdiff_t ofs, const MatConstIterator& a) +{ + MatConstIterator b = a; + return b += ofs; +} + +static inline +MatConstIterator operator - (const MatConstIterator& a, ptrdiff_t ofs) +{ + MatConstIterator b = a; + return b += -ofs; +} + + +inline +const uchar* MatConstIterator::operator [](ptrdiff_t i) const +{ + return *(*this + i); +} + + + +///////////////////////// MatConstIterator_ ///////////////////////// + +template inline +MatConstIterator_<_Tp>::MatConstIterator_() +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m) + : MatConstIterator(_m) +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col) + : MatConstIterator(_m, _row, _col) +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, Point _pt) + : MatConstIterator(_m, _pt) +{} + +template inline +MatConstIterator_<_Tp>::MatConstIterator_(const MatConstIterator_& it) + : MatConstIterator(it) +{} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator = (const MatConstIterator_& it ) +{ + MatConstIterator::operator = (it); + return *this; +} + +template inline +const _Tp& MatConstIterator_<_Tp>::operator *() const +{ + return *(_Tp*)(this->ptr); +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator += (ptrdiff_t ofs) +{ + MatConstIterator::operator += (ofs); + return *this; +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator -= (ptrdiff_t ofs) +{ + return (*this += -ofs); +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator --() +{ + MatConstIterator::operator --(); + return *this; +} + +template inline +MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator --(int) +{ + MatConstIterator_ b = *this; + MatConstIterator::operator --(); + return b; +} + +template inline +MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator ++() +{ + MatConstIterator::operator ++(); + return *this; +} + +template inline +MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator ++(int) +{ + MatConstIterator_ b = *this; + MatConstIterator::operator ++(); + return b; +} + + +template inline +Point MatConstIterator_<_Tp>::pos() const +{ + if( !m ) + return Point(); + CV_DbgAssert( m->dims <= 2 ); + if( m->isContinuous() ) + { + ptrdiff_t ofs = (const _Tp*)ptr - (const _Tp*)m->data; + int y = (int)(ofs / m->cols); + int x = (int)(ofs - (ptrdiff_t)y * m->cols); + return Point(x, y); + } + else + { + ptrdiff_t ofs = (uchar*)ptr - m->data; + int y = (int)(ofs / m->step); + int x = (int)((ofs - y * m->step)/sizeof(_Tp)); + return Point(x, y); + } +} + + +template static inline +bool operator == (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b) +{ + return a.m == b.m && a.ptr == b.ptr; +} + +template static inline +bool operator != (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b) +{ + return a.m != b.m || a.ptr != b.ptr; +} + +template static inline +MatConstIterator_<_Tp> operator + (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatConstIterator_<_Tp>&)t; +} + +template static inline +MatConstIterator_<_Tp> operator + (ptrdiff_t ofs, const MatConstIterator_<_Tp>& a) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return 
(MatConstIterator_<_Tp>&)t; +} + +template static inline +MatConstIterator_<_Tp> operator - (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a - ofs; + return (MatConstIterator_<_Tp>&)t; +} + +template inline +const _Tp& MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const +{ + return *(_Tp*)MatConstIterator::operator [](i); +} + + + +//////////////////////////// MatIterator_ /////////////////////////// + +template inline +MatIterator_<_Tp>::MatIterator_() + : MatConstIterator_<_Tp>() +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m) + : MatConstIterator_<_Tp>(_m) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, int _row, int _col) + : MatConstIterator_<_Tp>(_m, _row, _col) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, Point _pt) + : MatConstIterator_<_Tp>(_m, _pt) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, const int* _idx) + : MatConstIterator_<_Tp>(_m, _idx) +{} + +template inline +MatIterator_<_Tp>::MatIterator_(const MatIterator_& it) + : MatConstIterator_<_Tp>(it) +{} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator = (const MatIterator_<_Tp>& it ) +{ + MatConstIterator::operator = (it); + return *this; +} + +template inline +_Tp& MatIterator_<_Tp>::operator *() const +{ + return *(_Tp*)(this->ptr); +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator += (ptrdiff_t ofs) +{ + MatConstIterator::operator += (ofs); + return *this; +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator -= (ptrdiff_t ofs) +{ + MatConstIterator::operator += (-ofs); + return *this; +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator --() +{ + MatConstIterator::operator --(); + return *this; +} + +template inline +MatIterator_<_Tp> MatIterator_<_Tp>::operator --(int) +{ + MatIterator_ b = *this; + MatConstIterator::operator --(); + return b; +} + +template inline +MatIterator_<_Tp>& MatIterator_<_Tp>::operator ++() +{ + MatConstIterator::operator ++(); + return *this; +} + +template inline +MatIterator_<_Tp> MatIterator_<_Tp>::operator ++(int) +{ + MatIterator_ b = *this; + MatConstIterator::operator ++(); + return b; +} + +template inline +_Tp& MatIterator_<_Tp>::operator [](ptrdiff_t i) const +{ + return *(*this + i); +} + + +template static inline +bool operator == (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b) +{ + return a.m == b.m && a.ptr == b.ptr; +} + +template static inline +bool operator != (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b) +{ + return a.m != b.m || a.ptr != b.ptr; +} + +template static inline +MatIterator_<_Tp> operator + (const MatIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatIterator_<_Tp>&)t; +} + +template static inline +MatIterator_<_Tp> operator + (ptrdiff_t ofs, const MatIterator_<_Tp>& a) +{ + MatConstIterator t = (const MatConstIterator&)a + ofs; + return (MatIterator_<_Tp>&)t; +} + +template static inline +MatIterator_<_Tp> operator - (const MatIterator_<_Tp>& a, ptrdiff_t ofs) +{ + MatConstIterator t = (const MatConstIterator&)a - ofs; + return (MatIterator_<_Tp>&)t; +} + + + +/////////////////////// SparseMatConstIterator ////////////////////// + +inline +SparseMatConstIterator::SparseMatConstIterator() + : m(0), hashidx(0), ptr(0) +{} + +inline +SparseMatConstIterator::SparseMatConstIterator(const SparseMatConstIterator& it) + : m(it.m), hashidx(it.hashidx), ptr(it.ptr) +{} + 
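+/* Usage sketch (illustrative only, not part of the upstream header; `sm` is a
+   placeholder name). SparseMat stores only its non-zero elements in a hash table,
+   and these iterators visit just the stored nodes, in no particular order:
+@code{.cpp}
+    const int sz[] = { 1000, 1000 };
+    cv::SparseMat sm(2, sz, CV_32F);
+    sm.ref<float>(3, 7) = 2.5f;           // first access creates the node
+    double total = 0;
+    for (cv::SparseMatConstIterator it = sm.begin(); it != sm.end(); ++it)
+        total += it.value<float>();       // iterates over the single stored node
+@endcode
+*/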
+inline SparseMatConstIterator& SparseMatConstIterator::operator = (const SparseMatConstIterator& it)
+{
+    if( this != &it )
+    {
+        m = it.m;
+        hashidx = it.hashidx;
+        ptr = it.ptr;
+    }
+    return *this;
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMatConstIterator::value() const
+{
+    return *(const _Tp*)ptr;
+}
+
+inline
+const SparseMat::Node* SparseMatConstIterator::node() const
+{
+    return (ptr && m && m->hdr) ? (const SparseMat::Node*)(const void*)(ptr - m->hdr->valueOffset) : 0;
+}
+
+inline
+SparseMatConstIterator SparseMatConstIterator::operator ++(int)
+{
+    SparseMatConstIterator it = *this;
+    ++*this;
+    return it;
+}
+
+inline
+void SparseMatConstIterator::seekEnd()
+{
+    if( m && m->hdr )
+    {
+        hashidx = m->hdr->hashtab.size();
+        ptr = 0;
+    }
+}
+
+
+static inline
+bool operator == (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2)
+{
+    return it1.m == it2.m && it1.ptr == it2.ptr;
+}
+
+static inline
+bool operator != (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2)
+{
+    return !(it1 == it2);
+}
+
+
+
+///////////////////////// SparseMatIterator /////////////////////////
+
+inline
+SparseMatIterator::SparseMatIterator()
+{}
+
+inline
+SparseMatIterator::SparseMatIterator(SparseMat* _m)
+    : SparseMatConstIterator(_m)
+{}
+
+inline
+SparseMatIterator::SparseMatIterator(const SparseMatIterator& it)
+    : SparseMatConstIterator(it)
+{}
+
+inline
+SparseMatIterator& SparseMatIterator::operator = (const SparseMatIterator& it)
+{
+    (SparseMatConstIterator&)*this = it;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMatIterator::value() const
+{
+    return *(_Tp*)ptr;
+}
+
+inline
+SparseMat::Node* SparseMatIterator::node() const
+{
+    return (SparseMat::Node*)SparseMatConstIterator::node();
+}
+
+inline
+SparseMatIterator& SparseMatIterator::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+inline
+SparseMatIterator SparseMatIterator::operator ++(int)
+{
+    SparseMatIterator it = *this;
+    ++*this;
+    return it;
+}
+
+
+
+////////////////////// SparseMatConstIterator_ //////////////////////
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_()
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat_<_Tp>* _m)
+    : SparseMatConstIterator(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m)
+    : SparseMatConstIterator(_m)
+{
+    CV_Assert( _m->type() == traits::Type<_Tp>::value );
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMatConstIterator_<_Tp>& it)
+    : SparseMatConstIterator(it)
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator = (const SparseMatConstIterator_<_Tp>& it)
+{
+    return reinterpret_cast<SparseMatConstIterator_<_Tp>&>
+        (*reinterpret_cast<SparseMatConstIterator*>(this) =
+          reinterpret_cast<const SparseMatConstIterator&>(it));
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMatConstIterator_<_Tp>::operator *() const
+{
+    return *(const _Tp*)this->ptr;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMatConstIterator_<_Tp>::operator ++(int)
+{
+    SparseMatConstIterator_<_Tp> it = *this;
+    SparseMatConstIterator::operator ++();
+    return it;
+}
+
+
+
+///////////////////////// SparseMatIterator_ ////////////////////////
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_()
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat_<_Tp>* _m) + : SparseMatConstIterator_<_Tp>(_m) +{} + +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat* _m) + : SparseMatConstIterator_<_Tp>(_m) +{} + +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_(const SparseMatIterator_<_Tp>& it) + : SparseMatConstIterator_<_Tp>(it) +{} + +template inline +SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator = (const SparseMatIterator_<_Tp>& it) +{ + return reinterpret_cast&> + (*reinterpret_cast(this) = + reinterpret_cast(it)); +} + +template inline +_Tp& SparseMatIterator_<_Tp>::operator *() const +{ + return *(_Tp*)this->ptr; +} + +template inline +SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator ++() +{ + SparseMatConstIterator::operator ++(); + return *this; +} + +template inline +SparseMatIterator_<_Tp> SparseMatIterator_<_Tp>::operator ++(int) +{ + SparseMatIterator_<_Tp> it = *this; + SparseMatConstIterator::operator ++(); + return it; +} + + + +//////////////////////// MatCommaInitializer_ /////////////////////// + +template inline +MatCommaInitializer_<_Tp>::MatCommaInitializer_(Mat_<_Tp>* _m) + : it(_m) +{} + +template template inline +MatCommaInitializer_<_Tp>& MatCommaInitializer_<_Tp>::operator , (T2 v) +{ + CV_DbgAssert( this->it < ((const Mat_<_Tp>*)this->it.m)->end() ); + *this->it = _Tp(v); + ++this->it; + return *this; +} + +template inline +MatCommaInitializer_<_Tp>::operator Mat_<_Tp>() const +{ + CV_DbgAssert( this->it == ((const Mat_<_Tp>*)this->it.m)->end() ); + return Mat_<_Tp>(*this->it.m); +} + + +template static inline +MatCommaInitializer_<_Tp> operator << (const Mat_<_Tp>& m, T2 val) +{ + MatCommaInitializer_<_Tp> commaInitializer((Mat_<_Tp>*)&m); + return (commaInitializer, val); +} + + + +///////////////////////// Matrix Expressions //////////////////////// + +inline +Mat& Mat::operator = (const MatExpr& e) +{ + e.op->assign(e, *this); + return *this; +} + +template inline +Mat_<_Tp>::Mat_(const MatExpr& e) +{ + e.op->assign(e, *this, traits::Type<_Tp>::value); +} + +template inline +Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e) +{ + e.op->assign(e, *this, traits::Type<_Tp>::value); + return *this; +} + +template inline +MatExpr Mat_<_Tp>::zeros(int rows, int cols) +{ + return Mat::zeros(rows, cols, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::zeros(Size sz) +{ + return Mat::zeros(sz, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::ones(int rows, int cols) +{ + return Mat::ones(rows, cols, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::ones(Size sz) +{ + return Mat::ones(sz, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::eye(int rows, int cols) +{ + return Mat::eye(rows, cols, traits::Type<_Tp>::value); +} + +template inline +MatExpr Mat_<_Tp>::eye(Size sz) +{ + return Mat::eye(sz, traits::Type<_Tp>::value); +} + +inline +MatExpr::MatExpr() + : op(0), flags(0), a(Mat()), b(Mat()), c(Mat()), alpha(0), beta(0), s() +{} + +inline +MatExpr::MatExpr(const MatOp* _op, int _flags, const Mat& _a, const Mat& _b, + const Mat& _c, double _alpha, double _beta, const Scalar& _s) + : op(_op), flags(_flags), a(_a), b(_b), c(_c), alpha(_alpha), beta(_beta), s(_s) +{} + +inline +MatExpr::operator Mat() const +{ + Mat m; + op->assign(*this, m); + return m; +} + +template inline +MatExpr::operator Mat_<_Tp>() const +{ + Mat_<_Tp> m; + op->assign(*this, m, traits::Type<_Tp>::value); + return m; +} + + +template static inline +MatExpr 
min(const Mat_<_Tp>& a, const Mat_<_Tp>& b) +{ + return cv::min((const Mat&)a, (const Mat&)b); +} + +template static inline +MatExpr min(const Mat_<_Tp>& a, double s) +{ + return cv::min((const Mat&)a, s); +} + +template static inline +MatExpr min(double s, const Mat_<_Tp>& a) +{ + return cv::min((const Mat&)a, s); +} + +template static inline +MatExpr max(const Mat_<_Tp>& a, const Mat_<_Tp>& b) +{ + return cv::max((const Mat&)a, (const Mat&)b); +} + +template static inline +MatExpr max(const Mat_<_Tp>& a, double s) +{ + return cv::max((const Mat&)a, s); +} + +template static inline +MatExpr max(double s, const Mat_<_Tp>& a) +{ + return cv::max((const Mat&)a, s); +} + +template static inline +MatExpr abs(const Mat_<_Tp>& m) +{ + return cv::abs((const Mat&)m); +} + + +static inline +Mat& operator += (Mat& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, a); + return a; +} + +static inline +const Mat& operator += (const Mat& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator += (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator += (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignAdd(b, (Mat&)a); + return a; +} + +static inline +Mat& operator -= (Mat& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, a); + return a; +} + +static inline +const Mat& operator -= (const Mat& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator -= (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator -= (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignSubtract(b, (Mat&)a); + return a; +} + +static inline +Mat& operator *= (Mat& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, a); + return a; +} + +static inline +const Mat& operator *= (const Mat& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator *= (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator *= (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignMultiply(b, (Mat&)a); + return a; +} + +static inline +Mat& operator /= (Mat& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, a); + return a; +} + +static inline +const Mat& operator /= (const Mat& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, (Mat&)a); + return a; +} + +template static inline +Mat_<_Tp>& operator /= (Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, a); + return a; +} + +template static inline +const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b) +{ + b.op->augAssignDivide(b, (Mat&)a); + return a; +} + + +//////////////////////////////// UMat //////////////////////////////// + +template inline +UMat::UMat(const std::vector<_Tp>& vec, bool copyData) +: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()), +cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows) +{ + if(vec.empty()) + return; + if( !copyData ) + { + // !!!TODO!!! 
+ CV_Error(Error::StsNotImplemented, ""); + } + else + Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this); +} + +inline +UMat UMat::row(int y) const +{ + return UMat(*this, Range(y, y + 1), Range::all()); +} + +inline +UMat UMat::col(int x) const +{ + return UMat(*this, Range::all(), Range(x, x + 1)); +} + +inline +UMat UMat::rowRange(int startrow, int endrow) const +{ + return UMat(*this, Range(startrow, endrow), Range::all()); +} + +inline +UMat UMat::rowRange(const Range& r) const +{ + return UMat(*this, r, Range::all()); +} + +inline +UMat UMat::colRange(int startcol, int endcol) const +{ + return UMat(*this, Range::all(), Range(startcol, endcol)); +} + +inline +UMat UMat::colRange(const Range& r) const +{ + return UMat(*this, Range::all(), r); +} + +inline +UMat UMat::operator()( Range _rowRange, Range _colRange ) const +{ + return UMat(*this, _rowRange, _colRange); +} + +inline +UMat UMat::operator()( const Rect& roi ) const +{ + return UMat(*this, roi); +} + +inline +UMat UMat::operator()(const Range* ranges) const +{ + return UMat(*this, ranges); +} + +inline +UMat UMat::operator()(const std::vector& ranges) const +{ + return UMat(*this, ranges); +} + +inline +bool UMat::isContinuous() const +{ + return (flags & CONTINUOUS_FLAG) != 0; +} + +inline +bool UMat::isSubmatrix() const +{ + return (flags & SUBMATRIX_FLAG) != 0; +} + +inline +size_t UMat::elemSize() const +{ + size_t res = dims > 0 ? step.p[dims - 1] : 0; + CV_DbgAssert(res != 0); + return res; +} + +inline +size_t UMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +int UMat::type() const +{ + return CV_MAT_TYPE(flags); +} + +inline +int UMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +inline +int UMat::channels() const +{ + return CV_MAT_CN(flags); +} + +inline +size_t UMat::step1(int i) const +{ + return step.p[i] / elemSize1(); +} + + +inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; } +inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; } +inline bool UMatData::deviceMemMapped() const { return (flags & DEVICE_MEM_MAPPED) != 0; } +inline bool UMatData::copyOnMap() const { return (flags & COPY_ON_MAP) != 0; } +inline bool UMatData::tempUMat() const { return (flags & TEMP_UMAT) != 0; } +inline bool UMatData::tempCopiedUMat() const { return (flags & TEMP_COPIED_UMAT) == TEMP_COPIED_UMAT; } + +inline void UMatData::markDeviceMemMapped(bool flag) +{ + if(flag) + flags |= DEVICE_MEM_MAPPED; + else + flags &= ~DEVICE_MEM_MAPPED; +} + +inline void UMatData::markHostCopyObsolete(bool flag) +{ + if(flag) + flags |= HOST_COPY_OBSOLETE; + else + flags &= ~HOST_COPY_OBSOLETE; +} +inline void UMatData::markDeviceCopyObsolete(bool flag) +{ + if(flag) + flags |= DEVICE_COPY_OBSOLETE; + else + flags &= ~DEVICE_COPY_OBSOLETE; +} + +//! 
@endcond
+
+static inline
+void swap(MatExpr& a, MatExpr& b) { a.swap(b); }
+
+} //cv
+
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS
+#undef CV_DISABLE_CLANG_ENUM_WARNINGS
+#pragma clang diagnostic pop
+#endif
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/matx.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/matx.hpp
new file mode 100644
index 0000000..162ce6e
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/matx.hpp
@@ -0,0 +1,1528 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MATX_HPP
+#define OPENCV_CORE_MATX_HPP
+
+#ifndef __cplusplus
+#  error matx.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+#include "opencv2/core/traits.hpp"
+#include "opencv2/core/saturate.hpp"
+
+#include <initializer_list>
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+////////////////////////////// Small Matrix ///////////////////////////
+
+//!
@cond IGNORED
+// FIXIT Remove this (especially CV_EXPORTS modifier)
+struct CV_EXPORTS Matx_AddOp { Matx_AddOp() {} Matx_AddOp(const Matx_AddOp&) {} };
+struct CV_EXPORTS Matx_SubOp { Matx_SubOp() {} Matx_SubOp(const Matx_SubOp&) {} };
+struct CV_EXPORTS Matx_ScaleOp { Matx_ScaleOp() {} Matx_ScaleOp(const Matx_ScaleOp&) {} };
+struct CV_EXPORTS Matx_MulOp { Matx_MulOp() {} Matx_MulOp(const Matx_MulOp&) {} };
+struct CV_EXPORTS Matx_DivOp { Matx_DivOp() {} Matx_DivOp(const Matx_DivOp&) {} };
+struct CV_EXPORTS Matx_MatMulOp { Matx_MatMulOp() {} Matx_MatMulOp(const Matx_MatMulOp&) {} };
+struct CV_EXPORTS Matx_TOp { Matx_TOp() {} Matx_TOp(const Matx_TOp&) {} };
+//! @endcond
+
+/** @brief Template class for small matrices whose type and size are known at compile time
+
+If you need a more flexible type, use Mat. The elements of the matrix M are accessible using the
+M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions) are
+available. To apply an operation on Matx that is not implemented, you can easily convert the matrix to
+Mat and back:
+@code{.cpp}
+    Matx33f m(1, 2, 3,
+              4, 5, 6,
+              7, 8, 9);
+    cout << sum(Mat(m*m.t())) << endl;
+@endcode
+Apart from the plain constructor, which takes a list of elements, Matx can be initialized from a C array:
+@code{.cpp}
+    float values[] = { 1, 2, 3};
+    Matx31f m(values);
+@endcode
+If C++11 features are available, std::initializer_list can also be used to initialize Matx:
+@code{.cpp}
+    Matx31f m = { 1, 2, 3};
+@endcode
+ */
+template<typename _Tp, int m, int n> class Matx
+{
+public:
+    enum {
+           rows     = m,
+           cols     = n,
+           channels = rows*cols,
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth    = traits::Type<_Tp>::value,
+           type     = CV_MAKETYPE(depth, channels),
+#endif
+           shortdim = (m < n ? m : n)
+         };
+
+    typedef _Tp                    value_type;
+    typedef Matx<_Tp, m, n>        mat_type;
+    typedef Matx<_Tp, shortdim, 1> diag_type;
+
+    //!
default constructor + Matx(); + + explicit Matx(_Tp v0); //!< 1x1 matrix + Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 1x5 or 5x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 1x6, 2x3, 3x2 or 6x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 1x7 or 7x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 1x8, 2x4, 4x2 or 8x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 1x9, 3x3 or 9x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 1x10, 2x5 or 5x2 or 10x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, + _Tp v4, _Tp v5, _Tp v6, _Tp v7, + _Tp v8, _Tp v9, _Tp v10, _Tp v11); //!< 1x12, 2x6, 3x4, 4x3, 6x2 or 12x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, + _Tp v4, _Tp v5, _Tp v6, _Tp v7, + _Tp v8, _Tp v9, _Tp v10, _Tp v11, + _Tp v12, _Tp v13); //!< 1x14, 2x7, 7x2 or 14x1 matrix + Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, + _Tp v4, _Tp v5, _Tp v6, _Tp v7, + _Tp v8, _Tp v9, _Tp v10, _Tp v11, + _Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix + explicit Matx(const _Tp* vals); //!< initialize from a plain array + + Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list + + CV_NODISCARD_STD static Matx all(_Tp alpha); + CV_NODISCARD_STD static Matx zeros(); + CV_NODISCARD_STD static Matx ones(); + CV_NODISCARD_STD static Matx eye(); + CV_NODISCARD_STD static Matx diag(const diag_type& d); + /** @brief Generates uniformly distributed random numbers + @param a Range boundary. + @param b The other range boundary (boundaries don't have to be ordered, the lower boundary is inclusive, + the upper one is exclusive). + */ + CV_NODISCARD_STD static Matx randu(_Tp a, _Tp b); + /** @brief Generates normally distributed random numbers + @param a Mean value. + @param b Standard deviation. + */ + CV_NODISCARD_STD static Matx randn(_Tp a, _Tp b); + + //! dot product computed with the default precision + _Tp dot(const Matx<_Tp, m, n>& v) const; + + //! dot product computed in double-precision arithmetics + double ddot(const Matx<_Tp, m, n>& v) const; + + //! conversion to another data type + template operator Matx() const; + + //! change the matrix shape + template Matx<_Tp, m1, n1> reshape() const; + + //! extract part of the matrix + template Matx<_Tp, m1, n1> get_minor(int base_row, int base_col) const; + + //! extract the matrix row + Matx<_Tp, 1, n> row(int i) const; + + //! extract the matrix column + Matx<_Tp, m, 1> col(int i) const; + + //! extract the matrix diagonal + diag_type diag() const; + + //! transpose the matrix + Matx<_Tp, n, m> t() const; + + //! invert the matrix + Matx<_Tp, n, m> inv(int method=DECOMP_LU, bool *p_is_ok = NULL) const; + + //! solve linear system + template Matx<_Tp, n, l> solve(const Matx<_Tp, m, l>& rhs, int flags=DECOMP_LU) const; + Vec<_Tp, n> solve(const Vec<_Tp, m>& rhs, int method) const; + + //! multiply two matrices element-wise + Matx<_Tp, m, n> mul(const Matx<_Tp, m, n>& a) const; + + //! divide two matrices element-wise + Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const; + + //! element access + const _Tp& operator ()(int row, int col) const; + _Tp& operator ()(int row, int col); + + //! 
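Taken together, the constructors and the inv/solve members declared above give Matx a small, stack-allocated linear-algebra interface. A sketch with hypothetical values:

    #include <opencv2/core.hpp>

    int main()
    {
        cv::Matx33f A(2, 0, 0,
                      0, 3, 0,
                      0, 0, 4);
        cv::Vec3f b(2, 6, 8);
        cv::Matx33f Ainv = A.inv();              // LU decomposition by default
        cv::Vec3f x = A.solve(b, cv::DECOMP_LU); // solves A*x = b, here x = (1, 2, 2)
        (void)Ainv; (void)x;
        return 0;
    }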
1D element access + const _Tp& operator ()(int i) const; + _Tp& operator ()(int i); + + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp); + template Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp); + template Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp); + Matx(const Matx<_Tp, n, m>& a, Matx_TOp); + + _Tp val[m*n]; //< matrix elements +}; + +typedef Matx Matx12f; +typedef Matx Matx12d; +typedef Matx Matx13f; +typedef Matx Matx13d; +typedef Matx Matx14f; +typedef Matx Matx14d; +typedef Matx Matx16f; +typedef Matx Matx16d; + +typedef Matx Matx21f; +typedef Matx Matx21d; +typedef Matx Matx31f; +typedef Matx Matx31d; +typedef Matx Matx41f; +typedef Matx Matx41d; +typedef Matx Matx61f; +typedef Matx Matx61d; + +typedef Matx Matx22f; +typedef Matx Matx22d; +typedef Matx Matx23f; +typedef Matx Matx23d; +typedef Matx Matx32f; +typedef Matx Matx32d; + +typedef Matx Matx33f; +typedef Matx Matx33d; + +typedef Matx Matx34f; +typedef Matx Matx34d; +typedef Matx Matx43f; +typedef Matx Matx43d; + +typedef Matx Matx44f; +typedef Matx Matx44d; +typedef Matx Matx66f; +typedef Matx Matx66d; + +/*! + traits +*/ +template class DataType< Matx<_Tp, m, n> > +{ +public: + typedef Matx<_Tp, m, n> value_type; + typedef Matx::work_type, m, n> work_type; + typedef _Tp channel_type; + typedef value_type vec_type; + + enum { generic_type = 0, + channels = m * n, + fmt = traits::SafeFmt::fmt + ((channels - 1) << 8) +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + ,depth = DataType::depth + ,type = CV_MAKETYPE(depth, channels) +#endif + }; +}; + +namespace traits { +template +struct Depth< Matx<_Tp, m, n> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Matx<_Tp, m, n> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, n*m) }; }; +} // namespace + + +/** @brief Comma-separated Matrix Initializer +*/ +template class MatxCommaInitializer +{ +public: + MatxCommaInitializer(Matx<_Tp, m, n>* _mtx); + template MatxCommaInitializer<_Tp, m, n>& operator , (T2 val); + Matx<_Tp, m, n> operator *() const; + + Matx<_Tp, m, n>* dst; + int idx; +}; + +/* + Utility methods +*/ +template static double determinant(const Matx<_Tp, m, m>& a); +template static double trace(const Matx<_Tp, m, n>& a); +template static double norm(const Matx<_Tp, m, n>& M); +template static double norm(const Matx<_Tp, m, n>& M, int normType); + + + +/////////////////////// Vec (used as element of multi-channel images ///////////////////// + +/** @brief Template class for short numerical vectors, a partial case of Matx + +This template class represents short numerical vectors (of 1, 2, 3, 4 ... elements) on which you +can perform basic arithmetical operations, access individual elements using [] operator etc. The +vectors are allocated on stack, as opposite to std::valarray, std::vector, cv::Mat etc., which +elements are dynamically allocated in the heap. + +The template takes 2 parameters: +@tparam _Tp element type +@tparam cn the number of elements + +In addition to the universal notation like Vec, you can use shorter aliases +for the most popular specialized variants of Vec, e.g. Vec3f ~ Vec. + +It is possible to convert Vec\ to/from Point_, Vec\ to/from Point3_ , and Vec\ +to CvScalar or Scalar_. Use operator[] to access the elements of Vec. 
+ +All the expected vector operations are also implemented: +- v1 = v2 + v3 +- v1 = v2 - v3 +- v1 = v2 \* scale +- v1 = scale \* v2 +- v1 = -v2 +- v1 += v2 and other augmenting operations +- v1 == v2, v1 != v2 +- norm(v1) (euclidean norm) +The Vec class is commonly used to describe pixel types of multi-channel arrays. See Mat for details. +*/ +template class Vec : public Matx<_Tp, cn, 1> +{ +public: + typedef _Tp value_type; + enum { + channels = cn, +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = Matx<_Tp, cn, 1>::depth, + type = CV_MAKETYPE(depth, channels), +#endif + _dummy_enum_finalizer = 0 + }; + + //! default constructor + Vec(); + + Vec(_Tp v0); //!< 1-element vector constructor + Vec(_Tp v0, _Tp v1); //!< 2-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2); //!< 3-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 4-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 5-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 6-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 7-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 8-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 9-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 10-element vector constructor + Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor + explicit Vec(const _Tp* values); + + Vec(std::initializer_list<_Tp>); + + Vec(const Vec<_Tp, cn>& v); + + static Vec all(_Tp alpha); + static Vec ones(); + static Vec randn(_Tp a, _Tp b); + static Vec randu(_Tp a, _Tp b); + static Vec zeros(); +#ifdef CV_CXX11 + static Vec diag(_Tp alpha) = delete; + static Vec eye() = delete; +#endif + + //! per-element multiplication + Vec mul(const Vec<_Tp, cn>& v) const; + + //! conjugation (makes sense for complex numbers and quaternions) + Vec conj() const; + + /*! + cross product of the two 3D vectors. + + For other dimensionalities the exception is raised + */ + Vec cross(const Vec& v) const; + //! conversion to another data type + template operator Vec() const; + + /*! element access */ + const _Tp& operator [](int i) const; + _Tp& operator[](int i); + const _Tp& operator ()(int i) const; + _Tp& operator ()(int i); + +#ifdef CV_CXX11 + Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default; +#endif + + Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp); + Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp); + template Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp); +}; + +/** @name Shorter aliases for the most popular specializations of Vec + @{ +*/ +typedef Vec Vec2b; +typedef Vec Vec3b; +typedef Vec Vec4b; + +typedef Vec Vec2s; +typedef Vec Vec3s; +typedef Vec Vec4s; + +typedef Vec Vec2w; +typedef Vec Vec3w; +typedef Vec Vec4w; + +typedef Vec Vec2i; +typedef Vec Vec3i; +typedef Vec Vec4i; +typedef Vec Vec6i; +typedef Vec Vec8i; + +typedef Vec Vec2f; +typedef Vec Vec3f; +typedef Vec Vec4f; +typedef Vec Vec6f; + +typedef Vec Vec2d; +typedef Vec Vec3d; +typedef Vec Vec4d; +typedef Vec Vec6d; +/** @} */ + +/*! 
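The operations listed in the Vec documentation above map directly onto the inherited Matx operators. A brief sketch (values are arbitrary):

    #include <opencv2/core.hpp>

    int main()
    {
        cv::Vec3f v1(1, 2, 3), v2(4, 5, 6);
        cv::Vec3f s = v1 + v2 * 2.0f;   // element-wise arithmetic and scaling
        cv::Vec3f c = v1.cross(v2);     // defined only for 3-element vectors
        double n = cv::norm(v1);        // Euclidean norm
        bool eq = (v1 == v2);           // per-element comparison, false here
        (void)s; (void)c; (void)n; (void)eq;
        return 0;
    }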
+ traits +*/ +template class DataType< Vec<_Tp, cn> > +{ +public: + typedef Vec<_Tp, cn> value_type; + typedef Vec::work_type, cn> work_type; + typedef _Tp channel_type; + typedef value_type vec_type; + + enum { generic_type = 0, + channels = cn, + fmt = DataType::fmt + ((channels - 1) << 8), +#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED + depth = DataType::depth, + type = CV_MAKETYPE(depth, channels), +#endif + _dummy_enum_finalizer = 0 + }; +}; + +namespace traits { +template +struct Depth< Vec<_Tp, cn> > { enum { value = Depth<_Tp>::value }; }; +template +struct Type< Vec<_Tp, cn> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, cn) }; }; +} // namespace + + +/** @brief Comma-separated Vec Initializer +*/ +template class VecCommaInitializer : public MatxCommaInitializer<_Tp, m, 1> +{ +public: + VecCommaInitializer(Vec<_Tp, m>* _vec); + template VecCommaInitializer<_Tp, m>& operator , (T2 val); + Vec<_Tp, m> operator *() const; +}; + +template static Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v); + +//! @} core_basic + +//! @cond IGNORED + +///////////////////////////////////// helper classes ///////////////////////////////////// +namespace internal +{ + +template struct Matx_DetOp +{ + double operator ()(const Matx<_Tp, m, m>& a) const + { + Matx<_Tp, m, m> temp = a; + double p = LU(temp.val, m*sizeof(_Tp), m, 0, 0, 0); + if( p == 0 ) + return p; + for( int i = 0; i < m; i++ ) + p *= temp(i, i); + return p; + } +}; + +template struct Matx_DetOp<_Tp, 1> +{ + double operator ()(const Matx<_Tp, 1, 1>& a) const + { + return a(0,0); + } +}; + +template struct Matx_DetOp<_Tp, 2> +{ + double operator ()(const Matx<_Tp, 2, 2>& a) const + { + return a(0,0)*a(1,1) - a(0,1)*a(1,0); + } +}; + +template struct Matx_DetOp<_Tp, 3> +{ + double operator ()(const Matx<_Tp, 3, 3>& a) const + { + return a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2)) - + a(0,1)*(a(1,0)*a(2,2) - a(2,0)*a(1,2)) + + a(0,2)*(a(1,0)*a(2,1) - a(2,0)*a(1,1)); + } +}; + +template Vec<_Tp, 2> inline conjugate(const Vec<_Tp, 2>& v) +{ + return Vec<_Tp, 2>(v[0], -v[1]); +} + +template Vec<_Tp, 4> inline conjugate(const Vec<_Tp, 4>& v) +{ + return Vec<_Tp, 4>(v[0], -v[1], -v[2], -v[3]); +} + +} // internal + + + +////////////////////////////////// Matx Implementation /////////////////////////////////// + +template inline +Matx<_Tp, m, n>::Matx() +{ + for(int i = 0; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0) +{ + val[0] = v0; + for(int i = 1; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1) +{ + CV_StaticAssert(channels >= 2, "Matx should have at least 2 elements."); + val[0] = v0; val[1] = v1; + for(int i = 2; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2) +{ + CV_StaticAssert(channels >= 3, "Matx should have at least 3 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; + for(int i = 3; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3) +{ + CV_StaticAssert(channels >= 4, "Matx should have at least 4 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + for(int i = 4; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4) +{ + CV_StaticAssert(channels >= 5, "Matx should have at least 5 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4; + for(int i = 5; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, 
n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5) +{ + CV_StaticAssert(channels >= 6, "Matx should have at least 6 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; + for(int i = 6; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6) +{ + CV_StaticAssert(channels >= 7, "Matx should have at least 7 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; + for(int i = 7; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7) +{ + CV_StaticAssert(channels >= 8, "Matx should have at least 8 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + for(int i = 8; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8) +{ + CV_StaticAssert(channels >= 9, "Matx should have at least 9 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; + for(int i = 9; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9) +{ + CV_StaticAssert(channels >= 10, "Matx should have at least 10 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; + for(int i = 10; i < channels; i++) val[i] = _Tp(0); +} + + +template inline +Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11) +{ + CV_StaticAssert(channels >= 12, "Matx should have at least 12 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; + for(int i = 12; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13) +{ + CV_StaticAssert(channels >= 14, "Matx should have at least 14 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; + val[12] = v12; val[13] = v13; + for (int i = 14; i < channels; i++) val[i] = _Tp(0); +} + + +template inline +Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13, _Tp v14, _Tp v15) +{ + CV_StaticAssert(channels >= 16, "Matx should have at least 16 elements."); + val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; + val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7; + val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11; + val[12] = v12; val[13] = v13; val[14] = v14; val[15] = v15; + for(int i = 16; i < channels; i++) val[i] = _Tp(0); +} + +template inline +Matx<_Tp, m, n>::Matx(const _Tp* values) +{ + for( int i = 0; i < channels; i++ ) val[i] = values[i]; +} + +template inline +Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list) +{ + CV_DbgAssert(list.size() == channels); + int i = 0; + for(const auto& elem : list) + { + val[i++] = elem; + } +} + +template inline +Matx<_Tp, m, n> Matx<_Tp, 
m, n>::all(_Tp alpha) +{ + Matx<_Tp, m, n> M; + for( int i = 0; i < m*n; i++ ) M.val[i] = alpha; + return M; +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::zeros() +{ + return all(0); +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::ones() +{ + return all(1); +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::eye() +{ + Matx<_Tp,m,n> M; + for(int i = 0; i < shortdim; i++) + M(i,i) = 1; + return M; +} + +template inline +_Tp Matx<_Tp, m, n>::dot(const Matx<_Tp, m, n>& M) const +{ + _Tp s = 0; + for( int i = 0; i < channels; i++ ) s += val[i]*M.val[i]; + return s; +} + +template inline +double Matx<_Tp, m, n>::ddot(const Matx<_Tp, m, n>& M) const +{ + double s = 0; + for( int i = 0; i < channels; i++ ) s += (double)val[i]*M.val[i]; + return s; +} + +template inline +Matx<_Tp,m,n> Matx<_Tp,m,n>::diag(const typename Matx<_Tp,m,n>::diag_type& d) +{ + Matx<_Tp,m,n> M; + for(int i = 0; i < shortdim; i++) + M(i,i) = d(i, 0); + return M; +} + +template template +inline Matx<_Tp, m, n>::operator Matx() const +{ + Matx M; + for( int i = 0; i < m*n; i++ ) M.val[i] = saturate_cast(val[i]); + return M; +} + +template template inline +Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const +{ + CV_StaticAssert(m1*n1 == m*n, "Input and destnarion matrices must have the same number of elements"); + return (const Matx<_Tp, m1, n1>&)*this; +} + +template +template inline +Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int base_row, int base_col) const +{ + CV_DbgAssert(0 <= base_row && base_row+m1 <= m && 0 <= base_col && base_col+n1 <= n); + Matx<_Tp, m1, n1> s; + for( int di = 0; di < m1; di++ ) + for( int dj = 0; dj < n1; dj++ ) + s(di, dj) = (*this)(base_row+di, base_col+dj); + return s; +} + +template inline +Matx<_Tp, 1, n> Matx<_Tp, m, n>::row(int i) const +{ + CV_DbgAssert((unsigned)i < (unsigned)m); + return Matx<_Tp, 1, n>(&val[i*n]); +} + +template inline +Matx<_Tp, m, 1> Matx<_Tp, m, n>::col(int j) const +{ + CV_DbgAssert((unsigned)j < (unsigned)n); + Matx<_Tp, m, 1> v; + for( int i = 0; i < m; i++ ) + v.val[i] = val[i*n + j]; + return v; +} + +template inline +typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const +{ + diag_type d; + for( int i = 0; i < shortdim; i++ ) + d.val[i] = val[i*n + i]; + return d; +} + +template inline +const _Tp& Matx<_Tp, m, n>::operator()(int row_idx, int col_idx) const +{ + CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n ); + return this->val[row_idx*n + col_idx]; +} + +template inline +_Tp& Matx<_Tp, m, n>::operator ()(int row_idx, int col_idx) +{ + CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n ); + return val[row_idx*n + col_idx]; +} + +template inline +const _Tp& Matx<_Tp, m, n>::operator ()(int i) const +{ + CV_StaticAssert(m == 1 || n == 1, "Single index indexation requires matrix to be a column or a row"); + CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) ); + return val[i]; +} + +template inline +_Tp& Matx<_Tp, m, n>::operator ()(int i) +{ + CV_StaticAssert(m == 1 || n == 1, "Single index indexation requires matrix to be a column or a row"); + CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) ); + return val[i]; +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] + b.val[i]); +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = 
saturate_cast<_Tp>(a.val[i] - b.val[i]); +} + +template template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] * alpha); +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] * b.val[i]); +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] / b.val[i]); +} + +template template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp) +{ + for( int i = 0; i < m; i++ ) + for( int j = 0; j < n; j++ ) + { + _Tp s = 0; + for( int k = 0; k < l; k++ ) + s += a(i, k) * b(k, j); + val[i*n + j] = s; + } +} + +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, n, m>& a, Matx_TOp) +{ + for( int i = 0; i < m; i++ ) + for( int j = 0; j < n; j++ ) + val[i*n + j] = a(j, i); +} + +template inline +Matx<_Tp, m, n> Matx<_Tp, m, n>::mul(const Matx<_Tp, m, n>& a) const +{ + return Matx<_Tp, m, n>(*this, a, Matx_MulOp()); +} + +template inline +Matx<_Tp, m, n> Matx<_Tp, m, n>::div(const Matx<_Tp, m, n>& a) const +{ + return Matx<_Tp, m, n>(*this, a, Matx_DivOp()); +} + +template inline +Matx<_Tp, n, m> Matx<_Tp, m, n>::t() const +{ + return Matx<_Tp, n, m>(*this, Matx_TOp()); +} + +template inline +Vec<_Tp, n> Matx<_Tp, m, n>::solve(const Vec<_Tp, m>& rhs, int method) const +{ + Matx<_Tp, n, 1> x = solve((const Matx<_Tp, m, 1>&)(rhs), method); + return (Vec<_Tp, n>&)(x); +} + +template static inline +double determinant(const Matx<_Tp, m, m>& a) +{ + return cv::internal::Matx_DetOp<_Tp, m>()(a); +} + +template static inline +double trace(const Matx<_Tp, m, n>& a) +{ + _Tp s = 0; + for( int i = 0; i < std::min(m, n); i++ ) + s += a(i,i); + return s; +} + +template static inline +double norm(const Matx<_Tp, m, n>& M) +{ + return std::sqrt(normL2Sqr<_Tp, double>(M.val, m*n)); +} + +template static inline +double norm(const Matx<_Tp, m, n>& M, int normType) +{ + switch(normType) { + case NORM_INF: + return (double)normInf<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n); + case NORM_L1: + return (double)normL1<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n); + case NORM_L2SQR: + return (double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n); + default: + case NORM_L2: + return std::sqrt((double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n)); + } +} + + + +//////////////////////////////// matx comma initializer ////////////////////////////////// + +template static inline +MatxCommaInitializer<_Tp, m, n> operator << (const Matx<_Tp, m, n>& mtx, _T2 val) +{ + MatxCommaInitializer<_Tp, m, n> commaInitializer((Matx<_Tp, m, n>*)&mtx); + return (commaInitializer, val); +} + +template inline +MatxCommaInitializer<_Tp, m, n>::MatxCommaInitializer(Matx<_Tp, m, n>* _mtx) + : dst(_mtx), idx(0) +{} + +template template inline +MatxCommaInitializer<_Tp, m, n>& MatxCommaInitializer<_Tp, m, n>::operator , (_T2 value) +{ + CV_DbgAssert( idx < m*n ); + dst->val[idx++] = saturate_cast<_Tp>(value); + return *this; +} + +template inline +Matx<_Tp, m, n> MatxCommaInitializer<_Tp, m, n>::operator *() const +{ + CV_DbgAssert( idx == n*m ); + return *dst; +} + + + +/////////////////////////////////// Vec Implementation /////////////////////////////////// + +template inline +Vec<_Tp, cn>::Vec() 
{} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0) + : Matx<_Tp, cn, 1>(v0) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1) + : Matx<_Tp, cn, 1>(v0, v1) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2) + : Matx<_Tp, cn, 1>(v0, v1, v2) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9) {} + +template inline +Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13) + : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) {} + +template inline +Vec<_Tp, cn>::Vec(const _Tp* values) + : Matx<_Tp, cn, 1>(values) {} + +template inline +Vec<_Tp, cn>::Vec(std::initializer_list<_Tp> list) + : Matx<_Tp, cn, 1>(list) {} + +template inline +Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m) + : Matx<_Tp, cn, 1>(m.val) {} + +template inline +Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp op) + : Matx<_Tp, cn, 1>(a, b, op) {} + +template inline +Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp op) + : Matx<_Tp, cn, 1>(a, b, op) {} + +template template inline +Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp op) + : Matx<_Tp, cn, 1>(a, alpha, op) {} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha) +{ + Vec v; + for( int i = 0; i < cn; i++ ) v.val[i] = alpha; + return v; +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::ones() +{ + return Vec::all(1); +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::zeros() +{ + return Vec::all(0); +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const +{ + Vec<_Tp, cn> w; + for( int i = 0; i < cn; i++ ) w.val[i] = saturate_cast<_Tp>(this->val[i]*v.val[i]); + return w; +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template<> inline +Vec Vec::conj() const +{ + return cv::internal::conjugate(*this); +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::cross(const Vec<_Tp, cn>&) const +{ + CV_StaticAssert(cn == 3, "for arbitrary-size vector there is no cross-product defined"); + return Vec<_Tp, cn>(); +} + +template<> inline +Vec Vec::cross(const Vec& v) const +{ + return Vec(this->val[1]*v.val[2] - this->val[2]*v.val[1], + this->val[2]*v.val[0] - this->val[0]*v.val[2], + this->val[0]*v.val[1] - this->val[1]*v.val[0]); +} + +template<> inline +Vec 
Vec::cross(const Vec& v) const +{ + return Vec(this->val[1]*v.val[2] - this->val[2]*v.val[1], + this->val[2]*v.val[0] - this->val[0]*v.val[2], + this->val[0]*v.val[1] - this->val[1]*v.val[0]); +} + +template template inline +Vec<_Tp, cn>::operator Vec() const +{ + Vec v; + for( int i = 0; i < cn; i++ ) v.val[i] = saturate_cast(this->val[i]); + return v; +} + +template inline +const _Tp& Vec<_Tp, cn>::operator [](int i) const +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +_Tp& Vec<_Tp, cn>::operator [](int i) +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +const _Tp& Vec<_Tp, cn>::operator ()(int i) const +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +_Tp& Vec<_Tp, cn>::operator ()(int i) +{ + CV_DbgAssert( (unsigned)i < (unsigned)cn ); + return this->val[i]; +} + +template inline +Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v) +{ + double nv = norm(v); + return v * (nv ? 1./nv : 0.); +} + + + +//////////////////////////////// vec comma initializer ////////////////////////////////// + + +template static inline +VecCommaInitializer<_Tp, cn> operator << (const Vec<_Tp, cn>& vec, _T2 val) +{ + VecCommaInitializer<_Tp, cn> commaInitializer((Vec<_Tp, cn>*)&vec); + return (commaInitializer, val); +} + +template inline +VecCommaInitializer<_Tp, cn>::VecCommaInitializer(Vec<_Tp, cn>* _vec) + : MatxCommaInitializer<_Tp, cn, 1>(_vec) +{} + +template template inline +VecCommaInitializer<_Tp, cn>& VecCommaInitializer<_Tp, cn>::operator , (_T2 value) +{ + CV_DbgAssert( this->idx < cn ); + this->dst->val[this->idx++] = saturate_cast<_Tp>(value); + return *this; +} + +template inline +Vec<_Tp, cn> VecCommaInitializer<_Tp, cn>::operator *() const +{ + CV_DbgAssert( this->idx == cn ); + return *this->dst; +} + +//! @endcond + +///////////////////////////// Matx out-of-class operators //////////////////////////////// + +//! @relates cv::Matx +//! 
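The comma initializers implemented above, cv::normalize for Vec, and the out-of-class Matx operators that follow can be combined as in this sketch (values are made up; the matrix-vector product is defined just below):

    #include <opencv2/core.hpp>

    int main()
    {
        cv::Vec3f v(3, 0, 4);
        cv::Vec3f unit = cv::normalize(v);  // (0.6, 0, 0.8); a zero vector maps to zero

        cv::Matx22f M;
        M << 1, 2,
             3, 4;                          // MatxCommaInitializer fills M in place
        cv::Vec2f y = M * cv::Vec2f(1, 1);  // matrix * vector -> (3, 7)
        (void)unit; (void)y;
        return 0;
    }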
@{ + +template static inline +Matx<_Tp1, m, n>& operator += (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]); + return a; +} + +template static inline +Matx<_Tp1, m, n>& operator -= (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]); + return a; +} + +template static inline +Matx<_Tp, m, n> operator + (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_AddOp()); +} + +template static inline +Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_SubOp()); +} + +template static inline +Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, int alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha); + return a; +} + +template static inline +Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, float alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha); + return a; +} + +template static inline +Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, double alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha); + return a; +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, int alpha) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, float alpha) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, double alpha) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (int alpha, const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (float alpha, const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, float alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = a.val[i] / alpha; + return a; +} + +template static inline +Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, double alpha) +{ + for( int i = 0; i < m*n; i++ ) + a.val[i] = a.val[i] / alpha; + return a; +} + +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, float alpha) +{ + return Matx<_Tp, m, n>(a, 1.f/alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, double alpha) +{ + return Matx<_Tp, m, n>(a, 1./alpha, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a) +{ + return Matx<_Tp, m, n>(a, -1, Matx_ScaleOp()); +} + +template static inline +Matx<_Tp, m, n> operator * (const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_MatMulOp()); +} + +template static inline +Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b) +{ + Matx<_Tp, m, 1> c(a, b, Matx_MatMulOp()); + return (const Vec<_Tp, m>&)(c); +} + +template static inline +bool operator == (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + for( int i = 0; i < m*n; i++ ) + if( a.val[i] != b.val[i] ) return false; + return 
true; +} + +template static inline +bool operator != (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return !(a == b); +} + +//! @} + +////////////////////////////// Vec out-of-class operators //////////////////////////////// + +//! @relates cv::Vec +//! @{ + +template static inline +Vec<_Tp1, cn>& operator += (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b) +{ + for( int i = 0; i < cn; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]); + return a; +} + +template static inline +Vec<_Tp1, cn>& operator -= (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b) +{ + for( int i = 0; i < cn; i++ ) + a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]); + return a; +} + +template static inline +Vec<_Tp, cn> operator + (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b) +{ + return Vec<_Tp, cn>(a, b, Matx_AddOp()); +} + +template static inline +Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b) +{ + return Vec<_Tp, cn>(a, b, Matx_SubOp()); +} + +template static inline +Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, int alpha) +{ + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*alpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, float alpha) +{ + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*alpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, double alpha) +{ + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*alpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, int alpha) +{ + double ialpha = 1./alpha; + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*ialpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, float alpha) +{ + float ialpha = 1.f/alpha; + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*ialpha); + return a; +} + +template static inline +Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, double alpha) +{ + double ialpha = 1./alpha; + for( int i = 0; i < cn; i++ ) + a[i] = saturate_cast<_Tp>(a[i]*ialpha); + return a; +} + +template static inline +Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, int alpha) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (int alpha, const Vec<_Tp, cn>& a) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, float alpha) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (float alpha, const Vec<_Tp, cn>& a) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, double alpha) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator * (double alpha, const Vec<_Tp, cn>& a) +{ + return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, int alpha) +{ + return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, float alpha) +{ + return Vec<_Tp, cn>(a, 1.f/alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, double alpha) +{ + return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp()); +} + +template static inline +Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a) +{ + Vec<_Tp,cn> t; + for( int i = 0; i < cn; i++ ) t.val[i] = 
saturate_cast<_Tp>(-a.val[i]); + return t; +} + +template inline Vec<_Tp, 4> operator * (const Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2) +{ + return Vec<_Tp, 4>(saturate_cast<_Tp>(v1[0]*v2[0] - v1[1]*v2[1] - v1[2]*v2[2] - v1[3]*v2[3]), + saturate_cast<_Tp>(v1[0]*v2[1] + v1[1]*v2[0] + v1[2]*v2[3] - v1[3]*v2[2]), + saturate_cast<_Tp>(v1[0]*v2[2] - v1[1]*v2[3] + v1[2]*v2[0] + v1[3]*v2[1]), + saturate_cast<_Tp>(v1[0]*v2[3] + v1[1]*v2[2] - v1[2]*v2[1] + v1[3]*v2[0])); +} + +template inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2) +{ + v1 = v1 * v2; + return v1; +} + +//! @} + +} // cv + +#endif // OPENCV_CORE_MATX_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/neon_utils.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/neon_utils.hpp new file mode 100644 index 0000000..573ba99 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/neon_utils.hpp @@ -0,0 +1,128 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_HAL_NEON_UTILS_HPP +#define OPENCV_HAL_NEON_UTILS_HPP + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils_neon +//! 
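The 4-element operator* / operator*= overloads at the end of matx.hpp above implement the Hamilton (quaternion) product, with element 0 as the scalar part. A quick sanity check (a sketch, not from the patch):

    #include <opencv2/core.hpp>

    int main()
    {
        // Quaternions as (w, x, y, z); under the Hamilton product i*j = k.
        cv::Vec4d qi(0, 1, 0, 0), qj(0, 0, 1, 0);
        cv::Vec4d qk = qi * qj;   // -> (0, 0, 0, 1)
        (void)qk;
        return 0;
    }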
@{ + +#if CV_NEON + +inline int32x2_t cv_vrnd_s32_f32(float32x2_t v) +{ + static int32x2_t v_sign = vdup_n_s32(1 << 31), + v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f)); + + int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v))); + return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition))); +} + +inline int32x4_t cv_vrndq_s32_f32(float32x4_t v) +{ + static int32x4_t v_sign = vdupq_n_s32(1 << 31), + v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); + + int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v))); + return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition))); +} + +inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v) +{ + static float32x2_t v_05 = vdup_n_f32(0.5f); + return vcvt_u32_f32(vadd_f32(v, v_05)); +} + +inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v) +{ + static float32x4_t v_05 = vdupq_n_f32(0.5f); + return vcvtq_u32_f32(vaddq_f32(v, v_05)); +} + +inline float32x4_t cv_vrecpq_f32(float32x4_t val) +{ + float32x4_t reciprocal = vrecpeq_f32(val); + reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal); + reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal); + return reciprocal; +} + +inline float32x2_t cv_vrecp_f32(float32x2_t val) +{ + float32x2_t reciprocal = vrecpe_f32(val); + reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal); + reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal); + return reciprocal; +} + +inline float32x4_t cv_vrsqrtq_f32(float32x4_t val) +{ + float32x4_t e = vrsqrteq_f32(val); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e); + e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e); + return e; +} + +inline float32x2_t cv_vrsqrt_f32(float32x2_t val) +{ + float32x2_t e = vrsqrte_f32(val); + e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e); + e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e); + return e; +} + +inline float32x4_t cv_vsqrtq_f32(float32x4_t val) +{ + return cv_vrecpq_f32(cv_vrsqrtq_f32(val)); +} + +inline float32x2_t cv_vsqrt_f32(float32x2_t val) +{ + return cv_vrecp_f32(cv_vrsqrt_f32(val)); +} + +#endif + +//! @} + +#endif // OPENCV_HAL_NEON_UTILS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ocl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ocl.hpp new file mode 100644 index 0000000..642b050 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ocl.hpp @@ -0,0 +1,902 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
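The helpers in neon_utils.hpp above wrap common NEON idioms: round-to-nearest via a sign-adjusted 0.5 bias before the truncating convert, and two Newton-Raphson refinement steps on top of the vrecpe/vrsqrte estimates. A usage sketch, compiled only where CV_NEON is set:

    #include <opencv2/core/neon_utils.hpp>

    #if CV_NEON
    static void neon_demo()
    {
        float32x4_t v = vdupq_n_f32(2.5f);
        int32x4_t   r = cv_vrndq_s32_f32(v);  // round to nearest (half away from zero)
        float32x4_t s = cv_vsqrtq_f32(v);     // sqrt via refined rsqrt * reciprocal
        (void)r; (void)s;
    }
    #endif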
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_OPENCL_HPP +#define OPENCV_OPENCL_HPP + +#include "opencv2/core.hpp" +#include +#include + +namespace cv { namespace ocl { + +//! @addtogroup core_opencl +//! @{ + +CV_EXPORTS_W bool haveOpenCL(); +CV_EXPORTS_W bool useOpenCL(); +CV_EXPORTS_W bool haveAmdBlas(); +CV_EXPORTS_W bool haveAmdFft(); +CV_EXPORTS_W void setUseOpenCL(bool flag); +CV_EXPORTS_W void finish(); + +CV_EXPORTS bool haveSVM(); + +class CV_EXPORTS Context; +class CV_EXPORTS_W_SIMPLE Device; +class CV_EXPORTS Kernel; +class CV_EXPORTS Program; +class CV_EXPORTS ProgramSource; +class CV_EXPORTS Queue; +class CV_EXPORTS PlatformInfo; +class CV_EXPORTS Image2D; + +class CV_EXPORTS_W_SIMPLE Device +{ +public: + CV_WRAP Device() CV_NOEXCEPT; + explicit Device(void* d); + Device(const Device& d); + Device& operator = (const Device& d); + Device(Device&& d) CV_NOEXCEPT; + Device& operator = (Device&& d) CV_NOEXCEPT; + CV_WRAP ~Device(); + + void set(void* d); + + enum + { + TYPE_DEFAULT = (1 << 0), + TYPE_CPU = (1 << 1), + TYPE_GPU = (1 << 2), + TYPE_ACCELERATOR = (1 << 3), + TYPE_DGPU = TYPE_GPU + (1 << 16), + TYPE_IGPU = TYPE_GPU + (1 << 17), + TYPE_ALL = 0xFFFFFFFF + }; + + CV_WRAP String name() const; + CV_WRAP String extensions() const; + CV_WRAP bool isExtensionSupported(const String& extensionName) const; + CV_WRAP String version() const; + CV_WRAP String vendorName() const; + CV_WRAP String OpenCL_C_Version() const; + CV_WRAP String OpenCLVersion() const; + CV_WRAP int deviceVersionMajor() const; + CV_WRAP int deviceVersionMinor() const; + CV_WRAP String driverVersion() const; + void* ptr() const; + + CV_WRAP int type() const; + + CV_WRAP int addressBits() const; + CV_WRAP bool available() const; + CV_WRAP bool compilerAvailable() const; + CV_WRAP bool linkerAvailable() const; + + enum + { + FP_DENORM=(1 << 0), + FP_INF_NAN=(1 << 1), + FP_ROUND_TO_NEAREST=(1 << 2), + FP_ROUND_TO_ZERO=(1 << 3), + FP_ROUND_TO_INF=(1 << 4), + FP_FMA=(1 << 5), + FP_SOFT_FLOAT=(1 << 6), + FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) + }; + CV_WRAP int doubleFPConfig() const; + CV_WRAP int singleFPConfig() const; + CV_WRAP int halfFPConfig() const; + + CV_WRAP bool endianLittle() const; + CV_WRAP bool errorCorrectionSupport() const; + + enum + { + EXEC_KERNEL=(1 << 0), + EXEC_NATIVE_KERNEL=(1 << 1) + }; + CV_WRAP int executionCapabilities() const; + + 
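As an illustration of the capability-query interface this class declares, the sketch below prints a few properties of the default device; it assumes an OpenCL runtime is actually present at run time:

    #include <opencv2/core/ocl.hpp>
    #include <iostream>

    int main()
    {
        if (!cv::ocl::haveOpenCL())
            return 0;   // no OpenCL runtime: the default Device would be empty
        const cv::ocl::Device& d = cv::ocl::Device::getDefault();
        std::cout << d.name() << " (" << d.vendorName() << "), OpenCL "
                  << d.OpenCLVersion() << ", compute units: "
                  << d.maxComputeUnits() << std::endl;
        return 0;
    }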
CV_WRAP size_t globalMemCacheSize() const; + + enum + { + NO_CACHE=0, + READ_ONLY_CACHE=1, + READ_WRITE_CACHE=2 + }; + CV_WRAP int globalMemCacheType() const; + CV_WRAP int globalMemCacheLineSize() const; + CV_WRAP size_t globalMemSize() const; + + CV_WRAP size_t localMemSize() const; + enum + { + NO_LOCAL_MEM=0, + LOCAL_IS_LOCAL=1, + LOCAL_IS_GLOBAL=2 + }; + CV_WRAP int localMemType() const; + CV_WRAP bool hostUnifiedMemory() const; + + CV_WRAP bool imageSupport() const; + + CV_WRAP bool imageFromBufferSupport() const; + uint imagePitchAlignment() const; + uint imageBaseAddressAlignment() const; + + /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value) + CV_WRAP bool intelSubgroupsSupport() const; + + CV_WRAP size_t image2DMaxWidth() const; + CV_WRAP size_t image2DMaxHeight() const; + + CV_WRAP size_t image3DMaxWidth() const; + CV_WRAP size_t image3DMaxHeight() const; + CV_WRAP size_t image3DMaxDepth() const; + + CV_WRAP size_t imageMaxBufferSize() const; + CV_WRAP size_t imageMaxArraySize() const; + + enum + { + UNKNOWN_VENDOR=0, + VENDOR_AMD=1, + VENDOR_INTEL=2, + VENDOR_NVIDIA=3 + }; + CV_WRAP int vendorID() const; + // FIXIT + // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform. + // This method should use platform name instead of vendor name. + // After fix restore code in arithm.cpp: ocl_compare() + CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; } + CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } + CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } + + CV_WRAP int maxClockFrequency() const; + CV_WRAP int maxComputeUnits() const; + CV_WRAP int maxConstantArgs() const; + CV_WRAP size_t maxConstantBufferSize() const; + + CV_WRAP size_t maxMemAllocSize() const; + CV_WRAP size_t maxParameterSize() const; + + CV_WRAP int maxReadImageArgs() const; + CV_WRAP int maxWriteImageArgs() const; + CV_WRAP int maxSamplers() const; + + CV_WRAP size_t maxWorkGroupSize() const; + CV_WRAP int maxWorkItemDims() const; + void maxWorkItemSizes(size_t*) const; + + CV_WRAP int memBaseAddrAlign() const; + + CV_WRAP int nativeVectorWidthChar() const; + CV_WRAP int nativeVectorWidthShort() const; + CV_WRAP int nativeVectorWidthInt() const; + CV_WRAP int nativeVectorWidthLong() const; + CV_WRAP int nativeVectorWidthFloat() const; + CV_WRAP int nativeVectorWidthDouble() const; + CV_WRAP int nativeVectorWidthHalf() const; + + CV_WRAP int preferredVectorWidthChar() const; + CV_WRAP int preferredVectorWidthShort() const; + CV_WRAP int preferredVectorWidthInt() const; + CV_WRAP int preferredVectorWidthLong() const; + CV_WRAP int preferredVectorWidthFloat() const; + CV_WRAP int preferredVectorWidthDouble() const; + CV_WRAP int preferredVectorWidthHalf() const; + + CV_WRAP size_t printfBufferSize() const; + CV_WRAP size_t profilingTimerResolution() const; + + CV_WRAP static const Device& getDefault(); + + /** + * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success. + * + * @note Ownership of the passed device is passed to OpenCV on success. + * The caller should additionally call `clRetainDevice` on it if it intends + * to continue using the device. 
+ */ + static Device fromHandle(void* d); + + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +}; + + +class CV_EXPORTS Context +{ +public: + Context() CV_NOEXCEPT; + explicit Context(int dtype); //!< @deprecated + ~Context(); + Context(const Context& c); + Context& operator= (const Context& c); + Context(Context&& c) CV_NOEXCEPT; + Context& operator = (Context&& c) CV_NOEXCEPT; + + /** @deprecated */ + bool create(); + /** @deprecated */ + bool create(int dtype); + + size_t ndevices() const; + Device& device(size_t idx) const; + Program getProg(const ProgramSource& prog, + const String& buildopt, String& errmsg); + void unloadProg(Program& prog); + + + /** Get thread-local OpenCL context (initialize if necessary) */ +#if 0 // OpenCV 5.0 + static Context& getDefault(); +#else + static Context& getDefault(bool initialize = true); +#endif + + /** @returns cl_context value */ + void* ptr() const; + + /** + * @brief Get OpenCL context property specified on context creation + * @param propertyId Property id (CL_CONTEXT_* as defined in cl_context_properties type) + * @returns Property value if property was specified on clCreateContext, or NULL if context created without the property + */ + void* getOpenCLContextProperty(int propertyId) const; + + bool useSVM() const; + void setUseSVM(bool enabled); + + /** + * @param context OpenCL handle (cl_context). clRetainContext() is called on success + */ + static Context fromHandle(void* context); + static Context fromDevice(const ocl::Device& device); + static Context create(const std::string& configuration); + + void release(); + + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +// TODO OpenCV 5.0 +//protected: + Impl* p; +}; + +/** @deprecated */ +class CV_EXPORTS Platform +{ +public: + Platform() CV_NOEXCEPT; + ~Platform(); + Platform(const Platform& p); + Platform& operator = (const Platform& p); + Platform(Platform&& p) CV_NOEXCEPT; + Platform& operator = (Platform&& p) CV_NOEXCEPT; + + void* ptr() const; + + /** @deprecated */ + static Platform& getDefault(); + + struct Impl; + inline Impl* getImpl() const { return (Impl*)p; } + inline bool empty() const { return !p; } +protected: + Impl* p; +}; + +/** @brief Attaches OpenCL context to OpenCV +@note + OpenCV will check if available OpenCL platform has platformName name, then assign context to + OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and + new command queue will be created. +@param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime +@param platformID ID of platform attached context was created for +@param context OpenCL context to be attached to OpenCV +@param deviceID ID of device, must be created from attached context +*/ +CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID); + +/** @brief Convert OpenCL buffer to UMat +@note + OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory + content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and + `clRetainMemObject` is called. 
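A minimal sketch of how the Context API above is typically driven (again assuming an OpenCL runtime exists):

    #include <opencv2/core/ocl.hpp>

    int main()
    {
        cv::ocl::setUseOpenCL(true);                 // opt in to the OpenCL dispatch path
        cv::ocl::Context& ctx = cv::ocl::Context::getDefault();
        size_t n = ctx.empty() ? 0 : ctx.ndevices(); // devices bound to the default context
        (void)n;
        return 0;
    }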
+@param cl_mem_buffer source clBuffer handle
+@param step number of bytes in a single row
+@param rows number of rows
+@param cols number of cols
+@param type OpenCV type of image
+@param dst destination UMat
+*/
+CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
+
+/** @brief Convert OpenCL image2d_t to UMat
+@note
+  OpenCL `image2d_t` (cl_mem_image) should be compatible with OpenCV UMat formats. Memory content
+  is copied from the image to the UMat with the `clEnqueueCopyImageToBuffer` function.
+@param cl_mem_image source image2d_t handle
+@param dst destination UMat
+*/
+CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
+
+// TODO Move to internal header
+/// @deprecated
+void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+
+class CV_EXPORTS Queue
+{
+public:
+    Queue() CV_NOEXCEPT;
+    explicit Queue(const Context& c, const Device& d=Device());
+    ~Queue();
+    Queue(const Queue& q);
+    Queue& operator = (const Queue& q);
+    Queue(Queue&& q) CV_NOEXCEPT;
+    Queue& operator = (Queue&& q) CV_NOEXCEPT;
+
+    bool create(const Context& c=Context(), const Device& d=Device());
+    void finish();
+    void* ptr() const;
+    static Queue& getDefault();
+
+    /// @brief Returns an OpenCL command queue with profiling mode support enabled
+    const Queue& getProfilingQueue() const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return p; }
+    inline bool empty() const { return !p; }
+protected:
+    Impl* p;
+};
+
+
+class CV_EXPORTS KernelArg
+{
+public:
+    enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
+    KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
+    KernelArg() CV_NOEXCEPT;
+
+    static KernelArg Local(size_t localMemSize)
+    { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); }
+    static KernelArg PtrWriteOnly(const UMat& m)
+    { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
+    static KernelArg PtrReadOnly(const UMat& m)
+    { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
+    static KernelArg PtrReadWrite(const UMat& m)
+    { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
+    static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
+    static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg Constant(const Mat& m);
+    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
+    { return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }
+
+    int flags;
+    UMat* m;
+    const void* obj;
+    size_t sz;
+    int wscale, iwscale;
+};
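+
+// Hedged note, not from the OpenCV docs: with the wrappers above, ReadOnly /
+// WriteOnly / ReadWrite pass the cl_mem handle plus step/offset and rows/cols
+// to the kernel, the *NoSize variants omit rows/cols, and the Ptr* variants
+// pass only the cl_mem handle. Illustrative only:
+//
+//     k.args(cv::ocl::KernelArg::ReadOnlyNoSize(src),  // ptr + step + offset
+//            cv::ocl::KernelArg::WriteOnly(dst));      // ptr + step + offset + rows + cols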
+
+class CV_EXPORTS Kernel
+{
+public:
+    Kernel() CV_NOEXCEPT;
+    Kernel(const char* kname, const Program& prog);
+    Kernel(const char* kname, const ProgramSource& prog,
+           const String& buildopts = String(), String* errmsg=0);
+    ~Kernel();
+    Kernel(const Kernel& k);
+    Kernel& operator = (const Kernel& k);
+    Kernel(Kernel&& k) CV_NOEXCEPT;
+    Kernel& operator = (Kernel&& k) CV_NOEXCEPT;
+
+    bool empty() const;
+    bool create(const char* kname, const Program& prog);
+    bool create(const char* kname, const ProgramSource& prog,
+                const String& buildopts, String* errmsg=0);
+
+    int set(int i, const void* value, size_t sz);
+    int set(int i, const Image2D& image2D);
+    int set(int i, const UMat& m);
+    int set(int i, const KernelArg& arg);
+    template<typename _Tp> int set(int i, const _Tp& value)
+    { return set(i, &value, sizeof(value)); }
+
+
+protected:
+    template<typename _Tp0> inline
+    int set_args_(int i, const _Tp0& a0) { return set(i, a0); }
+    template<typename _Tp0, typename... _Tps> inline
+    int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); }
+public:
+    /** @brief Set up OpenCL kernel arguments.
+    Avoid using the set(i, ...) methods directly.
+    @code
+    bool ok = kernel
+        .args(
+            srcUMat, dstUMat,
+            (float)some_float_param
+        ).run(ndims, globalSize, localSize, false);
+    if (!ok) return false;
+    @endcode
+    */
+    template<typename... _Tps> inline
+    Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; }
+
+    /** @brief Run the OpenCL kernel (globalsize value may be adjusted)
+
+    @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+    @param globalsize work items for each dimension. It is not the final globalsize passed to
+    OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding
+    value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The
+    adjusted values are greater than or equal to the original values.
+    @param localsize work-group size for each dimension.
+    @param sync specify whether to wait for OpenCL computation to finish before returning.
+    @param q command queue
+
+    @note Use run_() if your kernel code doesn't support adjusted globalsize.
+    */
+    bool run(int dims, size_t globalsize[],
+             size_t localsize[], bool sync, const Queue& q=Queue());
+
+    /** @brief Run the OpenCL kernel
+     *
+     * @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+     * @param globalsize work items for each dimension. This value is passed to OpenCL without changes.
+     * @param localsize work-group size for each dimension.
+     * @param sync specify whether to wait for OpenCL computation to finish before returning.
+     * @param q command queue
+     */
+    bool run_(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue());
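+
+    /* A minimal end-to-end sketch (illustrative only: the module name, kernel
+       name and OpenCL C source are assumptions, not OpenCV-provided values):
+
+           cv::ocl::ProgramSource src("demo", "square_prog",
+               "__kernel void square(__global float* a) { size_t i = get_global_id(0); a[i] *= a[i]; }", "");
+           cv::String errmsg;
+           cv::ocl::Kernel k("square", src, "", &errmsg);  // built and cached on first use
+           cv::UMat buf(1, 256, CV_32F, cv::Scalar::all(2));
+           k.args(cv::ocl::KernelArg::PtrReadWrite(buf));  // pass only the raw cl_mem
+           size_t globalsize[] = { 256 };
+           bool ok = !k.empty() && k.run(1, globalsize, NULL, true);  // synchronous run
+    */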
+
+    bool runTask(bool sync, const Queue& q=Queue());
+
+    /** @brief Similar to a synchronized run_() call, additionally returning the kernel execution time
+     *
+     * A separate OpenCL command queue may be used (one created with CL_QUEUE_PROFILING_ENABLE)
+     * @return Execution time in nanoseconds, or a negative number on error
+     */
+    int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
+
+    size_t workGroupSize() const;
+    size_t preferedWorkGroupSizeMultiple() const;
+    bool compileWorkGroupSize(size_t wsz[]) const;
+    size_t localMemSize() const;
+
+    void* ptr() const;
+    struct Impl;
+
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS Program
+{
+public:
+    Program() CV_NOEXCEPT;
+    Program(const ProgramSource& src,
+            const String& buildflags, String& errmsg);
+    Program(const Program& prog);
+    Program& operator = (const Program& prog);
+    Program(Program&& prog) CV_NOEXCEPT;
+    Program& operator = (Program&& prog) CV_NOEXCEPT;
+    ~Program();
+
+    bool create(const ProgramSource& src,
+                const String& buildflags, String& errmsg);
+
+    void* ptr() const;
+
+    /**
+     * @brief Query the device-specific program binary.
+     *
+     * Returns the raw OpenCL executable binary, without additional attachments.
+     *
+     * @sa ProgramSource::fromBinary
+     *
+     * @param[out] binary output buffer
+     */
+    void getBinary(std::vector<char>& binary) const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+protected:
+    Impl* p;
+public:
+#ifndef OPENCV_REMOVE_DEPRECATED_API
+    // TODO Remove this
+    CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead
+    CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary)
+    CV_DEPRECATED const ProgramSource& source() const; // implementation removed
+    CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced
+    CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced
+#endif
+};
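+
+// A possible offline-cache flow built on getBinary() / ProgramSource::fromBinary()
+// (a sketch; `source` is a hypothetical ProgramSource and error handling is omitted):
+//
+//     cv::String errmsg;
+//     cv::ocl::Program prog(source, "", errmsg);  // compile once
+//     std::vector<char> bin;
+//     prog.getBinary(bin);                        // raw device-specific binary, may be stored on disk
+//     cv::ocl::ProgramSource cached = cv::ocl::ProgramSource::fromBinary(
+//         "demo", "square_prog", (const unsigned char*)bin.data(), bin.size());
+//     // per the lifetime requirement documented below, `bin` must outlive `cached`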
+
+class CV_EXPORTS ProgramSource
+{
+public:
+    typedef uint64 hash_t; // deprecated
+
+    ProgramSource() CV_NOEXCEPT;
+    explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
+    explicit ProgramSource(const String& prog); // deprecated
+    explicit ProgramSource(const char* prog); // deprecated
+    ~ProgramSource();
+    ProgramSource(const ProgramSource& prog);
+    ProgramSource& operator = (const ProgramSource& prog);
+    ProgramSource(ProgramSource&& prog) CV_NOEXCEPT;
+    ProgramSource& operator = (ProgramSource&& prog) CV_NOEXCEPT;
+
+    const String& source() const; // deprecated
+    hash_t hash() const; // deprecated
+
+
+    /** @brief Describe OpenCL program binary.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
+     *
+     * This kind of binary is not portable between platforms in general - it is specific to the OpenCL vendor / device / driver version.
+     *
+     * @param module name of the program's owner module
+     * @param name unique name of the program (module+name is used as the key for OpenCL program caching)
+     * @param binary buffer address. See the buffer lifetime requirement in the description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     * @return created ProgramSource object
+     */
+    static ProgramSource fromBinary(const String& module, const String& name,
+                                    const unsigned char* binary, const size_t size,
+                                    const cv::String& buildOptions = cv::String());
+
+    /** @brief Describe OpenCL program in SPIR format.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior)
+     *
+     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
+     *
+     * Programs in this format are portable between OpenCL implementations with the 'khr_spir' extension:
+     * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html
+     * (but they are not portable between different platforms: 32-bit / 64-bit)
+     *
+     * Note: these programs can't use vendor-specific extensions, like 'cl_intel_subgroups'.
+     *
+     * @param module name of the program's owner module
+     * @param name unique name of the program (module+name is used as the key for OpenCL program caching)
+     * @param binary buffer address. See the buffer lifetime requirement in the description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     *        (these options are added automatically: '-x spir' and '-spir-std=1.2')
+     * @return created ProgramSource object.
+     */
+    static ProgramSource fromSPIR(const String& module, const String& name,
+                                  const unsigned char* binary, const size_t size,
+                                  const cv::String& buildOptions = cv::String());
+
+    //OpenCL 2.1+ only
+    //static Program fromSPIRV(const String& module, const String& name,
+    //        const unsigned char* binary, const size_t size,
+    //        const cv::String& buildOptions = cv::String());
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS PlatformInfo
+{
+public:
+    PlatformInfo() CV_NOEXCEPT;
+    /**
+     * @param id pointer to cl_platform_id (cl_platform_id*)
+     */
+    explicit PlatformInfo(void* id);
+    ~PlatformInfo();
+
+    PlatformInfo(const PlatformInfo& i);
+    PlatformInfo& operator =(const PlatformInfo& i);
+    PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT;
+    PlatformInfo& operator = (PlatformInfo&& i) CV_NOEXCEPT;
+
+    String name() const;
+    String vendor() const;
+
+    /// See CL_PLATFORM_VERSION
+    String version() const;
+    int versionMajor() const;
+    int versionMinor() const;
+
+    int deviceNumber() const;
+    void getDevice(Device& device, int d) const;
+
+    struct Impl;
+    bool empty() const { return !p; }
+protected:
+    Impl* p;
+};
+
+CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
+CV_EXPORTS const char* typeToStr(int t);
+CV_EXPORTS const char* memopTypeToStr(int t);
+CV_EXPORTS const char* vecopTypeToStr(int t);
+CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
+CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
+CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
+
+
+enum OclVectorStrategy
+{
+    // each matrix has its own vector width
+    OCL_VECTOR_OWN = 0,
+    // all matrices use the maximal vector width among all matrices
+    // (useful for cases when matrices have different data types)
+    OCL_VECTOR_MAX = 1,
+
+    // default strategy
+    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
+};
+
+CV_EXPORTS int
predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), + OclVectorStrategy strat = OCL_VECTOR_DEFAULT); + +CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths, + InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), + OclVectorStrategy strat = OCL_VECTOR_DEFAULT); + +// with OCL_VECTOR_MAX strategy +CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); + +CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); + +class CV_EXPORTS Image2D +{ +public: + Image2D() CV_NOEXCEPT; + + /** + @param src UMat object from which to get image properties and data + @param norm flag to enable the use of normalized channel data types + @param alias flag indicating that the image should alias the src UMat. If true, changes to the + image or src will be reflected in both objects. + */ + explicit Image2D(const UMat &src, bool norm = false, bool alias = false); + Image2D(const Image2D & i); + ~Image2D(); + + Image2D & operator = (const Image2D & i); + Image2D(Image2D &&) CV_NOEXCEPT; + Image2D &operator=(Image2D &&) CV_NOEXCEPT; + + /** Indicates if creating an aliased image should succeed. + Depends on the underlying platform and the dimensions of the UMat. + */ + static bool canCreateAlias(const UMat &u); + + /** Indicates if the image format is supported. + */ + static bool isFormatSupported(int depth, int cn, bool norm); + + void* ptr() const; +protected: + struct Impl; + Impl* p; +}; + +class CV_EXPORTS Timer +{ +public: + Timer(const Queue& q); + ~Timer(); + void start(); + void stop(); + + uint64 durationNS() const; //< duration in nanoseconds + +protected: + struct Impl; + Impl* const p; + +private: + Timer(const Timer&); // disabled + Timer& operator=(const Timer&); // disabled +}; + +CV_EXPORTS MatAllocator* getOpenCLAllocator(); + + +class CV_EXPORTS_W OpenCLExecutionContext +{ +public: + OpenCLExecutionContext() = default; + ~OpenCLExecutionContext() = default; + + OpenCLExecutionContext(const OpenCLExecutionContext&) = default; + OpenCLExecutionContext(OpenCLExecutionContext&&) = default; + + OpenCLExecutionContext& operator=(const OpenCLExecutionContext&) = default; + OpenCLExecutionContext& operator=(OpenCLExecutionContext&&) = default; + + /** Get associated ocl::Context */ + Context& getContext() const; + /** Get the single default associated ocl::Device */ + Device& getDevice() const; + /** Get the single ocl::Queue that is associated with the ocl::Context and + * the single default ocl::Device + */ + Queue& getQueue() const; + + bool useOpenCL() const; + void setUseOpenCL(bool flag); + + /** Get OpenCL execution context of current thread. 
+     *
+     * Initialize OpenCL execution context if it is empty
+     *  - create new
+     *  - reuse context of the main thread (threadID = 0)
+     */
+    static OpenCLExecutionContext& getCurrent();
+
+    /** Get OpenCL execution context of current thread (can be empty) */
+    static OpenCLExecutionContext& getCurrentRef();
+
+    /** Bind this OpenCL execution context to current thread.
+     *
+     * Context can't be empty.
+     *
+     * @note clFinish is not called for the queue of the previous execution context
+     */
+    void bind() const;
+
+    /** Creates a new execution context with the same OpenCV context and device
+     *
+     * @param q OpenCL queue
+     */
+    OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const;
+    /** @overload */
+    OpenCLExecutionContext cloneWithNewQueue() const;
+
+    /** @brief Creates OpenCL execution context
+     * OpenCV will check whether the available OpenCL platform has the platformName name,
+     * then assign the context to OpenCV.
+     * The deviceID device will be used as the target device and a new command queue will be created.
+     *
+     * @note On success, ownership of one reference of the context and device is taken.
+     * The caller should additionally call `clRetainContext` and/or `clRetainDevice`
+     * to increase the reference count if it wishes to continue using them.
+     *
+     * @param platformName name of the OpenCL platform to attach; this string is used to check whether the platform is available to OpenCV at runtime
+     * @param platformID ID of the platform the attached context was created for (cl_platform_id)
+     * @param context OpenCL context to be attached to OpenCV (cl_context)
+     * @param deviceID OpenCL device (cl_device_id)
+     */
+    static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID);
+
+    /** @brief Creates OpenCL execution context
+     *
+     * @param context non-empty OpenCL context
+     * @param device non-empty OpenCL device (must be a part of the context)
+     * @param queue non-empty OpenCL queue for the provided context and device
+     */
+    static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue);
+    /** @overload */
+    static OpenCLExecutionContext create(const Context& context, const Device& device);
+
+    struct Impl;
+    inline bool empty() const { return !p; }
+    void release();
+protected:
+    std::shared_ptr<Impl> p;
+};
+
+class OpenCLExecutionContextScope
+{
+    OpenCLExecutionContext ctx_;
+public:
+    inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx)
+    {
+        CV_Assert(!ctx.empty());
+        ctx_ = OpenCLExecutionContext::getCurrentRef();
+        ctx.bind();
+    }
+
+    inline ~OpenCLExecutionContextScope()
+    {
+        if (!ctx_.empty())
+        {
+            ctx_.bind();
+        }
+    }
+};
+
+#ifdef __OPENCV_BUILD
+namespace internal {
+
+CV_EXPORTS bool isOpenCLForced();
+#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
+
+CV_EXPORTS bool isPerformanceCheckBypassed();
+#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
+
+CV_EXPORTS bool isCLBuffer(UMat& u);
+
+} // namespace internal
+#endif
+
+//!
@} + +}} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ocl_genbase.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ocl_genbase.hpp new file mode 100644 index 0000000..5334cf1 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ocl_genbase.hpp @@ -0,0 +1,69 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_OPENCL_GENBASE_HPP +#define OPENCV_OPENCL_GENBASE_HPP + +//! @cond IGNORED + +namespace cv { +namespace ocl { + +class ProgramSource; + +namespace internal { + +struct CV_EXPORTS ProgramEntry +{ + const char* module; + const char* name; + const char* programCode; + const char* programHash; + ProgramSource* pProgramSource; + + operator ProgramSource& () const; +}; + +} } } // namespace + +//! @endcond + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/ocl_defs.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/ocl_defs.hpp new file mode 100644 index 0000000..14df750 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/ocl_defs.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_CORE_OPENCL_DEFS_HPP
+#define OPENCV_CORE_OPENCL_DEFS_HPP
+
+#include "opencv2/core/utility.hpp"
+#include "cvconfig.h"
+
+namespace cv { namespace ocl {
+#ifdef HAVE_OPENCL
+/// This call is similar to useOpenCL() but doesn't try to load the OpenCL runtime or create an OpenCL context
+CV_EXPORTS bool isOpenCLActivated();
+#else
+static inline bool isOpenCLActivated() { return false; }
+#endif
+}} // namespace
+
+
+//#define CV_OPENCL_RUN_ASSERT
+
+#ifdef HAVE_OPENCL
+
+#ifdef CV_OPENCL_RUN_VERBOSE
+#define CV_OCL_RUN_(condition, func, ...) \
+    { \
+        if (cv::ocl::isOpenCLActivated() && (condition) && func) \
+        { \
+            printf("%s: OpenCL implementation is running\n", CV_Func); \
+            fflush(stdout); \
+            CV_IMPL_ADD(CV_IMPL_OCL); \
+            return __VA_ARGS__; \
+        } \
+        else \
+        { \
+            printf("%s: Plain implementation is running\n", CV_Func); \
+            fflush(stdout); \
+        } \
+    }
+#elif defined CV_OPENCL_RUN_ASSERT
+#define CV_OCL_RUN_(condition, func, ...) \
+    { \
+        if (cv::ocl::isOpenCLActivated() && (condition)) \
+        { \
+            if(func) \
+            { \
+                CV_IMPL_ADD(CV_IMPL_OCL); \
+            } \
+            else \
+            { \
+                CV_Error(cv::Error::StsAssert, #func); \
+            } \
+            return __VA_ARGS__; \
+        } \
+    }
+#else
+#define CV_OCL_RUN_(condition, func, ...) \
+try \
+{ \
+    if (cv::ocl::isOpenCLActivated() && (condition) && func) \
+    { \
+        CV_IMPL_ADD(CV_IMPL_OCL); \
+        return __VA_ARGS__; \
+    } \
+} \
+catch (const cv::Exception& e) \
+{ \
+    CV_UNUSED(e); /* TODO: Add some logging here */ \
+}
+#endif
+
+#else
+#define CV_OCL_RUN_(condition, func, ...)
+#endif
+
+#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func)
+
+#endif // OPENCV_CORE_OPENCL_DEFS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/opencl_info.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/opencl_info.hpp
new file mode 100644
index 0000000..3ead76e
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/opencl_info.hpp
@@ -0,0 +1,212 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include <iostream>
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/ocl.hpp>
+
+#ifndef DUMP_CONFIG_PROPERTY
+#define DUMP_CONFIG_PROPERTY(...)
+#endif
+
+#ifndef DUMP_MESSAGE_STDOUT
+#define DUMP_MESSAGE_STDOUT(...)
do { std::cout << __VA_ARGS__ << std::endl; } while (false) +#endif + +namespace cv { + +namespace { +static std::string bytesToStringRepr(size_t value) +{ + size_t b = value % 1024; + value /= 1024; + + size_t kb = value % 1024; + value /= 1024; + + size_t mb = value % 1024; + value /= 1024; + + size_t gb = value; + + std::ostringstream stream; + + if (gb > 0) + stream << gb << " GB "; + if (mb > 0) + stream << mb << " MB "; + if (kb > 0) + stream << kb << " KB "; + if (b > 0) + stream << b << " B"; + + std::string s = stream.str(); + if (s[s.size() - 1] == ' ') + s = s.substr(0, s.size() - 1); + return s; +} + +static String getDeviceTypeString(const cv::ocl::Device& device) +{ + if (device.type() == cv::ocl::Device::TYPE_CPU) { + return "CPU"; + } + + if (device.type() == cv::ocl::Device::TYPE_GPU) { + if (device.hostUnifiedMemory()) { + return "iGPU"; + } else { + return "dGPU"; + } + } + + return "unknown"; +} +} // namespace + +static void dumpOpenCLInformation() +{ + using namespace cv::ocl; + + try + { + if (!haveOpenCL() || !useOpenCL()) + { + DUMP_MESSAGE_STDOUT("OpenCL is disabled"); + DUMP_CONFIG_PROPERTY("cv_ocl", "disabled"); + return; + } + + std::vector platforms; + cv::ocl::getPlatfomsInfo(platforms); + if (platforms.empty()) + { + DUMP_MESSAGE_STDOUT("OpenCL is not available"); + DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); + return; + } + + DUMP_MESSAGE_STDOUT("OpenCL Platforms: "); + for (size_t i = 0; i < platforms.size(); i++) + { + const PlatformInfo* platform = &platforms[i]; + DUMP_MESSAGE_STDOUT(" " << platform->name()); + Device current_device; + for (int j = 0; j < platform->deviceNumber(); j++) + { + platform->getDevice(current_device, j); + String deviceTypeStr = getDeviceTypeString(current_device); + DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")"); + DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ), + cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)", + platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) ); + } + } + const Device& device = Device::getDefault(); + if (!device.available()) + CV_Error(Error::OpenCLInitError, "OpenCL device is not available"); + + DUMP_MESSAGE_STDOUT("Current OpenCL device: "); + + String deviceTypeStr = getDeviceTypeString(device); + DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr); + + DUMP_MESSAGE_STDOUT(" Name = " << device.name()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name()); + + DUMP_MESSAGE_STDOUT(" Version = " << device.version()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version()); + + DUMP_MESSAGE_STDOUT(" Driver version = " << device.driverVersion()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion()); + + DUMP_MESSAGE_STDOUT(" Address bits = " << device.addressBits()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits()); + + DUMP_MESSAGE_STDOUT(" Compute units = " << device.maxComputeUnits()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits()); + + DUMP_MESSAGE_STDOUT(" Max work group size = " << device.maxWorkGroupSize()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize()); + + std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize()); + DUMP_MESSAGE_STDOUT(" Local memory size = " << localMemorySizeStr); + 
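+        // Descriptive note: the human-readable size string goes to stdout via
+        // DUMP_MESSAGE_STDOUT, while the raw byte count is recorded separately
+        // through DUMP_CONFIG_PROPERTY (same pattern for maxMemAllocSize below).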
DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize()); + + std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize()); + DUMP_MESSAGE_STDOUT(" Max memory allocation size = " << maxMemAllocSizeStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize()); + + const char* doubleSupportStr = device.doubleFPConfig() > 0 ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0); + + const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Half support = " << halfSupportStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0); + + const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory()); + + DUMP_MESSAGE_STDOUT(" Device extensions:"); + String extensionsStr = device.extensions(); + size_t pos = 0; + while (pos < extensionsStr.size()) + { + size_t pos2 = extensionsStr.find(' ', pos); + if (pos2 == String::npos) + pos2 = extensionsStr.size(); + if (pos2 > pos) + { + String extensionName = extensionsStr.substr(pos, pos2 - pos); + DUMP_MESSAGE_STDOUT(" " << extensionName); + } + pos = pos2 + 1; + } + DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr); + + const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas()); + + const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Has AMD Fft = " << haveAmdFftStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft()); + + + DUMP_MESSAGE_STDOUT(" Preferred vector width char = " << device.preferredVectorWidthChar()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width short = " << device.preferredVectorWidthShort()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width int = " << device.preferredVectorWidthInt()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width long = " << device.preferredVectorWidthLong()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width float = " << device.preferredVectorWidthFloat()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width half = " << device.preferredVectorWidthHalf()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf()); + } + catch (...) + { + DUMP_MESSAGE_STDOUT("Exception. 
Can't dump OpenCL info"); + DUMP_MESSAGE_STDOUT("OpenCL device not available"); + DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); + } +} +#undef DUMP_MESSAGE_STDOUT +#undef DUMP_CONFIG_PROPERTY + +} // namespace diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/opencl_svm.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/opencl_svm.hpp new file mode 100644 index 0000000..7453082 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/opencl_svm.hpp @@ -0,0 +1,81 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OPENCL_SVM_HPP +#define OPENCV_CORE_OPENCL_SVM_HPP + +// +// Internal usage only (binary compatibility is not guaranteed) +// +#ifndef __OPENCV_BUILD +#error Internal header file +#endif + +#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM) +#include "runtime/opencl_core.hpp" +#include "runtime/opencl_svm_20.hpp" +#include "runtime/opencl_svm_hsa_extension.hpp" + +namespace cv { namespace ocl { namespace svm { + +struct SVMCapabilities +{ + enum Value + { + SVM_COARSE_GRAIN_BUFFER = (1 << 0), + SVM_FINE_GRAIN_BUFFER = (1 << 1), + SVM_FINE_GRAIN_SYSTEM = (1 << 2), + SVM_ATOMICS = (1 << 3), + }; + int value_; + + SVMCapabilities(int capabilities = 0) : value_(capabilities) { } + operator int() const { return value_; } + + inline bool isNoSVMSupport() const { return value_ == 0; } + inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; } + inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; } + inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; } +}; + +CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context); + +struct SVMFunctions +{ + clSVMAllocAMD_fn fn_clSVMAlloc; + clSVMFreeAMD_fn fn_clSVMFree; + clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer; + //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo; + //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree; + clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy; + clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill; + clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap; + clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap; + + inline SVMFunctions() + : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL), + fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/ + /*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL), + fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL) + { + // nothing + } + + inline bool isValid() const + { + return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer && + /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy && + fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap; + } +}; + +// We should guarantee that SVMFunctions lifetime is not less than context's lifetime +CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context); + +CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags); + +}}} //namespace cv::ocl::svm +#endif + +#endif // OPENCV_CORE_OPENCL_SVM_HPP +/* End of file. 
*/ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_clblas.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_clblas.hpp new file mode 100644 index 0000000..2749927 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_clblas.hpp @@ -0,0 +1,602 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP +#error "Invalid usage" +#endif + +// generated by parser_clblas.py +#define clblasCaxpy clblasCaxpy_ +#define clblasCcopy clblasCcopy_ +#define clblasCdotc clblasCdotc_ +#define clblasCdotu clblasCdotu_ +#define clblasCgbmv clblasCgbmv_ +#define clblasCgemm clblasCgemm_ +#define clblasCgemv clblasCgemv_ +#define clblasCgerc clblasCgerc_ +#define clblasCgeru clblasCgeru_ +#define clblasChbmv clblasChbmv_ +#define clblasChemm clblasChemm_ +#define clblasChemv clblasChemv_ +#define clblasCher clblasCher_ +#define clblasCher2 clblasCher2_ +#define clblasCher2k clblasCher2k_ +#define clblasCherk clblasCherk_ +#define clblasChpmv clblasChpmv_ +#define clblasChpr clblasChpr_ +#define clblasChpr2 clblasChpr2_ +#define clblasCrotg clblasCrotg_ +#define clblasCscal clblasCscal_ +#define clblasCsrot clblasCsrot_ +#define clblasCsscal clblasCsscal_ +#define clblasCswap clblasCswap_ +#define clblasCsymm clblasCsymm_ +#define clblasCsyr2k clblasCsyr2k_ +#define clblasCsyrk clblasCsyrk_ +#define clblasCtbmv clblasCtbmv_ +#define clblasCtbsv clblasCtbsv_ +#define clblasCtpmv clblasCtpmv_ +#define clblasCtpsv clblasCtpsv_ +#define clblasCtrmm clblasCtrmm_ +#define clblasCtrmv clblasCtrmv_ +#define clblasCtrsm clblasCtrsm_ +#define clblasCtrsv clblasCtrsv_ +#define clblasDasum clblasDasum_ +#define clblasDaxpy clblasDaxpy_ +#define clblasDcopy clblasDcopy_ +#define clblasDdot clblasDdot_ +#define clblasDgbmv clblasDgbmv_ +#define clblasDgemm clblasDgemm_ +#define clblasDgemv clblasDgemv_ +#define clblasDger clblasDger_ +#define clblasDnrm2 clblasDnrm2_ +#define clblasDrot clblasDrot_ +#define clblasDrotg clblasDrotg_ +#define clblasDrotm clblasDrotm_ +#define clblasDrotmg clblasDrotmg_ +#define clblasDsbmv clblasDsbmv_ +#define clblasDscal clblasDscal_ +#define clblasDspmv clblasDspmv_ +#define clblasDspr clblasDspr_ +#define clblasDspr2 clblasDspr2_ +#define clblasDswap clblasDswap_ +#define clblasDsymm clblasDsymm_ +#define clblasDsymv clblasDsymv_ +#define clblasDsyr clblasDsyr_ +#define clblasDsyr2 clblasDsyr2_ +#define clblasDsyr2k clblasDsyr2k_ +#define clblasDsyrk clblasDsyrk_ +#define clblasDtbmv clblasDtbmv_ +#define clblasDtbsv clblasDtbsv_ +#define clblasDtpmv clblasDtpmv_ +#define clblasDtpsv clblasDtpsv_ +#define clblasDtrmm clblasDtrmm_ +#define clblasDtrmv clblasDtrmv_ +#define clblasDtrsm clblasDtrsm_ +#define clblasDtrsv clblasDtrsv_ +#define clblasDzasum clblasDzasum_ +#define clblasDznrm2 clblasDznrm2_ +#define clblasGetVersion clblasGetVersion_ +#define clblasSasum clblasSasum_ +#define clblasSaxpy clblasSaxpy_ +#define clblasScasum clblasScasum_ +#define clblasScnrm2 clblasScnrm2_ +#define clblasScopy clblasScopy_ +#define clblasSdot clblasSdot_ +#define clblasSetup clblasSetup_ +#define clblasSgbmv clblasSgbmv_ +#define clblasSgemm clblasSgemm_ +#define clblasSgemv clblasSgemv_ +#define clblasSger clblasSger_ +#define clblasSnrm2 clblasSnrm2_ +#define clblasSrot clblasSrot_ +#define 
clblasSrotg clblasSrotg_ +#define clblasSrotm clblasSrotm_ +#define clblasSrotmg clblasSrotmg_ +#define clblasSsbmv clblasSsbmv_ +#define clblasSscal clblasSscal_ +#define clblasSspmv clblasSspmv_ +#define clblasSspr clblasSspr_ +#define clblasSspr2 clblasSspr2_ +#define clblasSswap clblasSswap_ +#define clblasSsymm clblasSsymm_ +#define clblasSsymv clblasSsymv_ +#define clblasSsyr clblasSsyr_ +#define clblasSsyr2 clblasSsyr2_ +#define clblasSsyr2k clblasSsyr2k_ +#define clblasSsyrk clblasSsyrk_ +#define clblasStbmv clblasStbmv_ +#define clblasStbsv clblasStbsv_ +#define clblasStpmv clblasStpmv_ +#define clblasStpsv clblasStpsv_ +#define clblasStrmm clblasStrmm_ +#define clblasStrmv clblasStrmv_ +#define clblasStrsm clblasStrsm_ +#define clblasStrsv clblasStrsv_ +#define clblasTeardown clblasTeardown_ +#define clblasZaxpy clblasZaxpy_ +#define clblasZcopy clblasZcopy_ +#define clblasZdotc clblasZdotc_ +#define clblasZdotu clblasZdotu_ +#define clblasZdrot clblasZdrot_ +#define clblasZdscal clblasZdscal_ +#define clblasZgbmv clblasZgbmv_ +#define clblasZgemm clblasZgemm_ +#define clblasZgemv clblasZgemv_ +#define clblasZgerc clblasZgerc_ +#define clblasZgeru clblasZgeru_ +#define clblasZhbmv clblasZhbmv_ +#define clblasZhemm clblasZhemm_ +#define clblasZhemv clblasZhemv_ +#define clblasZher clblasZher_ +#define clblasZher2 clblasZher2_ +#define clblasZher2k clblasZher2k_ +#define clblasZherk clblasZherk_ +#define clblasZhpmv clblasZhpmv_ +#define clblasZhpr clblasZhpr_ +#define clblasZhpr2 clblasZhpr2_ +#define clblasZrotg clblasZrotg_ +#define clblasZscal clblasZscal_ +#define clblasZswap clblasZswap_ +#define clblasZsymm clblasZsymm_ +#define clblasZsyr2k clblasZsyr2k_ +#define clblasZsyrk clblasZsyrk_ +#define clblasZtbmv clblasZtbmv_ +#define clblasZtbsv clblasZtbsv_ +#define clblasZtpmv clblasZtpmv_ +#define clblasZtpsv clblasZtpsv_ +#define clblasZtrmm clblasZtrmm_ +#define clblasZtrmv clblasZtrmv_ +#define clblasZtrsm clblasZtrsm_ +#define clblasZtrsv clblasZtrsv_ +#define clblasiCamax clblasiCamax_ +#define clblasiDamax clblasiDamax_ +#define clblasiSamax clblasiSamax_ +#define clblasiZamax clblasiZamax_ + +#include + +// generated by parser_clblas.py +#undef clblasCaxpy +//#define clblasCaxpy clblasCaxpy_pfn +#undef clblasCcopy +//#define clblasCcopy clblasCcopy_pfn +#undef clblasCdotc +//#define clblasCdotc clblasCdotc_pfn +#undef clblasCdotu +//#define clblasCdotu clblasCdotu_pfn +#undef clblasCgbmv +//#define clblasCgbmv clblasCgbmv_pfn +#undef clblasCgemm +#define clblasCgemm clblasCgemm_pfn +#undef clblasCgemv +//#define clblasCgemv clblasCgemv_pfn +#undef clblasCgerc +//#define clblasCgerc clblasCgerc_pfn +#undef clblasCgeru +//#define clblasCgeru clblasCgeru_pfn +#undef clblasChbmv +//#define clblasChbmv clblasChbmv_pfn +#undef clblasChemm +//#define clblasChemm clblasChemm_pfn +#undef clblasChemv +//#define clblasChemv clblasChemv_pfn +#undef clblasCher +//#define clblasCher clblasCher_pfn +#undef clblasCher2 +//#define clblasCher2 clblasCher2_pfn +#undef clblasCher2k +//#define clblasCher2k clblasCher2k_pfn +#undef clblasCherk +//#define clblasCherk clblasCherk_pfn +#undef clblasChpmv +//#define clblasChpmv clblasChpmv_pfn +#undef clblasChpr +//#define clblasChpr clblasChpr_pfn +#undef clblasChpr2 +//#define clblasChpr2 clblasChpr2_pfn +#undef clblasCrotg +//#define clblasCrotg clblasCrotg_pfn +#undef clblasCscal +//#define clblasCscal clblasCscal_pfn +#undef clblasCsrot +//#define clblasCsrot clblasCsrot_pfn +#undef clblasCsscal +//#define clblasCsscal clblasCsscal_pfn 
+#undef clblasCswap +//#define clblasCswap clblasCswap_pfn +#undef clblasCsymm +//#define clblasCsymm clblasCsymm_pfn +#undef clblasCsyr2k +//#define clblasCsyr2k clblasCsyr2k_pfn +#undef clblasCsyrk +//#define clblasCsyrk clblasCsyrk_pfn +#undef clblasCtbmv +//#define clblasCtbmv clblasCtbmv_pfn +#undef clblasCtbsv +//#define clblasCtbsv clblasCtbsv_pfn +#undef clblasCtpmv +//#define clblasCtpmv clblasCtpmv_pfn +#undef clblasCtpsv +//#define clblasCtpsv clblasCtpsv_pfn +#undef clblasCtrmm +//#define clblasCtrmm clblasCtrmm_pfn +#undef clblasCtrmv +//#define clblasCtrmv clblasCtrmv_pfn +#undef clblasCtrsm +//#define clblasCtrsm clblasCtrsm_pfn +#undef clblasCtrsv +//#define clblasCtrsv clblasCtrsv_pfn +#undef clblasDasum +//#define clblasDasum clblasDasum_pfn +#undef clblasDaxpy +//#define clblasDaxpy clblasDaxpy_pfn +#undef clblasDcopy +//#define clblasDcopy clblasDcopy_pfn +#undef clblasDdot +//#define clblasDdot clblasDdot_pfn +#undef clblasDgbmv +//#define clblasDgbmv clblasDgbmv_pfn +#undef clblasDgemm +#define clblasDgemm clblasDgemm_pfn +#undef clblasDgemv +//#define clblasDgemv clblasDgemv_pfn +#undef clblasDger +//#define clblasDger clblasDger_pfn +#undef clblasDnrm2 +//#define clblasDnrm2 clblasDnrm2_pfn +#undef clblasDrot +//#define clblasDrot clblasDrot_pfn +#undef clblasDrotg +//#define clblasDrotg clblasDrotg_pfn +#undef clblasDrotm +//#define clblasDrotm clblasDrotm_pfn +#undef clblasDrotmg +//#define clblasDrotmg clblasDrotmg_pfn +#undef clblasDsbmv +//#define clblasDsbmv clblasDsbmv_pfn +#undef clblasDscal +//#define clblasDscal clblasDscal_pfn +#undef clblasDspmv +//#define clblasDspmv clblasDspmv_pfn +#undef clblasDspr +//#define clblasDspr clblasDspr_pfn +#undef clblasDspr2 +//#define clblasDspr2 clblasDspr2_pfn +#undef clblasDswap +//#define clblasDswap clblasDswap_pfn +#undef clblasDsymm +//#define clblasDsymm clblasDsymm_pfn +#undef clblasDsymv +//#define clblasDsymv clblasDsymv_pfn +#undef clblasDsyr +//#define clblasDsyr clblasDsyr_pfn +#undef clblasDsyr2 +//#define clblasDsyr2 clblasDsyr2_pfn +#undef clblasDsyr2k +//#define clblasDsyr2k clblasDsyr2k_pfn +#undef clblasDsyrk +//#define clblasDsyrk clblasDsyrk_pfn +#undef clblasDtbmv +//#define clblasDtbmv clblasDtbmv_pfn +#undef clblasDtbsv +//#define clblasDtbsv clblasDtbsv_pfn +#undef clblasDtpmv +//#define clblasDtpmv clblasDtpmv_pfn +#undef clblasDtpsv +//#define clblasDtpsv clblasDtpsv_pfn +#undef clblasDtrmm +//#define clblasDtrmm clblasDtrmm_pfn +#undef clblasDtrmv +//#define clblasDtrmv clblasDtrmv_pfn +#undef clblasDtrsm +//#define clblasDtrsm clblasDtrsm_pfn +#undef clblasDtrsv +//#define clblasDtrsv clblasDtrsv_pfn +#undef clblasDzasum +//#define clblasDzasum clblasDzasum_pfn +#undef clblasDznrm2 +//#define clblasDznrm2 clblasDznrm2_pfn +#undef clblasGetVersion +//#define clblasGetVersion clblasGetVersion_pfn +#undef clblasSasum +//#define clblasSasum clblasSasum_pfn +#undef clblasSaxpy +//#define clblasSaxpy clblasSaxpy_pfn +#undef clblasScasum +//#define clblasScasum clblasScasum_pfn +#undef clblasScnrm2 +//#define clblasScnrm2 clblasScnrm2_pfn +#undef clblasScopy +//#define clblasScopy clblasScopy_pfn +#undef clblasSdot +//#define clblasSdot clblasSdot_pfn +#undef clblasSetup +#define clblasSetup clblasSetup_pfn +#undef clblasSgbmv +//#define clblasSgbmv clblasSgbmv_pfn +#undef clblasSgemm +#define clblasSgemm clblasSgemm_pfn +#undef clblasSgemv +//#define clblasSgemv clblasSgemv_pfn +#undef clblasSger +//#define clblasSger clblasSger_pfn +#undef clblasSnrm2 +//#define clblasSnrm2 clblasSnrm2_pfn 
+#undef clblasSrot +//#define clblasSrot clblasSrot_pfn +#undef clblasSrotg +//#define clblasSrotg clblasSrotg_pfn +#undef clblasSrotm +//#define clblasSrotm clblasSrotm_pfn +#undef clblasSrotmg +//#define clblasSrotmg clblasSrotmg_pfn +#undef clblasSsbmv +//#define clblasSsbmv clblasSsbmv_pfn +#undef clblasSscal +//#define clblasSscal clblasSscal_pfn +#undef clblasSspmv +//#define clblasSspmv clblasSspmv_pfn +#undef clblasSspr +//#define clblasSspr clblasSspr_pfn +#undef clblasSspr2 +//#define clblasSspr2 clblasSspr2_pfn +#undef clblasSswap +//#define clblasSswap clblasSswap_pfn +#undef clblasSsymm +//#define clblasSsymm clblasSsymm_pfn +#undef clblasSsymv +//#define clblasSsymv clblasSsymv_pfn +#undef clblasSsyr +//#define clblasSsyr clblasSsyr_pfn +#undef clblasSsyr2 +//#define clblasSsyr2 clblasSsyr2_pfn +#undef clblasSsyr2k +//#define clblasSsyr2k clblasSsyr2k_pfn +#undef clblasSsyrk +//#define clblasSsyrk clblasSsyrk_pfn +#undef clblasStbmv +//#define clblasStbmv clblasStbmv_pfn +#undef clblasStbsv +//#define clblasStbsv clblasStbsv_pfn +#undef clblasStpmv +//#define clblasStpmv clblasStpmv_pfn +#undef clblasStpsv +//#define clblasStpsv clblasStpsv_pfn +#undef clblasStrmm +//#define clblasStrmm clblasStrmm_pfn +#undef clblasStrmv +//#define clblasStrmv clblasStrmv_pfn +#undef clblasStrsm +//#define clblasStrsm clblasStrsm_pfn +#undef clblasStrsv +//#define clblasStrsv clblasStrsv_pfn +#undef clblasTeardown +#define clblasTeardown clblasTeardown_pfn +#undef clblasZaxpy +//#define clblasZaxpy clblasZaxpy_pfn +#undef clblasZcopy +//#define clblasZcopy clblasZcopy_pfn +#undef clblasZdotc +//#define clblasZdotc clblasZdotc_pfn +#undef clblasZdotu +//#define clblasZdotu clblasZdotu_pfn +#undef clblasZdrot +//#define clblasZdrot clblasZdrot_pfn +#undef clblasZdscal +//#define clblasZdscal clblasZdscal_pfn +#undef clblasZgbmv +//#define clblasZgbmv clblasZgbmv_pfn +#undef clblasZgemm +#define clblasZgemm clblasZgemm_pfn +#undef clblasZgemv +//#define clblasZgemv clblasZgemv_pfn +#undef clblasZgerc +//#define clblasZgerc clblasZgerc_pfn +#undef clblasZgeru +//#define clblasZgeru clblasZgeru_pfn +#undef clblasZhbmv +//#define clblasZhbmv clblasZhbmv_pfn +#undef clblasZhemm +//#define clblasZhemm clblasZhemm_pfn +#undef clblasZhemv +//#define clblasZhemv clblasZhemv_pfn +#undef clblasZher +//#define clblasZher clblasZher_pfn +#undef clblasZher2 +//#define clblasZher2 clblasZher2_pfn +#undef clblasZher2k +//#define clblasZher2k clblasZher2k_pfn +#undef clblasZherk +//#define clblasZherk clblasZherk_pfn +#undef clblasZhpmv +//#define clblasZhpmv clblasZhpmv_pfn +#undef clblasZhpr +//#define clblasZhpr clblasZhpr_pfn +#undef clblasZhpr2 +//#define clblasZhpr2 clblasZhpr2_pfn +#undef clblasZrotg +//#define clblasZrotg clblasZrotg_pfn +#undef clblasZscal +//#define clblasZscal clblasZscal_pfn +#undef clblasZswap +//#define clblasZswap clblasZswap_pfn +#undef clblasZsymm +//#define clblasZsymm clblasZsymm_pfn +#undef clblasZsyr2k +//#define clblasZsyr2k clblasZsyr2k_pfn +#undef clblasZsyrk +//#define clblasZsyrk clblasZsyrk_pfn +#undef clblasZtbmv +//#define clblasZtbmv clblasZtbmv_pfn +#undef clblasZtbsv +//#define clblasZtbsv clblasZtbsv_pfn +#undef clblasZtpmv +//#define clblasZtpmv clblasZtpmv_pfn +#undef clblasZtpsv +//#define clblasZtpsv clblasZtpsv_pfn +#undef clblasZtrmm +//#define clblasZtrmm clblasZtrmm_pfn +#undef clblasZtrmv +//#define clblasZtrmv clblasZtrmv_pfn +#undef clblasZtrsm +//#define clblasZtrsm clblasZtrsm_pfn +#undef clblasZtrsv +//#define clblasZtrsv clblasZtrsv_pfn +#undef 
clblasiCamax +//#define clblasiCamax clblasiCamax_pfn +#undef clblasiDamax +//#define clblasiDamax clblasiDamax_pfn +#undef clblasiSamax +//#define clblasiSamax clblasiSamax_pfn +#undef clblasiZamax +//#define clblasiZamax clblasiZamax_pfn + +// generated by parser_clblas.py +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgerc)(clblasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgeru)(clblasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT 
clblasStatus (*clblasChbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChemm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChemv)(clblasOrder order, clblasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher2k)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCherk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t 
offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT 
clblasStatus (*clblasCtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, 
cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDger)(clblasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, 
cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsymv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t 
lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDznrm2)(size_t N, cl_mem NRM2, 
size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clblasStatus (*clblasSetup)(); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSger)(clblasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t 
offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSswap)(size_t N, cl_mem X, size_t offx, int incx, 
cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsymv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* 
eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT void (*clblasTeardown)(); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus 
(*clblasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgerc)(clblasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgeru)(clblasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhemm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhemv)(clblasOrder order, clblasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, 
const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher2k)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZherk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint 
numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const 
cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); +//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_clfft.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_clfft.hpp new file mode 100644 index 0000000..dff3b40 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_clfft.hpp @@ -0,0 +1,146 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP +#error "Invalid usage" +#endif + +// generated by parser_clfft.py +#define clfftBakePlan clfftBakePlan_ +#define clfftCopyPlan clfftCopyPlan_ +#define clfftCreateDefaultPlan clfftCreateDefaultPlan_ +#define clfftDestroyPlan clfftDestroyPlan_ +#define clfftEnqueueTransform clfftEnqueueTransform_ +#define clfftGetLayout clfftGetLayout_ +#define clfftGetPlanBatchSize clfftGetPlanBatchSize_ +#define clfftGetPlanContext clfftGetPlanContext_ +#define clfftGetPlanDim clfftGetPlanDim_ +#define clfftGetPlanDistance clfftGetPlanDistance_ +#define clfftGetPlanInStride clfftGetPlanInStride_ +#define clfftGetPlanLength clfftGetPlanLength_ +#define clfftGetPlanOutStride clfftGetPlanOutStride_ +#define clfftGetPlanPrecision clfftGetPlanPrecision_ +#define clfftGetPlanScale clfftGetPlanScale_ +#define clfftGetPlanTransposeResult clfftGetPlanTransposeResult_ +#define clfftGetResultLocation clfftGetResultLocation_ +#define clfftGetTmpBufSize clfftGetTmpBufSize_ +#define clfftGetVersion clfftGetVersion_ +#define clfftSetLayout clfftSetLayout_ +#define clfftSetPlanBatchSize clfftSetPlanBatchSize_ +#define clfftSetPlanCallback clfftSetPlanCallback_ +#define clfftSetPlanDim clfftSetPlanDim_ +#define clfftSetPlanDistance clfftSetPlanDistance_ +#define clfftSetPlanInStride clfftSetPlanInStride_ +#define clfftSetPlanLength clfftSetPlanLength_ +#define clfftSetPlanOutStride 
clfftSetPlanOutStride_ +#define clfftSetPlanPrecision clfftSetPlanPrecision_ +#define clfftSetPlanScale clfftSetPlanScale_ +#define clfftSetPlanTransposeResult clfftSetPlanTransposeResult_ +#define clfftSetResultLocation clfftSetResultLocation_ +#define clfftSetup clfftSetup_ +#define clfftTeardown clfftTeardown_ + +#include <clFFT.h> + +// generated by parser_clfft.py +#undef clfftBakePlan +#define clfftBakePlan clfftBakePlan_pfn +#undef clfftCopyPlan +//#define clfftCopyPlan clfftCopyPlan_pfn +#undef clfftCreateDefaultPlan +#define clfftCreateDefaultPlan clfftCreateDefaultPlan_pfn +#undef clfftDestroyPlan +#define clfftDestroyPlan clfftDestroyPlan_pfn +#undef clfftEnqueueTransform +#define clfftEnqueueTransform clfftEnqueueTransform_pfn +#undef clfftGetLayout +//#define clfftGetLayout clfftGetLayout_pfn +#undef clfftGetPlanBatchSize +//#define clfftGetPlanBatchSize clfftGetPlanBatchSize_pfn +#undef clfftGetPlanContext +//#define clfftGetPlanContext clfftGetPlanContext_pfn +#undef clfftGetPlanDim +//#define clfftGetPlanDim clfftGetPlanDim_pfn +#undef clfftGetPlanDistance +//#define clfftGetPlanDistance clfftGetPlanDistance_pfn +#undef clfftGetPlanInStride +//#define clfftGetPlanInStride clfftGetPlanInStride_pfn +#undef clfftGetPlanLength +//#define clfftGetPlanLength clfftGetPlanLength_pfn +#undef clfftGetPlanOutStride +//#define clfftGetPlanOutStride clfftGetPlanOutStride_pfn +#undef clfftGetPlanPrecision +//#define clfftGetPlanPrecision clfftGetPlanPrecision_pfn +#undef clfftGetPlanScale +//#define clfftGetPlanScale clfftGetPlanScale_pfn +#undef clfftGetPlanTransposeResult +//#define clfftGetPlanTransposeResult clfftGetPlanTransposeResult_pfn +#undef clfftGetResultLocation +//#define clfftGetResultLocation clfftGetResultLocation_pfn +#undef clfftGetTmpBufSize +#define clfftGetTmpBufSize clfftGetTmpBufSize_pfn +#undef clfftGetVersion +#define clfftGetVersion clfftGetVersion_pfn +#undef clfftSetLayout +#define clfftSetLayout clfftSetLayout_pfn +#undef clfftSetPlanBatchSize +#define clfftSetPlanBatchSize clfftSetPlanBatchSize_pfn +#undef clfftSetPlanCallback +//#define clfftSetPlanCallback clfftSetPlanCallback_pfn +#undef clfftSetPlanDim +//#define clfftSetPlanDim clfftSetPlanDim_pfn +#undef clfftSetPlanDistance +#define clfftSetPlanDistance clfftSetPlanDistance_pfn +#undef clfftSetPlanInStride +#define clfftSetPlanInStride clfftSetPlanInStride_pfn +#undef clfftSetPlanLength +//#define clfftSetPlanLength clfftSetPlanLength_pfn +#undef clfftSetPlanOutStride +#define clfftSetPlanOutStride clfftSetPlanOutStride_pfn +#undef clfftSetPlanPrecision +#define clfftSetPlanPrecision clfftSetPlanPrecision_pfn +#undef clfftSetPlanScale +#define clfftSetPlanScale clfftSetPlanScale_pfn +#undef clfftSetPlanTransposeResult +//#define clfftSetPlanTransposeResult clfftSetPlanTransposeResult_pfn +#undef clfftSetResultLocation +#define clfftSetResultLocation clfftSetResultLocation_pfn +#undef clfftSetup +#define clfftSetup clfftSetup_pfn +#undef clfftTeardown +#define clfftTeardown clfftTeardown_pfn + +// generated by parser_clfft.py +extern CL_RUNTIME_EXPORT clfftStatus (*clfftBakePlan)(clfftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clfftPlanHandle plHandle, void* user_data), void* user_data); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftCopyPlan)(clfftPlanHandle* out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftCreateDefaultPlan)(clfftPlanHandle* plHandle, cl_context context, const clfftDim 
dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftDestroyPlan)(clfftPlanHandle* plHandle); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftEnqueueTransform)(clfftPlanHandle plHandle, clfftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetLayout)(const clfftPlanHandle plHandle, clfftLayout* iLayout, clfftLayout* oLayout); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanBatchSize)(const clfftPlanHandle plHandle, size_t* batchSize); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanContext)(const clfftPlanHandle plHandle, cl_context* context); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanDim)(const clfftPlanHandle plHandle, clfftDim* dim, cl_uint* size); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanDistance)(const clfftPlanHandle plHandle, size_t* iDist, size_t* oDist); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanInStride)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanLength)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clLengths); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanOutStride)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanPrecision)(const clfftPlanHandle plHandle, clfftPrecision* precision); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanScale)(const clfftPlanHandle plHandle, clfftDirection dir, cl_float* scale); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanTransposeResult)(const clfftPlanHandle plHandle, clfftResultTransposed* transposed); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetResultLocation)(const clfftPlanHandle plHandle, clfftResultLocation* placeness); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetTmpBufSize)(const clfftPlanHandle plHandle, size_t* buffersize); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetLayout)(clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanBatchSize)(clfftPlanHandle plHandle, size_t batchSize); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanCallback)(clfftPlanHandle plHandle, const char* funcName, const char* funcString, int localMemSize, clfftCallbackType callbackType, cl_mem* userdata, int numUserdataBuffers); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanDim)(clfftPlanHandle plHandle, const clfftDim dim); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanDistance)(clfftPlanHandle plHandle, size_t iDist, size_t oDist); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanInStride)(clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides); +//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanLength)(clfftPlanHandle plHandle, const clfftDim dim, const size_t* clLengths); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanOutStride)(clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanPrecision)(clfftPlanHandle plHandle, clfftPrecision precision); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanScale)(clfftPlanHandle plHandle, clfftDirection dir, cl_float scale); +//extern CL_RUNTIME_EXPORT clfftStatus 
(*clfftSetPlanTransposeResult)(clfftPlanHandle plHandle, clfftResultTransposed transposed); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetResultLocation)(clfftPlanHandle plHandle, clfftResultLocation placeness); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetup)(const clfftSetupData* setupData); +extern CL_RUNTIME_EXPORT clfftStatus (*clfftTeardown)(); diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp new file mode 100644 index 0000000..28618a1 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp @@ -0,0 +1,371 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#define clBuildProgram clBuildProgram_ +#define clCompileProgram clCompileProgram_ +#define clCreateBuffer clCreateBuffer_ +#define clCreateCommandQueue clCreateCommandQueue_ +#define clCreateContext clCreateContext_ +#define clCreateContextFromType clCreateContextFromType_ +#define clCreateImage clCreateImage_ +#define clCreateImage2D clCreateImage2D_ +#define clCreateImage3D clCreateImage3D_ +#define clCreateKernel clCreateKernel_ +#define clCreateKernelsInProgram clCreateKernelsInProgram_ +#define clCreateProgramWithBinary clCreateProgramWithBinary_ +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_ +#define clCreateProgramWithSource clCreateProgramWithSource_ +#define clCreateSampler clCreateSampler_ +#define clCreateSubBuffer clCreateSubBuffer_ +#define clCreateSubDevices clCreateSubDevices_ +#define clCreateUserEvent clCreateUserEvent_ +#define clEnqueueBarrier clEnqueueBarrier_ +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_ +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_ +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_ +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_ +#define clEnqueueCopyImage clEnqueueCopyImage_ +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_ +#define clEnqueueFillBuffer clEnqueueFillBuffer_ +#define clEnqueueFillImage clEnqueueFillImage_ +#define clEnqueueMapBuffer clEnqueueMapBuffer_ +#define clEnqueueMapImage clEnqueueMapImage_ +#define clEnqueueMarker clEnqueueMarker_ +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_ +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_ +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_ +#define clEnqueueNativeKernel clEnqueueNativeKernel_ +#define clEnqueueReadBuffer clEnqueueReadBuffer_ +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_ +#define clEnqueueReadImage clEnqueueReadImage_ +#define clEnqueueTask clEnqueueTask_ +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_ +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_ +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_ +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_ +#define clEnqueueWriteImage clEnqueueWriteImage_ +#define clFinish clFinish_ +#define clFlush clFlush_ +#define clGetCommandQueueInfo clGetCommandQueueInfo_ +#define clGetContextInfo clGetContextInfo_ +#define clGetDeviceIDs clGetDeviceIDs_ +#define clGetDeviceInfo clGetDeviceInfo_ +#define clGetEventInfo clGetEventInfo_ +#define clGetEventProfilingInfo 
clGetEventProfilingInfo_ +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_ +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_ +#define clGetImageInfo clGetImageInfo_ +#define clGetKernelArgInfo clGetKernelArgInfo_ +#define clGetKernelInfo clGetKernelInfo_ +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_ +#define clGetMemObjectInfo clGetMemObjectInfo_ +#define clGetPlatformIDs clGetPlatformIDs_ +#define clGetPlatformInfo clGetPlatformInfo_ +#define clGetProgramBuildInfo clGetProgramBuildInfo_ +#define clGetProgramInfo clGetProgramInfo_ +#define clGetSamplerInfo clGetSamplerInfo_ +#define clGetSupportedImageFormats clGetSupportedImageFormats_ +#define clLinkProgram clLinkProgram_ +#define clReleaseCommandQueue clReleaseCommandQueue_ +#define clReleaseContext clReleaseContext_ +#define clReleaseDevice clReleaseDevice_ +#define clReleaseEvent clReleaseEvent_ +#define clReleaseKernel clReleaseKernel_ +#define clReleaseMemObject clReleaseMemObject_ +#define clReleaseProgram clReleaseProgram_ +#define clReleaseSampler clReleaseSampler_ +#define clRetainCommandQueue clRetainCommandQueue_ +#define clRetainContext clRetainContext_ +#define clRetainDevice clRetainDevice_ +#define clRetainEvent clRetainEvent_ +#define clRetainKernel clRetainKernel_ +#define clRetainMemObject clRetainMemObject_ +#define clRetainProgram clRetainProgram_ +#define clRetainSampler clRetainSampler_ +#define clSetEventCallback clSetEventCallback_ +#define clSetKernelArg clSetKernelArg_ +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_ +#define clSetUserEventStatus clSetUserEventStatus_ +#define clUnloadCompiler clUnloadCompiler_ +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_ +#define clWaitForEvents clWaitForEvents_ + +#if defined __APPLE__ +#define CL_SILENCE_DEPRECATION +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +// generated by parser_cl.py +#undef clBuildProgram +#define clBuildProgram clBuildProgram_pfn +#undef clCompileProgram +#define clCompileProgram clCompileProgram_pfn +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_pfn +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_pfn +#undef clCreateContext +#define clCreateContext clCreateContext_pfn +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_pfn +#undef clCreateImage +#define clCreateImage clCreateImage_pfn +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_pfn +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_pfn +#undef clCreateKernel +#define clCreateKernel clCreateKernel_pfn +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_pfn +#undef clCreateSampler +#define clCreateSampler clCreateSampler_pfn +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_pfn +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_pfn +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_pfn +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_pfn +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList 
clEnqueueBarrierWithWaitList_pfn +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_pfn +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_pfn +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_pfn +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_pfn +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_pfn +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_pfn +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_pfn +#undef clFinish +#define clFinish clFinish_pfn +#undef clFlush +#define clFlush clFlush_pfn +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_pfn +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_pfn +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_pfn +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_pfn +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_pfn +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_pfn +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_pfn +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_pfn +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_pfn +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_pfn +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_pfn +#undef clGetSamplerInfo +#define 
clGetSamplerInfo clGetSamplerInfo_pfn +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn +#undef clLinkProgram +#define clLinkProgram clLinkProgram_pfn +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_pfn +#undef clReleaseContext +#define clReleaseContext clReleaseContext_pfn +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_pfn +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_pfn +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_pfn +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_pfn +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_pfn +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_pfn +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_pfn +#undef clRetainContext +#define clRetainContext clRetainContext_pfn +#undef clRetainDevice +#define clRetainDevice clRetainDevice_pfn +#undef clRetainEvent +#define clRetainEvent clRetainEvent_pfn +#undef clRetainKernel +#define clRetainKernel clRetainKernel_pfn +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_pfn +#undef clRetainProgram +#define clRetainProgram clRetainProgram_pfn +#undef clRetainSampler +#define clRetainSampler clRetainSampler_pfn +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_pfn +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_pfn +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_pfn +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_pfn +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_pfn + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_kernel 
(CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*); +extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, 
const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*); +extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, 
size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*); +extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)(); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*); diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp new file mode 100644 index 0000000..216b22b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp @@ -0,0 +1,272 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP 
+#error "Invalid usage" +#endif + +// generated by parser_cl.py +#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCompileProgram +#define clCompileProgram clCompileProgram_fn +inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn +inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage +#define clCreateImage clCreateImage_fn +inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn +inline cl_program 
clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_fn +inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn +inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return 
clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_fn +inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_fn +inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn +inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn +inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return 
clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int 
clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn +inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_fn +inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, 
p4, p5); } +#undef clLinkProgram +#define clLinkProgram clLinkProgram_fn +inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_fn +inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); } +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clRetainDevice +#define clRetainDevice clRetainDevice_fn +inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn +inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef 
clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn +inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp new file mode 100644 index 0000000..0b12aed --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp @@ -0,0 +1,62 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP +#error "Invalid usage" +#endif + +// generated by parser_cl.py +#define clCreateFromGLBuffer clCreateFromGLBuffer_ +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_ +#define clCreateFromGLTexture clCreateFromGLTexture_ +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_ +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_ +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_ +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_ +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_ +#define clGetGLObjectInfo clGetGLObjectInfo_ +#define clGetGLTextureInfo clGetGLTextureInfo_ + +#if defined __APPLE__ +#include <OpenCL/cl_gl.h> +#else +#include <CL/cl_gl.h> +#endif + +// generated by parser_cl.py +#undef clCreateFromGLBuffer +#define clCreateFromGLBuffer clCreateFromGLBuffer_pfn +#undef clCreateFromGLRenderbuffer +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_pfn +#undef clCreateFromGLTexture +#define clCreateFromGLTexture clCreateFromGLTexture_pfn +#undef clCreateFromGLTexture2D +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_pfn +#undef clCreateFromGLTexture3D +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_pfn +#undef clEnqueueAcquireGLObjects +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_pfn +#undef clEnqueueReleaseGLObjects +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_pfn +#undef clGetGLContextInfoKHR +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_pfn +#undef clGetGLObjectInfo +#define clGetGLObjectInfo clGetGLObjectInfo_pfn +#undef clGetGLTextureInfo +#define clGetGLTextureInfo clGetGLTextureInfo_pfn + +#ifdef cl_khr_gl_sharing + +// generated by parser_cl.py +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)(cl_context, cl_mem_flags, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture2D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture3D)(cl_context,
cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLContextInfoKHR)(const cl_context_properties*, cl_gl_context_info, size_t, void*, size_t*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLObjectInfo)(cl_mem, cl_gl_object_type*, cl_GLuint*); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLTextureInfo)(cl_mem, cl_gl_texture_info, size_t, void*, size_t*); + +#endif // cl_khr_gl_sharing diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp new file mode 100644 index 0000000..12f342b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp @@ -0,0 +1,42 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP +#error "Invalid usage" +#endif + +#ifdef cl_khr_gl_sharing + +// generated by parser_cl.py +#undef clCreateFromGLBuffer +#define clCreateFromGLBuffer clCreateFromGLBuffer_fn +inline cl_mem clCreateFromGLBuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, int* p3) { return clCreateFromGLBuffer_pfn(p0, p1, p2, p3); } +#undef clCreateFromGLRenderbuffer +#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_fn +inline cl_mem clCreateFromGLRenderbuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, cl_int* p3) { return clCreateFromGLRenderbuffer_pfn(p0, p1, p2, p3); } +#undef clCreateFromGLTexture +#define clCreateFromGLTexture clCreateFromGLTexture_fn +inline cl_mem clCreateFromGLTexture(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateFromGLTexture2D +#define clCreateFromGLTexture2D clCreateFromGLTexture2D_fn +inline cl_mem clCreateFromGLTexture2D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture2D_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateFromGLTexture3D +#define clCreateFromGLTexture3D clCreateFromGLTexture3D_fn +inline cl_mem clCreateFromGLTexture3D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture3D_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueAcquireGLObjects +#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_fn +inline cl_int clEnqueueAcquireGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueAcquireGLObjects_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueReleaseGLObjects +#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_fn +inline cl_int clEnqueueReleaseGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueReleaseGLObjects_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetGLContextInfoKHR +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_fn +inline cl_int clGetGLContextInfoKHR(const 
cl_context_properties* p0, cl_gl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLContextInfoKHR_pfn(p0, p1, p2, p3, p4); } +#undef clGetGLObjectInfo +#define clGetGLObjectInfo clGetGLObjectInfo_fn +inline cl_int clGetGLObjectInfo(cl_mem p0, cl_gl_object_type* p1, cl_GLuint* p2) { return clGetGLObjectInfo_pfn(p0, p1, p2); } +#undef clGetGLTextureInfo +#define clGetGLTextureInfo clGetGLTextureInfo_fn +inline cl_int clGetGLTextureInfo(cl_mem p0, cl_gl_texture_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLTextureInfo_pfn(p0, p1, p2, p3, p4); } + +#endif // cl_khr_gl_sharing diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_clblas.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_clblas.hpp new file mode 100644 index 0000000..ccddf8f --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_clblas.hpp @@ -0,0 +1,53 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP +#define OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP + +#ifdef HAVE_CLAMDBLAS + +#include "opencl_core.hpp" + +#include "autogenerated/opencl_clblas.hpp" + +#endif // HAVE_CLAMDBLAS + +#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_clfft.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_clfft.hpp new file mode 100644 index 0000000..7f4af5e --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_clfft.hpp @@ -0,0 +1,53 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP +#define OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP + +#ifdef HAVE_CLAMDFFT + +#include "opencl_core.hpp" + +#include "autogenerated/opencl_clfft.hpp" + +#endif // HAVE_CLAMDFFT + +#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_core.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_core.hpp new file mode 100644 index 0000000..0404b31 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_core.hpp @@ -0,0 +1,84 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP + +#ifdef HAVE_OPENCL + +#ifndef CL_RUNTIME_EXPORT +#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_CORE_SHARED)) && (defined _WIN32 || defined WINCE) && \ + !(defined(__OPENCV_BUILD) && defined(OPENCV_MODULE_IS_PART_OF_WORLD)) +#define CL_RUNTIME_EXPORT __declspec(dllimport) +#else +#define CL_RUNTIME_EXPORT +#endif +#endif + +#ifdef HAVE_OPENCL_SVM +#define clSVMAlloc clSVMAlloc_ +#define clSVMFree clSVMFree_ +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_ +#define clSetKernelExecInfo clSetKernelExecInfo_ +#define clEnqueueSVMFree clEnqueueSVMFree_ +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_ +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_ +#define clEnqueueSVMMap clEnqueueSVMMap_ +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_ +#endif + +#include "autogenerated/opencl_core.hpp" + +#ifndef CL_DEVICE_DOUBLE_FP_CONFIG +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif + +#ifndef CL_DEVICE_HALF_FP_CONFIG +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 +#endif + +#ifndef CL_VERSION_1_2 +#define CV_REQUIRE_OPENCL_1_2_ERROR CV_Error(cv::Error::OpenCLApiCallError, "OpenCV compiled without OpenCL v1.2 support, so we can't use functionality from OpenCL v1.2") +#endif + +#endif // HAVE_OPENCL + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp new file mode 100644 index 0000000..38fcae9 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP + +#include "autogenerated/opencl_core_wrappers.hpp" + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_gl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_gl.hpp new file mode 100644 index 0000000..659c7d8 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_gl.hpp @@ -0,0 +1,53 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP + +#if defined HAVE_OPENCL && defined HAVE_OPENGL + +#include "opencl_core.hpp" + +#include "autogenerated/opencl_gl.hpp" + +#endif // defined HAVE_OPENCL && defined HAVE_OPENGL + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp new file mode 100644 index 0000000..9700004 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP + +#include "autogenerated/opencl_gl_wrappers.hpp" + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp new file mode 100644 index 0000000..9636b19 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp @@ -0,0 +1,48 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#include "opencl_svm_definitions.hpp" + +#undef clSVMAlloc +#define clSVMAlloc clSVMAlloc_pfn +#undef clSVMFree +#define clSVMFree clSVMFree_pfn +#undef clSetKernelArgSVMPointer +#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn +#undef clSetKernelExecInfo +//#define clSetKernelExecInfo clSetKernelExecInfo_pfn +#undef clEnqueueSVMFree +//#define clEnqueueSVMFree clEnqueueSVMFree_pfn +#undef clEnqueueSVMMemcpy +#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn +#undef clEnqueueSVMMemFill +#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn +#undef clEnqueueSVMMap +#define clEnqueueSVMMap clEnqueueSVMMap_pfn +#undef clEnqueueSVMUnmap +#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn + +extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment); +extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value); +//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value); +//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[], +// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data, +// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); +extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); + +#endif // HAVE_OPENCL_SVM 
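Together with the guarded #define block in opencl_core.hpp, these declarations reroute the standard OpenCL 2.0 SVM entry points to function pointers that OpenCV resolves at runtime. A minimal caller sketch (illustrative only; `ctx`, `q` and `bytes` are assumed to exist, error checking omitted):

    void* p = clSVMAlloc(ctx, CL_MEM_READ_WRITE, bytes, 0);       // resolves to clSVMAlloc_pfn
    clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, p, bytes, 0, NULL, NULL);
    memset(p, 0, bytes);                                          // host writes through the SVM pointer
    clEnqueueSVMUnmap(q, p, 0, NULL, NULL);
    clSVMFree(ctx, p);                                            // immediate (non-enqueued) free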
+ +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp new file mode 100644 index 0000000..97c927b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp @@ -0,0 +1,42 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP + +#if defined(HAVE_OPENCL_SVM) +#if defined(CL_VERSION_2_0) + +// OpenCL 2.0 contains SVM definitions + +#else + +typedef cl_bitfield cl_device_svm_capabilities; +typedef cl_bitfield cl_svm_mem_flags; +typedef cl_uint cl_kernel_exec_info; + +// +// TODO Add real values after OpenCL 2.0 release +// + +#ifndef CL_DEVICE_SVM_CAPABILITIES +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 + +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) +#endif + +#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) +#endif + +#ifndef CL_MEM_SVM_ATOMICS +#define CL_MEM_SVM_ATOMICS (1 << 11) +#endif + + +#endif // CL_VERSION_2_0 +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp new file mode 100644 index 0000000..497bc3d --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp @@ -0,0 +1,166 @@ +/* See LICENSE file in the root OpenCV directory */ + +#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP +#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP + +#if defined(HAVE_OPENCL_SVM) +#include "opencl_core.hpp" + +#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD +// +// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package. +// Below is the original copyright. +// +/******************************************************************************* + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/******************************************* + * Shared Virtual Memory (SVM) extension + *******************************************/ +typedef cl_bitfield cl_device_svm_capabilities_amd; +typedef cl_bitfield cl_svm_mem_flags_amd; +typedef cl_uint cl_kernel_exec_info_amd; + +/* cl_device_info */ +#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054 + +/* cl_device_svm_capabilities_amd */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3) + +/* cl_svm_mem_flags_amd */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10) +#define CL_MEM_SVM_ATOMICS_AMD (1 << 11) + +/* cl_mem_info */ +#define CL_MEM_USES_SVM_POINTER_AMD 0x1109 + +/* cl_kernel_exec_info_amd */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7 + +/* cl_command_type */ +#define CL_COMMAND_SVM_FREE_AMD 0x1209 +#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A +#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B +#define CL_COMMAND_SVM_MAP_AMD 0x120C +#define CL_COMMAND_SVM_UNMAP_AMD 0x120D + +typedef CL_API_ENTRY void* +(CL_API_CALL * clSVMAllocAMD_fn)( + cl_context /* context */, + cl_svm_mem_flags_amd /* flags */, + size_t /* size */, + unsigned int /* alignment */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void +(CL_API_CALL * clSVMFreeAMD_fn)( + cl_context /* context */, + void* /* svm_pointer */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMFreeAMD_fn)( + cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void (CL_CALLBACK *)( /*pfn_free_func*/ + cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void** /* svm_pointers */, + void* /* user_data */), + void* /* user_data */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_copy */, + void* /* dst_ptr */, + const void* /* src_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + const void* /* pattern */, + size_t /* pattern_size */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clEnqueueSVMMapAMD_fn)( + cl_command_queue /* command_queue */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + void* /* svm_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * 
clEnqueueSVMUnmapAMD_fn)( + cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)( + cl_kernel /* kernel */, + cl_uint /* arg_index */, + const void * /* arg_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clSetKernelExecInfoAMD_fn)( + cl_kernel /* kernel */, + cl_kernel_exec_info_amd /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */ +) CL_EXT_SUFFIX__VERSION_1_2; + +#endif + +#endif // HAVE_OPENCL_SVM + +#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opengl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opengl.hpp new file mode 100644 index 0000000..a311ce2 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/opengl.hpp @@ -0,0 +1,725 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#ifndef OPENCV_CORE_OPENGL_HPP
+#define OPENCV_CORE_OPENGL_HPP
+
+#ifndef __cplusplus
+#  error opengl.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "ocl.hpp"
+
+namespace cv { namespace ogl {
+
+/** @addtogroup core_opengl
+This section describes OpenGL interoperability.
+
+To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON . Currently OpenGL is
+supported only with WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
+supported). For the GTK backend the gtkglext-1.0 library is required.
+
+To use OpenGL functionality you should first create an OpenGL context (window or frame buffer). You can
+do this with the namedWindow function or with another OpenGL toolkit (GLUT, for example).
+*/
+//! @{
+
+/////////////////// OpenGL Objects ///////////////////
+
+/** @brief Smart pointer for OpenGL buffer object with reference counting.
+
+Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL
+context. These can be used to store vertex data, pixel data retrieved from images or the
+framebuffer, and a variety of other things.
+
+ogl::Buffer has an interface similar to Mat and represents 2D array memory.
+
+ogl::Buffer supports memory transfers between host and device and also can be mapped to CUDA memory.
+ */
+class CV_EXPORTS Buffer
+{
+public:
+    /** @brief The target defines how you intend to use the buffer object.
+    */
+    enum Target
+    {
+        ARRAY_BUFFER         = 0x8892, //!< The buffer will be used as a source for vertex data
+        ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example)
+        PIXEL_PACK_BUFFER    = 0x88EB, //!< The buffer will be used for reading from OpenGL textures
+        PIXEL_UNPACK_BUFFER  = 0x88EC  //!< The buffer will be used for writing to OpenGL textures
+    };
+
+    enum Access
+    {
+        READ_ONLY  = 0x88B8,
+        WRITE_ONLY = 0x88B9,
+        READ_WRITE = 0x88BA
+    };
+
+    /** @brief The constructors.
+
+    Creates an empty ogl::Buffer object, creates an ogl::Buffer object from an existing buffer ( abufId
+    parameter), allocates memory for the ogl::Buffer object or copies from host/device memory.
+    */
+    Buffer();
+
+    /** @overload
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param abufId Buffer object name.
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param abufId Buffer object name.
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false);
+
+    /** @overload
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage.
See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload + @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ). + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @brief Allocates memory for ogl::Buffer object. + + @param arows Number of rows in a 2D array. + @param acols Number of columns in a 2D array. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload + @param asize 2D array size. + @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details. + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @brief Decrements the reference counter and destroys the buffer object if needed. + + The function will call setAutoRelease(true) . + */ + void release(); + + /** @brief Sets auto release mode. + + The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was + bound to a window it could be released at any time (user can close a window). If object's destructor + is called after destruction of the context it will cause an error. Thus ogl::Buffer doesn't destroy + OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL context). + This function can force ogl::Buffer destructor to destroy OpenGL object. + @param flag Auto release mode (if true, release will be called in object's destructor). + */ + void setAutoRelease(bool flag); + + /** @brief Copies from host/device memory to OpenGL buffer. + @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ). + @param target Buffer usage. See cv::ogl::Buffer::Target . + @param autoRelease Auto release mode (if true, release will be called in object's destructor). + */ + void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @overload */ + void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false); + + /** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object. + + @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or + ogl::Buffer ). + */ + void copyTo(OutputArray arr) const; + + /** @overload */ + void copyTo(OutputArray arr, cuda::Stream& stream) const; + + /** @brief Creates a full copy of the buffer object and the underlying data. + + @param target Buffer usage for destination buffer. + @param autoRelease Auto release mode for destination buffer. + */ + Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const; + + /** @brief Binds OpenGL buffer to the specified buffer binding point. + + @param target Binding point. 
See cv::ogl::Buffer::Target .
+    */
+    void bind(Target target) const;
+
+    /** @brief Unbind any buffers from the specified binding point.
+
+    @param target Binding point. See cv::ogl::Buffer::Target .
+    */
+    static void unbind(Target target);
+
+    /** @brief Maps OpenGL buffer to host memory.
+
+    mapHost maps to the client's address space the entire data store of the buffer object. The data can
+    then be directly read and/or written relative to the returned pointer, depending on the specified
+    access policy.
+
+    A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used.
+
+    This operation can lead to memory transfers between host and device.
+
+    Only one buffer object can be mapped at a time.
+    @param access Access policy, indicating whether it will be possible to read from, write to, or both
+    read from and write to the buffer object's mapped data store. The symbolic constant must be
+    ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE .
+    */
+    Mat mapHost(Access access);
+
+    /** @brief Unmaps OpenGL buffer.
+    */
+    void unmapHost();
+
+    //! map to device memory (blocking)
+    cuda::GpuMat mapDevice();
+    void unmapDevice();
+
+    /** @brief Maps OpenGL buffer to CUDA device memory.
+
+    This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
+
+    A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
+    */
+    cuda::GpuMat mapDevice(cuda::Stream& stream);
+
+    /** @brief Unmaps OpenGL buffer.
+    */
+    void unmapDevice(cuda::Stream& stream);
+
+    int rows() const;
+    int cols() const;
+    Size size() const;
+    bool empty() const;
+
+    int type() const;
+    int depth() const;
+    int channels() const;
+    int elemSize() const;
+    int elemSize1() const;
+
+    //! get OpenGL object id
+    unsigned int bufId() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    int rows_;
+    int cols_;
+    int type_;
+};
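The mapHost/unmapHost pair above is the usual CPU access path. A minimal usage sketch (assuming an OpenGL context is already current, e.g. one created by namedWindow with WINDOW_OPENGL):

    // Sketch only: upload a Mat, then edit the buffer through a host mapping.
    cv::Mat frame(480, 640, CV_8UC3, cv::Scalar::all(0));
    cv::ogl::Buffer buf(frame, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER); // host -> GL copy
    cv::Mat view = buf.mapHost(cv::ogl::Buffer::READ_WRITE);          // mapped into client memory
    view.setTo(cv::Scalar(0, 255, 0));                                // edited in place
    buf.unmapHost();                                                  // required before GL uses buf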
+
+/** @brief Smart pointer for OpenGL 2D texture memory with reference counting.
+ */
+class CV_EXPORTS Texture2D
+{
+public:
+    /** @brief An Image Format describes the way that the images in Textures store their data.
+    */
+    enum Format
+    {
+        NONE            = 0,
+        DEPTH_COMPONENT = 0x1902, //!< Depth
+        RGB             = 0x1907, //!< Red, Green, Blue
+        RGBA            = 0x1908  //!< Red, Green, Blue, Alpha
+    };
+
+    /** @brief The constructors.
+
+    Creates an empty ogl::Texture2D object, allocates memory for the ogl::Texture2D object or copies from
+    host/device memory.
+    */
+    Texture2D();
+
+    /** @overload */
+    Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false);
+
+    /** @overload */
+    Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false);
+
+    /** @overload
+    @param arows Number of rows.
+    @param acols Number of columns.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Texture2D(int arows, int acols, Format aformat, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Texture2D(Size asize, Format aformat, bool autoRelease = false);
+
+    /** @overload
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    explicit Texture2D(InputArray arr, bool autoRelease = false);
+
+    /** @brief Allocates memory for ogl::Texture2D object.
+
+    @param arows Number of rows.
+    @param acols Number of columns.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void create(int arows, int acols, Format aformat, bool autoRelease = false);
+    /** @overload
+    @param asize 2D array size.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void create(Size asize, Format aformat, bool autoRelease = false);
+
+    /** @brief Decrements the reference counter and destroys the texture object if needed.
+
+    The function will call setAutoRelease(true) .
+    */
+    void release();
+
+    /** @brief Sets auto release mode.
+
+    @param flag Auto release mode (if true, release will be called in object's destructor).
+
+    The lifetime of the OpenGL object is tied to the lifetime of the context. If OpenGL context was
+    bound to a window it could be released at any time (user can close a window). If object's destructor
+    is called after destruction of the context it will cause an error. Thus ogl::Texture2D doesn't
+    destroy OpenGL object in destructor by default (all OpenGL resources will be released with OpenGL
+    context). This function can force ogl::Texture2D destructor to destroy OpenGL object.
+    */
+    void setAutoRelease(bool flag);
+
+    /** @brief Copies from host/device memory to OpenGL texture.
+
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void copyFrom(InputArray arr, bool autoRelease = false);
+
+    /** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object.
+
+    @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or
+    ogl::Texture2D ).
+    @param ddepth Destination depth.
+    @param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture).
+    */
+    void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const;
+
+    /** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target.
+    */
+    void bind() const;
+
+    int rows() const;
+    int cols() const;
+    Size size() const;
+    bool empty() const;
+
+    Format format() const;
+
+    //! get OpenGL object id
+    unsigned int texId() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    int rows_;
+    int cols_;
+    Format format_;
+};
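A minimal round trip through a texture (illustrative only; assumes a current OpenGL context, and that a 3-channel Mat maps to the RGB format):

    // Sketch only: upload an image, bind it, then download it again.
    cv::Mat img(256, 256, CV_8UC3, cv::Scalar(0, 0, 255));
    cv::ogl::Texture2D tex(img);    // host -> texture upload
    tex.bind();                     // bind to the active texture unit
    cv::Mat back;
    tex.copyTo(back, CV_8U);        // texture -> host download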
+
+/** @brief Wrapper for OpenGL Client-Side Vertex arrays.
+
+ogl::Arrays stores vertex data in ogl::Buffer objects.
+ */
+class CV_EXPORTS Arrays
+{
+public:
+    /** @brief Default constructor
+    */
+    Arrays();
+
+    /** @brief Sets an array of vertex coordinates.
+    @param vertex array with vertex coordinates, can be both host and device memory.
+    */
+    void setVertexArray(InputArray vertex);
+
+    /** @brief Resets vertex coordinates.
+    */
+    void resetVertexArray();
+
+    /** @brief Sets an array of vertex colors.
+    @param color array with vertex colors, can be both host and device memory.
+    */
+    void setColorArray(InputArray color);
+
+    /** @brief Resets vertex colors.
+    */
+    void resetColorArray();
+
+    /** @brief Sets an array of vertex normals.
+    @param normal array with vertex normals, can be both host and device memory.
+    */
+    void setNormalArray(InputArray normal);
+
+    /** @brief Resets vertex normals.
+    */
+    void resetNormalArray();
+
+    /** @brief Sets an array of vertex texture coordinates.
+    @param texCoord array with vertex texture coordinates, can be both host and device memory.
+    */
+    void setTexCoordArray(InputArray texCoord);
+
+    /** @brief Resets vertex texture coordinates.
+    */
+    void resetTexCoordArray();
+
+    /** @brief Releases all inner buffers.
+    */
+    void release();
+
+    /** @brief Sets auto release mode for all inner buffers.
+    @param flag Auto release mode.
+    */
+    void setAutoRelease(bool flag);
+
+    /** @brief Binds all vertex arrays.
+    */
+    void bind() const;
+
+    /** @brief Returns the vertex count.
+    */
+    int size() const;
+    bool empty() const;
+
+private:
+    int size_;
+    Buffer vertex_;
+    Buffer color_;
+    Buffer normal_;
+    Buffer texCoord_;
+};
+
+/////////////////// Render Functions ///////////////////
+
+//! render mode
+enum RenderModes {
+    POINTS         = 0x0000,
+    LINES          = 0x0001,
+    LINE_LOOP      = 0x0002,
+    LINE_STRIP     = 0x0003,
+    TRIANGLES      = 0x0004,
+    TRIANGLE_STRIP = 0x0005,
+    TRIANGLE_FAN   = 0x0006,
+    QUADS          = 0x0007,
+    QUAD_STRIP     = 0x0008,
+    POLYGON        = 0x0009
+};
+
+/** @brief Render OpenGL texture or primitives.
+@param tex Texture to draw.
+@param wndRect Region of window, where to draw a texture (normalized coordinates).
+@param texRect Region of texture to draw (normalized coordinates).
+ */
+CV_EXPORTS void render(const Texture2D& tex,
+    Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0),
+    Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0));
+
+/** @overload
+@param arr Array of primitives vertices.
+@param mode Render mode. One of cv::ogl::RenderModes
+@param color Color for all vertices. Will be used if arr doesn't contain color array.
+*/
+CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
+
+/** @overload
+@param arr Array of primitives vertices.
+@param indices Array of vertices indices (host or device memory).
+@param mode Render mode. One of cv::ogl::RenderModes
+@param color Color for all vertices. Will be used if arr doesn't contain color array.
+*/
+CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
+
+/////////////////// CL-GL Interoperability Functions ///////////////////
+
+namespace ocl {
+using namespace cv::ocl;
+
+// TODO static functions in the Context class
+/** @brief Creates OpenCL context from GL.
+@return Returns reference to OpenCL Context
+ */
+CV_EXPORTS Context& initializeContextFromGL();
+
+} // namespace cv::ogl::ocl
+
+/** @brief Converts InputArray to Texture2D object.
+@param src - source InputArray.
+@param texture - destination Texture2D object.
+ */
+CV_EXPORTS void convertToGLTexture2D(InputArray src, Texture2D& texture);
+
+/** @brief Converts Texture2D object to OutputArray.
+@param texture - source Texture2D object.
+@param dst - destination OutputArray.
+ */
+CV_EXPORTS void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst);
+
+/** @brief Maps Buffer object to process on CL side (convert to UMat).
+
+Function creates CL buffer from GL one, and then constructs UMat that can be used
+to process buffer data with OpenCV functions. Note that in current implementation
+UMat constructed this way doesn't own corresponding GL buffer object, so it is
+the user responsibility to close down CL/GL buffers relationships by explicitly
+calling unmapGLBuffer() function.
+@param buffer - source Buffer object.
+@param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE). +@return Returns UMat object + */ +CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, AccessFlag accessFlags = ACCESS_READ | ACCESS_WRITE); + +/** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer). + +Function must be called explicitly by the user for each UMat previously constructed +by the call to mapGLBuffer() function. +@param u - source UMat, created by mapGLBuffer(). + */ +CV_EXPORTS void unmapGLBuffer(UMat& u); + +//! @} +}} // namespace cv::ogl + +namespace cv { namespace cuda { + +/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability. + +This function should be explicitly called after OpenGL context creation and before any CUDA calls. +@param device System index of a CUDA device starting with 0. +@ingroup core_opengl + */ +CV_EXPORTS void setGlDevice(int device = 0); + +}} + +//! @cond IGNORED + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +inline +cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0) +{ + create(arows, acols, atype, target, autoRelease); +} + +inline +cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0) +{ + create(asize, atype, target, autoRelease); +} + +inline +void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease) +{ + create(asize.height, asize.width, atype, target, autoRelease); +} + +inline +int cv::ogl::Buffer::rows() const +{ + return rows_; +} + +inline +int cv::ogl::Buffer::cols() const +{ + return cols_; +} + +inline +cv::Size cv::ogl::Buffer::size() const +{ + return Size(cols_, rows_); +} + +inline +bool cv::ogl::Buffer::empty() const +{ + return rows_ == 0 || cols_ == 0; +} + +inline +int cv::ogl::Buffer::type() const +{ + return type_; +} + +inline +int cv::ogl::Buffer::depth() const +{ + return CV_MAT_DEPTH(type_); +} + +inline +int cv::ogl::Buffer::channels() const +{ + return CV_MAT_CN(type_); +} + +inline +int cv::ogl::Buffer::elemSize() const +{ + return CV_ELEM_SIZE(type_); +} + +inline +int cv::ogl::Buffer::elemSize1() const +{ + return CV_ELEM_SIZE1(type_); +} + +/////// + +inline +cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE) +{ + create(arows, acols, aformat, autoRelease); +} + +inline +cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE) +{ + create(asize, aformat, autoRelease); +} + +inline +void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease) +{ + create(asize.height, asize.width, aformat, autoRelease); +} + +inline +int cv::ogl::Texture2D::rows() const +{ + return rows_; +} + +inline +int cv::ogl::Texture2D::cols() const +{ + return cols_; +} + +inline +cv::Size cv::ogl::Texture2D::size() const +{ + return Size(cols_, rows_); +} + +inline +bool cv::ogl::Texture2D::empty() const +{ + return rows_ == 0 || cols_ == 0; +} + +inline +cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const +{ + return format_; +} + +/////// + +inline +cv::ogl::Arrays::Arrays() : size_(0) +{ +} + +inline +int cv::ogl::Arrays::size() const +{ + return size_; +} + +inline +bool cv::ogl::Arrays::empty() const +{ + return size_ == 0; 
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_OPENGL_HPP */
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/operations.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/operations.hpp
new file mode 100644
index 0000000..43a9eb8
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/operations.hpp
@@ -0,0 +1,610 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_OPERATIONS_HPP
+#define OPENCV_CORE_OPERATIONS_HPP
+
+#ifndef __cplusplus
+#  error operations.hpp header must be compiled as C++
+#endif
+
+#include <cstdio>
+
+#if defined(__GNUC__) || defined(__clang__) // at least GCC 3.1+, clang 3.5+
+# if defined(__MINGW_PRINTF_FORMAT) // https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/.
+#   define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (__MINGW_PRINTF_FORMAT, string_idx, first_to_check)))
+# else
+#   define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (printf, string_idx, first_to_check)))
+# endif
+#else
+# define CV_FORMAT_PRINTF(A, B)
+#endif
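On GCC and Clang this attribute gives printf-style declarations compile-time checking of their variadic arguments. A hypothetical declaration using it (sketch only; `log_msg` is not part of the sources):

    void log_msg(const char* fmt, ...) CV_FORMAT_PRINTF(1, 2);
    // log_msg("%d items", 3);   // fine
    // log_msg("%s items", 3);   // -Wformat warning: '%s' expects char*, got int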
+//! @cond IGNORED
+
+namespace cv
+{
+
+////////////////////////////// Matx methods depending on core API /////////////////////////////
+
+namespace internal
+{
+
+template<typename _Tp, int m, int n> struct Matx_FastInvOp
+{
+    bool operator()(const Matx<_Tp, m, n>& a, Matx<_Tp, n, m>& b, int method) const
+    {
+        return invert(a, b, method) != 0;
+    }
+};
+
+template<typename _Tp, int m> struct Matx_FastInvOp<_Tp, m, m>
+{
+    bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
+    {
+        if (method == DECOMP_LU || method == DECOMP_CHOLESKY)
+        {
+            Matx<_Tp, m, m> temp = a;
+
+            // assume that b is all 0's on input => make it a unity matrix
+            for (int i = 0; i < m; i++)
+                b(i, i) = (_Tp)1;
+
+            if (method == DECOMP_CHOLESKY)
+                return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
+
+            return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
+        }
+        else
+        {
+            return invert(a, b, method) != 0;
+        }
+    }
+};
+
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 2, 2>
+{
+    bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int /*method*/) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if (d == 0)
+            return false;
+        d = 1/d;
+        b(1,1) = a(0,0)*d;
+        b(0,0) = a(1,1)*d;
+        b(0,1) = -a(0,1)*d;
+        b(1,0) = -a(1,0)*d;
+        return true;
+    }
+};
+
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 3, 3>
+{
+    bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int /*method*/) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if (d == 0)
+            return false;
+        d = 1/d;
+        b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d;
+        b(0,1) = (a(0,2) * a(2,1) - a(0,1) * a(2,2)) * d;
+        b(0,2) = (a(0,1) * a(1,2) - a(0,2) * a(1,1)) * d;
+
+        b(1,0) = (a(1,2) * a(2,0) - a(1,0) * a(2,2)) * d;
+        b(1,1) = (a(0,0) * a(2,2) - a(0,2) * a(2,0)) * d;
+        b(1,2) = (a(0,2) * a(1,0) - a(0,0) * a(1,2)) * d;
+
+        b(2,0) = (a(1,0) * a(2,1) - a(1,1) * a(2,0)) * d;
+        b(2,1) = (a(0,1) * a(2,0) - a(0,0) * a(2,1)) * d;
+        b(2,2) = (a(0,0) * a(1,1) - a(0,1) * a(1,0)) * d;
+        return true;
+    }
+};
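The 2x2 and 3x3 specializations apply the closed-form adjugate-over-determinant inverse instead of a general decomposition. A quick numeric check of the 2x2 path (illustrative only):

    // For a = [4 7; 2 6]: det = 4*6 - 7*2 = 10, so
    // a^-1 = (1/10) * [ 6 -7; -2 4 ] = [0.6 -0.7; -0.2 0.4].
    cv::Matx22d a(4, 7, 2, 6);
    cv::Matx22d ai = a.inv();   // dispatches here for 2x2: ai(0,0) == 0.6, ai(0,1) == -0.7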
+
+template<typename _Tp, int m, int l, int n> struct Matx_FastSolveOp
+{
+    bool operator()(const Matx<_Tp, m, l>& a, const Matx<_Tp, m, n>& b,
+                    Matx<_Tp, l, n>& x, int method) const
+    {
+        return cv::solve(a, b, x, method);
+    }
+};
+
+template<typename _Tp, int m, int n> struct Matx_FastSolveOp<_Tp, m, m, n>
+{
+    bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
+                    Matx<_Tp, m, n>& x, int method) const
+    {
+        if (method == DECOMP_LU || method == DECOMP_CHOLESKY)
+        {
+            Matx<_Tp, m, m> temp = a;
+            x = b;
+            if( method == DECOMP_CHOLESKY )
+                return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
+
+            return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
+        }
+        else
+        {
+            return cv::solve(a, b, x, method);
+        }
+    }
+};
+
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 2, 1>
+{
+    bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b,
+                    Matx<_Tp, 2, 1>& x, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if (d == 0)
+            return false;
+        d = 1/d;
+        x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d;
+        x(1) = (b(1)*a(0,0) - b(0)*a(1,0))*d;
+        return true;
+    }
+};
+
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 3, 1>
+{
+    bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b,
+                    Matx<_Tp, 3, 1>& x, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if (d == 0)
+            return false;
+        d = 1/d;
+        x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) -
+                a(0,1)*(b(1)*a(2,2) - a(1,2)*b(2)) +
+                a(0,2)*(b(1)*a(2,1) - a(1,1)*b(2)));
+
+        x(1) = d*(a(0,0)*(b(1)*a(2,2) - a(1,2)*b(2)) -
+                b(0)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) +
+                a(0,2)*(a(1,0)*b(2) - b(1)*a(2,0)));
+
+        x(2) = d*(a(0,0)*(a(1,1)*b(2) - b(1)*a(2,1)) -
+                a(0,1)*(a(1,0)*b(2) - b(1)*a(2,0)) +
+                b(0)*(a(1,0)*a(2,1) - a(1,1)*a(2,0)));
+        return true;
+    }
+};
+
+} // internal
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::randu(_Tp a, _Tp b)
+{
+    Matx<_Tp,m,n> M;
+    cv::randu(M, Scalar(a), Scalar(b));
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
+{
+    Matx<_Tp,m,n> M;
+    cv::randn(M, Scalar(a), Scalar(b));
+    return M;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::randu(_Tp a, _Tp b)
+{
+    Vec<_Tp,cn> V;
+    cv::randu(V, Scalar(a), Scalar(b));
+    return V;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::randn(_Tp a, _Tp b)
+{
+    Vec<_Tp,cn> V;
+    cv::randn(V, Scalar(a), Scalar(b));
+    return V;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
+{
+    Matx<_Tp, n, m> b;
+    bool ok = cv::internal::Matx_FastInvOp<_Tp, m, n>()(*this, b, method);
+    if (p_is_ok) *p_is_ok = ok;
+    return ok ? b : Matx<_Tp, n, m>::zeros();
+}
+
+template<typename _Tp, int m, int n> template<int l> inline
+Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const
+{
+    Matx<_Tp, n, l> x;
+    bool ok = cv::internal::Matx_FastSolveOp<_Tp, m, n, l>()(*this, rhs, x, method);
+    return ok ? x : Matx<_Tp, n, l>::zeros();
+}
+
+
+
+////////////////////////// Augmenting algebraic & logical operations //////////////////////////
+
+#define CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    static inline A& operator op (A& a, const B& b) { cvop; return a; }
+
+#define CV_MAT_AUG_OPERATOR(op, cvop, A, B) \
+    CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
+
+#define CV_MAT_AUG_OPERATOR_T(op, cvop, A, B) \
+    template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
+
+#define CV_MAT_AUG_OPERATOR_TN(op, cvop, A) \
+    template<typename _Tp, int m, int n> static inline A& operator op (A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } \
+    template<typename _Tp, int m, int n> static inline const A& operator op (const A& a, const Matx<_Tp,m,n>& b) { cvop; return a; }
+
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
+CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR  (*=, a.convertTo(a, -1, b), Mat, double)
+CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
+CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat)
+CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (/=,
cv::divide(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double) +CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double) +CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Mat) +CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Scalar) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Scalar) +CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>) +CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat) +CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat_<_Tp>) + +#undef CV_MAT_AUG_OPERATOR_TN +#undef CV_MAT_AUG_OPERATOR_T +#undef CV_MAT_AUG_OPERATOR +#undef CV_MAT_AUG_OPERATOR1 + + + +///////////////////////////////////////////// SVD ///////////////////////////////////////////// + +inline SVD::SVD() {} +inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); } +inline void SVD::solveZ( InputArray m, OutputArray _dst ) +{ + Mat mtx = m.getMat(); + SVD svd(mtx, (mtx.rows >= mtx.cols ? 
0 : SVD::FULL_UV));
+    _dst.create(svd.vt.cols, 1, svd.vt.type());
+    Mat dst = _dst.getMat();
+    svd.vt.row(svd.vt.rows-1).reshape(1,svd.vt.cols).copyTo(dst);
+}
+
+template<typename _Tp, int m, int n, int nm> inline void
+    SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false);
+    SVD::compute(_a, _w, _u, _vt);
+    CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]);
+}
+
+template<typename _Tp, int m, int n, int nm> inline void
+SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _a(a, false), _w(w, false);
+    SVD::compute(_a, _w);
+    CV_Assert(_w.data == (uchar*)&w.val[0]);
+}
+
+template<typename _Tp, int m, int n, int nm, int nb> inline void
+SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u,
+                const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs,
+                Matx<_Tp, n, nb>& dst )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false);
+    SVD::backSubst(_w, _u, _vt, _rhs, _dst);
+    CV_Assert(_dst.data == (uchar*)&dst.val[0]);
+}
+
+
+
+/////////////////////////////////// Multiply-with-Carry RNG ///////////////////////////////////
+
+inline RNG::RNG()              { state = 0xffffffff; }
+inline RNG::RNG(uint64 _state) { state = _state ? _state : 0xffffffff; }
+
+inline RNG::operator uchar()    { return (uchar)next(); }
+inline RNG::operator schar()    { return (schar)next(); }
+inline RNG::operator ushort()   { return (ushort)next(); }
+inline RNG::operator short()    { return (short)next(); }
+inline RNG::operator int()      { return (int)next(); }
+inline RNG::operator unsigned() { return next(); }
+inline RNG::operator float()    { return next()*2.3283064365386962890625e-10f; }
+inline RNG::operator double()   { unsigned t = next(); return (((uint64)t << 32) | next()) * 5.4210108624275221700372640043497e-20; }
+
+inline unsigned RNG::operator ()(unsigned N) { return (unsigned)uniform(0,N); }
+inline unsigned RNG::operator ()()           { return next(); }
+
+inline int    RNG::uniform(int a, int b)       { return a == b ? a : (int)(next() % (b - a) + a); }
+inline float  RNG::uniform(float a, float b)   { return ((float)*this)*(b - a) + a; }
+inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
+
+inline bool RNG::operator ==(const RNG& other) const { return state == other.state; }
+
+inline unsigned RNG::next()
+{
+    state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32);
+    return (unsigned)state;
+}
+
+//! returns the next uniformly-distributed random number of the specified type
+template<typename _Tp> static inline _Tp randu()
+{
+    return (_Tp)theRNG();
+}
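A short sketch of the generator in use (illustrative; a fixed seed makes the sequence reproducible):

    cv::RNG rng(12345);                 // multiply-with-carry state seeded explicitly
    int    i = rng.uniform(0, 10);      // integer in [0, 10)
    double d = rng.uniform(0.0, 1.0);   // double in [0, 1)
    float  f = (float)rng;              // next() scaled by 2^-32 into [0, 1)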
+
+///////////////////////////////// Formatted string generation /////////////////////////////////
+
+/** @brief Returns a text string formatted using the printf-like expression.
+
+The function acts like sprintf but forms and returns an STL string. It can be used to form an error
+message in the Exception constructor.
+@param fmt printf-compatible formatting specifiers.
+
+**Note**:
+|Type|Specifier|
+|-|-|
+|`const char*`|`%s`|
+|`char`|`%c`|
+|`float` / `double`|`%f`,`%g`|
+|`int`, `long`, `long long`|`%d`, `%ld`, `%lld`|
+|`unsigned`, `unsigned long`, `unsigned long long`|`%u`, `%lu`, `%llu`|
+|`uint64` -> `uintmax_t`, `int64` -> `intmax_t`|`%ju`, `%jd`|
+|`size_t`|`%zu`|
+ */
+CV_EXPORTS String format( const char* fmt, ... ) CV_FORMAT_PRINTF(1, 2);
+
+///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
+
+static inline
+Ptr<Formatted> format(InputArray mtx, Formatter::FormatType fmt)
+{
+    return Formatter::get(fmt)->format(mtx.getMat());
+}
+
+static inline
+int print(Ptr<Formatted> fmtd, FILE* stream = stdout)
+{
+    int written = 0;
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        written += fputs(str, stream);
+
+    return written;
+}
+
+static inline
+int print(const Mat& mtx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(mtx), stream);
+}
+
+static inline
+int print(const UMat& mtx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream);
+}
+
+template<typename _Tp> static inline
+int print(const std::vector<Point_<_Tp> >& vec, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(Mat(vec)), stream);
+}
+
+template<typename _Tp> static inline
+int print(const std::vector<Point3_<_Tp> >& vec, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(Mat(vec)), stream);
+}
+
+template<typename _Tp, int m, int n> static inline
+int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(cv::Mat(matx)), stream);
+}
+
+//! @endcond
+
+/****************************************************************************************\
+*                                  Auxiliary algorithms                                  *
+\****************************************************************************************/
+
+/** @brief Splits an element set into equivalency classes.
+
+The generic function partition implements an \f$O(N^2)\f$ algorithm for splitting a set of \f$N\f$ elements
+into one or more equivalency classes, as described in
+<http://en.wikipedia.org/wiki/Disjoint-set_data_structure>. The function returns the number of
+equivalency classes.
+@param _vec Set of elements stored as a vector.
+@param labels Output vector of labels. It contains as many elements as vec. Each label labels[i] is
+a 0-based cluster index of `vec[i]`.
+@param predicate Equivalence predicate (pointer to a boolean function of two arguments or an
+instance of the class that has the method bool operator()(const _Tp& a, const _Tp& b) ). The
+predicate returns true when the elements are certainly in the same class, and returns false if they
+may or may not be in the same class.
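For illustration, an editorial sketch of clustering nearby 1-D integers (the threshold of 10 is arbitrary):

@code{.cpp}
std::vector<int> pts = { 1, 2, 3, 50, 51, 100 };
std::vector<int> labels;
int nclasses = cv::partition(pts, labels,
    [](int a, int b) { return std::abs(a - b) < 10; });
// labels == {0, 0, 0, 1, 1, 2}, nclasses == 3
@endcode

The predicate only has to mark pairs that are *certainly* equivalent; the union-find passes in the implementation below take care of the transitive closure.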
+@ingroup core_cluster
+*/
+template<typename _Tp, class _EqPredicate> int
+partition( const std::vector<_Tp>& _vec, std::vector<int>& labels,
+           _EqPredicate predicate=_EqPredicate())
+{
+    int i, j, N = (int)_vec.size();
+    const _Tp* vec = &_vec[0];
+
+    const int PARENT=0;
+    const int RANK=1;
+
+    std::vector<int> _nodes(N*2);
+    int (*nodes)[2] = (int(*)[2])&_nodes[0];
+
+    // The first O(N) pass: create N single-vertex trees
+    for(i = 0; i < N; i++)
+    {
+        nodes[i][PARENT]=-1;
+        nodes[i][RANK] = 0;
+    }
+
+    // The main O(N^2) pass: merge connected components
+    for( i = 0; i < N; i++ )
+    {
+        int root = i;
+
+        // find root
+        while( nodes[root][PARENT] >= 0 )
+            root = nodes[root][PARENT];
+
+        for( j = 0; j < N; j++ )
+        {
+            if( i == j || !predicate(vec[i], vec[j]))
+                continue;
+            int root2 = j;
+
+            while( nodes[root2][PARENT] >= 0 )
+                root2 = nodes[root2][PARENT];
+
+            if( root2 != root )
+            {
+                // unite both trees
+                int rank = nodes[root][RANK], rank2 = nodes[root2][RANK];
+                if( rank > rank2 )
+                    nodes[root2][PARENT] = root;
+                else
+                {
+                    nodes[root][PARENT] = root2;
+                    nodes[root2][RANK] += rank == rank2;
+                    root = root2;
+                }
+                CV_Assert( nodes[root][PARENT] < 0 );
+
+                int k = j, parent;
+
+                // compress the path from node2 to root
+                while( (parent = nodes[k][PARENT]) >= 0 )
+                {
+                    nodes[k][PARENT] = root;
+                    k = parent;
+                }
+
+                // compress the path from node to root
+                k = i;
+                while( (parent = nodes[k][PARENT]) >= 0 )
+                {
+                    nodes[k][PARENT] = root;
+                    k = parent;
+                }
+            }
+        }
+    }
+
+    // Final O(N) pass: enumerate classes
+    labels.resize(N);
+    int nclasses = 0;
+
+    for( i = 0; i < N; i++ )
+    {
+        int root = i;
+        while( nodes[root][PARENT] >= 0 )
+            root = nodes[root][PARENT];
+        // re-use the rank as the class label
+        if( nodes[root][RANK] >= 0 )
+            nodes[root][RANK] = ~nclasses++;
+        labels[i] = ~nodes[root][RANK];
+    }
+
+    return nclasses;
+}
+
+} // cv
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/optim.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/optim.hpp
new file mode 100644
index 0000000..f61a2b9
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/optim.hpp
@@ -0,0 +1,302 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_OPTIM_HPP +#define OPENCV_OPTIM_HPP + +#include "opencv2/core.hpp" + +namespace cv +{ + +/** @addtogroup core_optim +The algorithms in this section minimize or maximize function value within specified constraints or +without any constraints. +@{ +*/ + +/** @brief Basic interface for all solvers + */ +class CV_EXPORTS MinProblemSolver : public Algorithm +{ +public: + /** @brief Represents function being optimized + */ + class CV_EXPORTS Function + { + public: + virtual ~Function() {} + virtual int getDims() const = 0; + virtual double getGradientEps() const; + virtual double calc(const double* x) const = 0; + virtual void getGradient(const double* x,double* grad); + }; + + /** @brief Getter for the optimized function. + + The optimized function is represented by Function interface, which requires derivatives to + implement the calc(double*) and getDim() methods to evaluate the function. + + @return Smart-pointer to an object that implements Function interface - it represents the + function that is being optimized. It can be empty, if no function was given so far. + */ + virtual Ptr getFunction() const = 0; + + /** @brief Setter for the optimized function. + + *It should be called at least once before the call to* minimize(), as default value is not usable. + + @param f The new function to optimize. + */ + virtual void setFunction(const Ptr& f) = 0; + + /** @brief Getter for the previously set terminal criteria for this algorithm. + + @return Deep copy of the terminal criteria used at the moment. + */ + virtual TermCriteria getTermCriteria() const = 0; + + /** @brief Set terminal criteria for solver. + + This method *is not necessary* to be called before the first call to minimize(), as the default + value is sensible. + + Algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when + the function values at the vertices of simplex are within termcrit.epsilon range or simplex + becomes so small that it can enclosed in a box with termcrit.epsilon sides, whatever comes + first. + @param termcrit Terminal criteria to be used, represented as cv::TermCriteria structure. + */ + virtual void setTermCriteria(const TermCriteria& termcrit) = 0; + + /** @brief actually runs the algorithm and performs the minimization. + + The sole input parameter determines the centroid of the starting simplex (roughly, it tells + where to start), all the others (terminal criteria, initial step, function to be minimized) are + supposed to be set via the setters before the call to this method or the default values (not + always sensible) will be used. + + @param x The initial point, that will become a centroid of an initial simplex. 
After the algorithm
+    terminates, it will be set to the point where the algorithm stops, the point of a possible
+    minimum.
+    @return The value of a function at the point found.
+     */
+    virtual double minimize(InputOutputArray x) = 0;
+};
+
+/** @brief This class is used to perform the non-linear non-constrained minimization of a function,
+
+defined on an `n`-dimensional Euclidean space, using the **Nelder-Mead method**, also known as
+the **downhill simplex method**. The basic idea about the method can be obtained from
+<http://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method>.
+
+It should be noted that this method, although deterministic, is rather a heuristic and therefore
+may converge to a local minimum, not necessarily a global one. It is an iterative optimization
+technique, which at each step uses information about the values of a function evaluated only at
+`n+1` points, arranged as a *simplex* in `n`-dimensional space (hence the second name of the
+method). At each step a new point is chosen to evaluate the function at; the obtained value is
+compared with previous ones and, based on this information, the simplex changes its shape, slowly
+moving towards the local minimum. Thus this method uses *only* function values to make decisions,
+in contrast to, say, the Nonlinear Conjugate Gradient method (which is also implemented in optim).
+
+The algorithm stops when the number of function evaluations done exceeds termcrit.maxCount, when the
+function values at the vertices of the simplex are within termcrit.epsilon range, or when the simplex
+becomes so small that it can be enclosed in a box with termcrit.epsilon sides, whichever comes first,
+for some user-defined positive integer termcrit.maxCount and positive non-integer termcrit.epsilon.
+
+@note DownhillSolver is a derivative of the abstract interface
+cv::MinProblemSolver, which in turn is derived from the Algorithm interface and is used to
+encapsulate the functionality, common to all non-linear optimization algorithms in the optim
+module.
+
+@note term criteria should meet the following condition:
+@code
+    termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
+@endcode
+ */
+class CV_EXPORTS DownhillSolver : public MinProblemSolver
+{
+public:
+    /** @brief Returns the initial step that will be used in downhill simplex algorithm.
+
+    @param step Initial step that will be used in algorithm. Note, that although corresponding setter
+    accepts column-vectors as well as row-vectors, this method will return a row-vector.
+    @see DownhillSolver::setInitStep
+    */
+    virtual void getInitStep(OutputArray step) const=0;
+
+    /** @brief Sets the initial step that will be used in downhill simplex algorithm.
+
+    Step, together with initial point (given in DownhillSolver::minimize) are two `n`-dimensional
+    vectors that are used to determine the shape of initial simplex. Roughly said, initial point
+    determines the position of a simplex (it will become simplex's centroid), while step determines the
+    spread (size in each dimension) of a simplex. To be more precise, if \f$s,x_0\in\mathbb{R}^n\f$ are
+    the initial step and initial point respectively, the vertices of a simplex will be:
+    \f$v_0:=x_0-\frac{1}{2} s\f$ and \f$v_i:=x_0+s_i\f$ for \f$i=1,2,\dots,n\f$ where \f$s_i\f$ denotes
+    the projection of the initial step onto the *i*-th coordinate axis (the result of the projection is
+    the vector given by \f$s_i:=e_i\cdot\left<e_i\cdot s\right>\f$, where \f$e_i\f$ form the canonical basis)
+
+    @param step Initial step that will be used in algorithm.
Roughly said, it determines the spread
+    (size in each dimension) of an initial simplex.
+    */
+    virtual void setInitStep(InputArray step)=0;
+
+    /** @brief This function returns the reference to the ready-to-use DownhillSolver object.
+
+    All the parameters are optional, so this procedure can be called even without parameters at
+    all. In this case, the default values will be used. As the default values for the terminal criteria
+    are the only sensible ones, MinProblemSolver::setFunction() and DownhillSolver::setInitStep()
+    should be called upon the obtained object, if the respective parameters were not given to
+    create(). Otherwise, the two ways (give parameters to createDownhillSolver() or miss them out
+    and call the MinProblemSolver::setFunction() and DownhillSolver::setInitStep()) are absolutely
+    equivalent (and will drop the same errors in the same way, should invalid input be detected).
+    @param f Pointer to the function that will be minimized, similarly to the one you submit via
+    MinProblemSolver::setFunction.
+    @param initStep Initial step, that will be used to construct the initial simplex, similarly to the one
+    you submit via DownhillSolver::setInitStep.
+    @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
+    MinProblemSolver::setTermCriteria.
+    */
+    static Ptr<DownhillSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<MinProblemSolver::Function>(),
+                                      InputArray initStep=Mat_<double>(1,1,0.0),
+                                      TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+};
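To make the interplay of setFunction, setInitStep and minimize concrete, here is an editorial sketch minimizing \f$f(x,y)=x^2+y^2\f$ (the function, start point and step are illustrative only, not from the original sources):

@code{.cpp}
class SphereF : public cv::MinProblemSolver::Function
{
public:
    int getDims() const CV_OVERRIDE { return 2; }
    double calc(const double* x) const CV_OVERRIDE { return x[0]*x[0] + x[1]*x[1]; }
};

cv::Ptr<cv::DownhillSolver> solver = cv::DownhillSolver::create();
solver->setFunction(cv::makePtr<SphereF>());
cv::Mat step = (cv::Mat_<double>(1, 2) << 0.5, 0.5); // spread of the initial simplex
solver->setInitStep(step);
cv::Mat x = (cv::Mat_<double>(1, 2) << 5.0, 3.0);    // centroid of the initial simplex
double fmin = solver->minimize(x);                   // on return x is near (0, 0), fmin near 0
@endcode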
+
+/** @brief This class is used to perform the non-linear non-constrained minimization of a function
+with known gradient,
+
+defined on an *n*-dimensional Euclidean space, using the **Nonlinear Conjugate Gradient method**.
+The implementation was done based on the beautifully clear explanatory article [An Introduction to
+the Conjugate Gradient Method Without the Agonizing
+Pain](http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf) by Jonathan Richard
+Shewchuk. The method can be seen as an adaptation of a standard Conjugate Gradient method (see, for
+example, <http://en.wikipedia.org/wiki/Conjugate_gradient_method>) for numerically solving
+systems of linear equations.
+
+It should be noted that this method, although deterministic, is rather a heuristic method and
+therefore may converge to a local minimum, not necessarily a global one. What is even more
+disastrous, most of its behaviour is ruled by the gradient, therefore it essentially cannot
+distinguish between local minima and maxima. Therefore, if it starts sufficiently near to a local
+maximum, it may converge to it. Another obvious restriction is that it should be possible to compute
+the gradient of the function at any point, thus it is preferable to have an analytic expression for
+the gradient, and the computational burden should be borne by the user.
+
+The latter responsibility is accomplished via the getGradient method of the
+MinProblemSolver::Function interface (which represents the function being optimized). This method
+takes a point in *n*-dimensional space (the first argument represents the array of coordinates of
+that point) and computes its gradient (which should be stored in the second argument as an array).
+
+@note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface.
+
+@note term criteria should meet the following condition:
+@code
+    termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
+    // or
+    termcrit.type == TermCriteria::MAX_ITER && termcrit.maxCount > 0
+@endcode
+ */
+class CV_EXPORTS ConjGradSolver : public MinProblemSolver
+{
+public:
+    /** @brief This function returns the reference to the ready-to-use ConjGradSolver object.
+
+    All the parameters are optional, so this procedure can be called even without parameters at
+    all. In this case, the default values will be used. As the default values for the terminal criteria
+    are the only sensible ones, MinProblemSolver::setFunction() should be called upon the obtained
+    object, if the function was not given to create(). Otherwise, the two ways (submit it to
+    create() or miss it out and call the MinProblemSolver::setFunction()) are absolutely equivalent
+    (and will drop the same errors in the same way, should invalid input be detected).
+    @param f Pointer to the function that will be minimized, similarly to the one you submit via
+    MinProblemSolver::setFunction.
+    @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
+    MinProblemSolver::setTermCriteria.
+    */
+    static Ptr<ConjGradSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<MinProblemSolver::Function>(),
+                                      TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+};
+
+//! return codes for cv::solveLP() function
+enum SolveLPResult
+{
+    SOLVELP_UNBOUNDED = -2, //!< problem is unbounded (target function can achieve arbitrary high values)
+    SOLVELP_UNFEASIBLE = -1, //!< problem is unfeasible (there are no points that satisfy all the constraints imposed)
+    SOLVELP_SINGLE = 0, //!< there is only one maximum for target function
+    SOLVELP_MULTI = 1 //!< there are multiple maxima for target function - the arbitrary one is returned
+};
+
+/** @brief Solve given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method).
+
+What we mean here by "linear programming problem" (or LP problem, for short) can be formulated as:
+
+\f[\mbox{Maximize } c\cdot x\\
+ \mbox{Subject to:}\\
+ Ax\leq b\\
+ x\geq 0\f]
+
+Where \f$c\f$ is a fixed `1`-by-`n` row-vector, \f$A\f$ is a fixed `m`-by-`n` matrix, \f$b\f$ is a fixed `m`-by-`1`
+column vector and \f$x\f$ is an arbitrary `n`-by-`1` column vector, which satisfies the constraints.
+
+The simplex algorithm is one of many algorithms that are designed to handle this sort of problem
+efficiently. Although it is not optimal in a theoretical sense (there exist algorithms that can solve
+any problem written as above in polynomial time, while the simplex method degenerates to exponential
+time for some special cases), it is well-studied, easy to implement and is shown to work well for
+real-life purposes.
+
+The particular implementation is taken almost verbatim from **Introduction to Algorithms, third
+edition** by T. H. Cormen, C. E. Leiserson, R. L. Rivest and Clifford Stein. In particular,
+Bland's rule is used to prevent cycling.
+
+@param Func This row-vector corresponds to \f$c\f$ in the LP problem formulation (see above). It should
+contain 32- or 64-bit floating point numbers. As a convenience, a column-vector may be also submitted,
+in the latter case it is understood to correspond to \f$c^T\f$.
+@param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above
+and the remaining to \f$A\f$. It should contain 32- or 64-bit floating point numbers.
+@param z The solution will be returned here as a column-vector - it corresponds to \f$x\f$ in the
+formulation above. It will contain 64-bit floating point numbers.
+@return One of cv::SolveLPResult
+ */
+CV_EXPORTS_W int solveLP(InputArray Func, InputArray Constr, OutputArray z);
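A quick editorial illustration of these conventions (the numbers are arbitrary): maximize \f$3x_1+x_2\f$ subject to \f$x_1+2x_2\leq 4\f$ and \f$x\geq 0\f$; the unique optimum is \f$x=(4,0)^T\f$ with objective value 12.

@code{.cpp}
cv::Mat c = (cv::Mat_<double>(1, 2) << 3, 1);    // objective row-vector c
cv::Mat A = (cv::Mat_<double>(1, 3) << 1, 2, 4); // one constraint row, [ A | b ]
cv::Mat z;
int res = cv::solveLP(c, A, z); // res == cv::SOLVELP_SINGLE, z == (4., 0.)^T
@endcode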
+
+//! @}
+
+}// cv
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ovx.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ovx.hpp
new file mode 100644
index 0000000..8bb7d54
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/ovx.hpp
@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// OpenVX related definitions and declarations
+
+#pragma once
+#ifndef OPENCV_OVX_HPP
+#define OPENCV_OVX_HPP
+
+#include "cvdef.h"
+
+namespace cv
+{
+/// Check if use of OpenVX is possible
+CV_EXPORTS_W bool haveOpenVX();
+
+/// Check if use of OpenVX is enabled
+CV_EXPORTS_W bool useOpenVX();
+
+/// Enable/disable use of OpenVX
+CV_EXPORTS_W void setUseOpenVX(bool flag);
+} // namespace cv
+
+#endif // OPENCV_OVX_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp
new file mode 100644
index 0000000..b172cac
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp
@@ -0,0 +1,72 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_PARALLEL_FOR_OPENMP_HPP
+#define OPENCV_CORE_PARALLEL_FOR_OPENMP_HPP
+
+#include "opencv2/core/parallel/parallel_backend.hpp"
+
+#if !defined(_OPENMP) && !defined(OPENCV_SKIP_OPENMP_PRESENSE_CHECK)
+#error "This file must be compiled with enabled OpenMP"
+#endif
+
+#include <omp.h>
+
+namespace cv { namespace parallel { namespace openmp {
+
+/** OpenMP parallel_for API implementation
+ *
+ * @sa setParallelForBackend
+ * @ingroup core_parallel_backend
+ */
+class ParallelForBackend : public ParallelForAPI
+{
+protected:
+    int numThreads;
+    int numThreadsMax;
+public:
+    ParallelForBackend()
+    {
+        numThreads = 0;
+        numThreadsMax = omp_get_max_threads();
+    }
+
+    virtual ~ParallelForBackend() {}
+
+    virtual void parallel_for(int tasks, FN_parallel_for_body_cb_t body_callback, void* callback_data) CV_OVERRIDE
+    {
+#pragma omp parallel for schedule(dynamic) num_threads(numThreads > 0 ? numThreads : numThreadsMax)
+        for (int i = 0; i < tasks; ++i)
+            body_callback(i, i + 1, callback_data);
+    }
+
+    virtual int getThreadNum() const CV_OVERRIDE
+    {
+        return omp_get_thread_num();
+    }
+
+    virtual int getNumThreads() const CV_OVERRIDE
+    {
+        return numThreads > 0
+               ?
numThreads
+               : numThreadsMax;
+    }
+
+    virtual int setNumThreads(int nThreads) CV_OVERRIDE
+    {
+        int oldNumThreads = numThreads;
+        numThreads = nThreads;
+        // nothing needed as numThreads is used in #pragma omp parallel for directly
+        return oldNumThreads;
+    }
+
+    const char* getName() const CV_OVERRIDE
+    {
+        return "openmp";
+    }
+};
+
+}}}  // namespace
+
+#endif  // OPENCV_CORE_PARALLEL_FOR_OPENMP_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp
new file mode 100644
index 0000000..04b0c4c
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp
@@ -0,0 +1,153 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_PARALLEL_FOR_TBB_HPP
+#define OPENCV_CORE_PARALLEL_FOR_TBB_HPP
+
+#include "opencv2/core/parallel/parallel_backend.hpp"
+#include <opencv2/core/utils/logger.hpp>
+
+#ifndef TBB_SUPPRESS_DEPRECATED_MESSAGES  // suppress warning
+#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
+#endif
+#include "tbb/tbb.h"
+#if !defined(TBB_INTERFACE_VERSION)
+#error "Unknown/unsupported TBB version"
+#endif
+
+#if TBB_INTERFACE_VERSION >= 8000
+#include "tbb/task_arena.h"
+#endif
+
+namespace cv { namespace parallel { namespace tbb {
+
+using namespace ::tbb;
+
+#if TBB_INTERFACE_VERSION >= 8000
+static tbb::task_arena& getArena()
+{
+    static tbb::task_arena tbbArena(tbb::task_arena::automatic);
+    return tbbArena;
+}
+#else
+static tbb::task_scheduler_init& getScheduler()
+{
+    static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
+    return tbbScheduler;
+}
+#endif
+
+/** TBB parallel_for API implementation
+ *
+ * @sa setParallelForBackend
+ * @ingroup core_parallel_backend
+ */
+class ParallelForBackend : public ParallelForAPI
+{
+protected:
+    int numThreads;
+    int numThreadsMax;
+public:
+    ParallelForBackend()
+    {
+        CV_LOG_INFO(NULL, "Initializing TBB parallel backend: TBB_INTERFACE_VERSION=" << TBB_INTERFACE_VERSION);
+        numThreads = 0;
+#if TBB_INTERFACE_VERSION >= 8000
+        (void)getArena();
+#else
+        (void)getScheduler();
+#endif
+    }
+
+    virtual ~ParallelForBackend() {}
+
+    class CallbackProxy
+    {
+        const FN_parallel_for_body_cb_t& callback;
+        void* const callback_data;
+        const int tasks;
+    public:
+        inline CallbackProxy(int tasks_, FN_parallel_for_body_cb_t& callback_, void* callback_data_)
+            : callback(callback_), callback_data(callback_data_), tasks(tasks_)
+        {
+            // nothing
+        }
+
+        void operator()(const tbb::blocked_range<int>& range) const
+        {
+            this->callback(range.begin(), range.end(), callback_data);
+        }
+
+        void operator()() const
+        {
+            tbb::parallel_for(tbb::blocked_range<int>(0, tasks), *this);
+        }
+    };
+
+    virtual void parallel_for(int tasks, FN_parallel_for_body_cb_t body_callback, void* callback_data) CV_OVERRIDE
+    {
+        CallbackProxy task(tasks, body_callback, callback_data);
+#if TBB_INTERFACE_VERSION >= 8000
+        getArena().execute(task);
+#else
+        task();
+#endif
+    }
+
+    virtual int getThreadNum() const CV_OVERRIDE
+    {
+#if TBB_INTERFACE_VERSION >= 9100
+        return tbb::this_task_arena::current_thread_index();
+#elif TBB_INTERFACE_VERSION >= 8000
+        return tbb::task_arena::current_thread_index();
+#else
+        return 0;
+#endif
}
+
+    virtual int getNumThreads() const CV_OVERRIDE
+    {
+#if TBB_INTERFACE_VERSION >= 9100
+        return getArena().max_concurrency();
+#elif TBB_INTERFACE_VERSION >= 8000
+        return numThreads > 0
+               ? numThreads
+               : tbb::task_scheduler_init::default_num_threads();
+#else
+        return getScheduler().is_active()
+               ? numThreads
+               : tbb::task_scheduler_init::default_num_threads();
+#endif
+    }
+
+    virtual int setNumThreads(int nThreads) CV_OVERRIDE
+    {
+        int oldNumThreads = numThreads;
+        numThreads = nThreads;
+
+#if TBB_INTERFACE_VERSION >= 8000
+        auto& tbbArena = getArena();
+        if (tbbArena.is_active())
+            tbbArena.terminate();
+        if (numThreads > 0)
+            tbbArena.initialize(numThreads);
+#else
+        auto& tbbScheduler = getScheduler();
+        if (tbbScheduler.is_active())
+            tbbScheduler.terminate();
+        if (numThreads > 0)
+            tbbScheduler.initialize(numThreads);
+#endif
+        return oldNumThreads;
+    }
+
+    const char* getName() const CV_OVERRIDE
+    {
+        return "tbb";
+    }
+};
+
+}}}  // namespace
+
+#endif  // OPENCV_CORE_PARALLEL_FOR_TBB_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/parallel_backend.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/parallel_backend.hpp
new file mode 100644
index 0000000..c3e8333
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/parallel/parallel_backend.hpp
@@ -0,0 +1,90 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_PARALLEL_BACKEND_HPP
+#define OPENCV_CORE_PARALLEL_BACKEND_HPP
+
+#include "opencv2/core/cvdef.h"
+#include <memory>
+
+namespace cv { namespace parallel {
+#ifndef CV_API_CALL
+#define CV_API_CALL
+#endif
+
+/** @addtogroup core_parallel_backend
+ * @{
+ * The API below is provided to resolve the problem of CPU resource over-subscription by multiple thread pools from different multi-threading frameworks.
+ * This is a common problem for cases when the threading framework OpenCV was compiled with differs from the User Application's framework.
+ *
+ * Applications can replace the OpenCV `parallel_for()` backend with their own implementation (to reuse the Application's thread pool).
+ *
+ *
+ * ### Backend API usage examples
+ *
+ * #### Intel TBB
+ *
+ * - include header with simple implementation of TBB backend:
+ *   @snippet parallel_backend/example-tbb.cpp tbb_include
+ * - execute backend replacement code:
+ *   @snippet parallel_backend/example-tbb.cpp tbb_backend
+ * - configuration of compiler/linker options is responsibility of Application's scripts
+ *
+ * #### OpenMP
+ *
+ * - include header with simple implementation of OpenMP backend:
+ *   @snippet parallel_backend/example-openmp.cpp openmp_include
+ * - execute backend replacement code:
+ *   @snippet parallel_backend/example-openmp.cpp openmp_backend
+ * - configuration of compiler/linker options is responsibility of Application's scripts
+ *
+ *
+ * ### Plugins support
+ *
+ * Runtime configuration options:
+ * - change backend priority: `OPENCV_PARALLEL_PRIORITY_<backend_name>=9999`
+ * - disable backend: `OPENCV_PARALLEL_PRIORITY_<backend_name>=0`
+ * - specify list of backends with high priority (>100000): `OPENCV_PARALLEL_PRIORITY_LIST=TBB,OPENMP`. Unknown backends are registered as new plugins.
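Pulling the above together, a hedged sketch of the replacement call using the OpenMP backend header shipped in this SDK (editor's illustration; compiler/linker flags for OpenMP remain the application's responsibility):

@code{.cpp}
#include <opencv2/core/parallel/backend/parallel_for.openmp.hpp>

int main()
{
    // Install the backend before any other OpenCV processing or thread creation.
    auto backend = std::make_shared<cv::parallel::openmp::ParallelForBackend>();
    cv::parallel::setParallelForBackend(backend);
    // ... cv::parallel_for_ based code now runs on the OpenMP pool ...
    return 0;
}
@endcode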
+ * + */ + +/** Interface for parallel_for backends implementations + * + * @sa setParallelForBackend + */ +class CV_EXPORTS ParallelForAPI +{ +public: + virtual ~ParallelForAPI(); + + typedef void (CV_API_CALL *FN_parallel_for_body_cb_t)(int start, int end, void* data); + + virtual void parallel_for(int tasks, FN_parallel_for_body_cb_t body_callback, void* callback_data) = 0; + + virtual int getThreadNum() const = 0; + + virtual int getNumThreads() const = 0; + + virtual int setNumThreads(int nThreads) = 0; + + virtual const char* getName() const = 0; +}; + +/** @brief Replace OpenCV parallel_for backend + * + * Application can replace OpenCV `parallel_for()` backend with own implementation. + * + * @note This call is not thread-safe. Consider calling this function from the `main()` before any other OpenCV processing functions (and without any other created threads). + */ +CV_EXPORTS void setParallelForBackend(const std::shared_ptr& api, bool propagateNumThreads = true); + +/** @brief Change OpenCV parallel_for backend + * + * @note This call is not thread-safe. Consider calling this function from the `main()` before any other OpenCV processing functions (and without any other created threads). + */ +CV_EXPORTS_W bool setParallelForBackend(const std::string& backendName, bool propagateNumThreads = true); + +//! @} +}} // namespace +#endif // OPENCV_CORE_PARALLEL_BACKEND_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/persistence.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/persistence.hpp new file mode 100644 index 0000000..8e135d1 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/persistence.hpp @@ -0,0 +1,1350 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PERSISTENCE_HPP
+#define OPENCV_CORE_PERSISTENCE_HPP
+
+#ifndef CV_DOXYGEN
+/// Define to support persistence legacy formats
+#define CV__LEGACY_PERSISTENCE
+#endif
+
+#ifndef __cplusplus
+# error persistence.hpp header must be compiled as C++
+#endif
+
+//! @addtogroup core_c
+//! @{
+
+/** @brief "black box" representation of the file storage associated with a file on disk.
+
+Several functions that are described below take CvFileStorage\* as inputs and allow the user to
+save or to load hierarchical collections that consist of scalar values, standard CXCore objects
+(such as matrices, sequences, graphs), and user-defined objects.
+
+OpenCV can read and write data in XML (<http://www.w3c.org/XML>), YAML (<http://www.yaml.org>) or
+JSON (<http://www.json.org/>) formats. Below is an example of a 3x3 floating-point identity matrix A,
+stored in XML and YAML files
+using CXCore functions:
+XML:
+@code{.xml}
+<?xml version="1.0">
+<opencv_storage>
+<A type_id="opencv-matrix">
+  <rows>3</rows>
+  <cols>3</cols>
+  <dt>f</dt>
+  <data>1. 0. 0. 0. 1. 0. 0. 0. 1.</data>
+</A>
+</opencv_storage>
+@endcode +YAML: +@code{.yaml} + %YAML:1.0 + A: !!opencv-matrix + rows: 3 + cols: 3 + dt: f + data: [ 1., 0., 0., 0., 1., 0., 0., 0., 1.] +@endcode +As it can be seen from the examples, XML uses nested tags to represent hierarchy, while YAML uses +indentation for that purpose (similar to the Python programming language). + +The same functions can read and write data in both formats; the particular format is determined by +the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for +JSON. + */ + +//! @} core_c + +#include "opencv2/core/types.hpp" +#include "opencv2/core/mat.hpp" + +namespace cv { + +/** @addtogroup core_xml + +XML/YAML/JSON file storages. {#xml_storage} +======================= +Writing to a file storage. +-------------------------- +You can store and then restore various OpenCV data structures to/from XML (), +YAML () or JSON () formats. Also, it is possible to store +and load arbitrarily complex data structures, which include OpenCV data structures, as well as +primitive data types (integer and floating-point numbers and text strings) as their elements. + +Use the following procedure to write something to XML, YAML or JSON: +-# Create new FileStorage and open it for writing. It can be done with a single call to +FileStorage::FileStorage constructor that takes a filename, or you can use the default constructor +and then call FileStorage::open. Format of the file (XML, YAML or JSON) is determined from the filename +extension (".xml", ".yml"/".yaml" and ".json", respectively) +-# Write all the data you want using the streaming operator `<<`, just like in the case of STL +streams. +-# Close the file using FileStorage::release. FileStorage destructor also closes the file. + +Here is an example: +@code + #include "opencv2/core.hpp" + #include + + using namespace cv; + + int main(int, char** argv) + { + FileStorage fs("test.yml", FileStorage::WRITE); + + fs << "frameCount" << 5; + time_t rawtime; time(&rawtime); + fs << "calibrationDate" << asctime(localtime(&rawtime)); + Mat cameraMatrix = (Mat_(3,3) << 1000, 0, 320, 0, 1000, 240, 0, 0, 1); + Mat distCoeffs = (Mat_(5,1) << 0.1, 0.01, -0.001, 0, 0); + fs << "cameraMatrix" << cameraMatrix << "distCoeffs" << distCoeffs; + fs << "features" << "["; + for( int i = 0; i < 3; i++ ) + { + int x = rand() % 640; + int y = rand() % 480; + uchar lbp = rand() % 256; + + fs << "{:" << "x" << x << "y" << y << "lbp" << "[:"; + for( int j = 0; j < 8; j++ ) + fs << ((lbp >> j) & 1); + fs << "]" << "}"; + } + fs << "]"; + fs.release(); + return 0; + } +@endcode +The sample above stores to YML an integer, a text string (calibration date), 2 matrices, and a custom +structure "feature", which includes feature coordinates and LBP (local binary pattern) value. Here +is output of the sample: +@code{.yaml} +%YAML:1.0 +frameCount: 5 +calibrationDate: "Fri Jun 17 14:09:29 2011\n" +cameraMatrix: !!opencv-matrix + rows: 3 + cols: 3 + dt: d + data: [ 1000., 0., 320., 0., 1000., 240., 0., 0., 1. ] +distCoeffs: !!opencv-matrix + rows: 5 + cols: 1 + dt: d + data: [ 1.0000000000000001e-01, 1.0000000000000000e-02, + -1.0000000000000000e-03, 0., 0. ] +features: + - { x:167, y:49, lbp:[ 1, 0, 0, 1, 1, 0, 1, 1 ] } + - { x:298, y:130, lbp:[ 0, 0, 0, 1, 0, 0, 1, 1 ] } + - { x:344, y:158, lbp:[ 1, 1, 0, 0, 0, 0, 1, 0 ] } +@endcode + +As an exercise, you can replace ".yml" with ".xml" or ".json" in the sample above and see, how the +corresponding XML file will look like. 
+
+Several things can be noted by looking at the sample code and the output:
+
+-   The produced YAML (and XML/JSON) consists of heterogeneous collections that can be nested. There are
+    2 types of collections: named collections (mappings) and unnamed collections (sequences). In mappings
+    each element has a name and is accessed by name. This is similar to structures and std::map in
+    C/C++ and dictionaries in Python. In sequences elements do not have names, they are accessed by
+    indices. This is similar to arrays and std::vector in C/C++ and lists, tuples in Python.
+    "Heterogeneous" means that elements of each single collection can have different types.
+
+    Top-level collection in YAML/XML/JSON is a mapping. Each matrix is stored as a mapping, and the matrix
+    elements are stored as a sequence. Then, there is a sequence of features, where each feature is
+    represented as a mapping, and the lbp value is kept in a nested sequence.
+
+-   When you write to a mapping (a structure), you write the element name followed by its value. When you
+    write to a sequence, you simply write the elements one by one. OpenCV data structures (such as
+    cv::Mat) are written in absolutely the same way as simple C data structures - using the `<<`
+    operator.
+
+-   To write a mapping, you first write the special string `{` to the storage, then write the
+    elements as pairs (`fs << <element_name> << <element_value>`) and then write the closing
+    `}`.
+
+-   To write a sequence, you first write the special string `[`, then write the elements, then
+    write the closing `]`.
+
+-   In YAML/JSON (but not XML), mappings and sequences can be written in a compact Python-like inline
+    form. In the sample above matrix elements, as well as each feature, including its lbp value, is
+    stored in such inline form. To store a mapping/sequence in a compact form, put `:` after the
+    opening character, e.g. use `{:` instead of `{` and `[:` instead of `[`. When the
+    data is written to XML, those extra `:` are ignored.
+
+Reading data from a file storage.
+---------------------------------
+To read the previously written XML, YAML or JSON file, do the following:
+-#  Open the file storage using FileStorage::FileStorage constructor or FileStorage::open method.
+    In the current implementation the whole file is parsed and the whole representation of file
+    storage is built in memory as a hierarchy of file nodes (see FileNode)
+
+-#  Read the data you are interested in. Use FileStorage::operator [], FileNode::operator []
+    and/or FileNodeIterator.
+
+-#  Close the storage using FileStorage::release.
+
+Here is how to read the file created by the code sample above:
+@code
+    FileStorage fs2("test.yml", FileStorage::READ);
+
+    // first method: use (type) operator on FileNode.
+ int frameCount = (int)fs2["frameCount"]; + + String date; + // second method: use FileNode::operator >> + fs2["calibrationDate"] >> date; + + Mat cameraMatrix2, distCoeffs2; + fs2["cameraMatrix"] >> cameraMatrix2; + fs2["distCoeffs"] >> distCoeffs2; + + cout << "frameCount: " << frameCount << endl + << "calibration date: " << date << endl + << "camera matrix: " << cameraMatrix2 << endl + << "distortion coeffs: " << distCoeffs2 << endl; + + FileNode features = fs2["features"]; + FileNodeIterator it = features.begin(), it_end = features.end(); + int idx = 0; + std::vector lbpval; + + // iterate through a sequence using FileNodeIterator + for( ; it != it_end; ++it, idx++ ) + { + cout << "feature #" << idx << ": "; + cout << "x=" << (int)(*it)["x"] << ", y=" << (int)(*it)["y"] << ", lbp: ("; + // you can also easily read numerical arrays using FileNode >> std::vector operator. + (*it)["lbp"] >> lbpval; + for( int i = 0; i < (int)lbpval.size(); i++ ) + cout << " " << (int)lbpval[i]; + cout << ")" << endl; + } + fs2.release(); +@endcode + +Format specification {#format_spec} +-------------------- +`([count]{u|c|w|s|i|f|d})`... where the characters correspond to fundamental C++ types: +- `u` 8-bit unsigned number +- `c` 8-bit signed number +- `w` 16-bit unsigned number +- `s` 16-bit signed number +- `i` 32-bit signed number +- `f` single precision floating-point number +- `d` double precision floating-point number +- `r` pointer, 32 lower bits of which are written as a signed integer. The type can be used to + store structures with links between the elements. + +`count` is the optional counter of values of a given type. For example, `2if` means that each array +element is a structure of 2 integers, followed by a single-precision floating-point number. The +equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u` +means that the array consists of bytes, and `2d` means the array consists of pairs of doubles. + +@see @ref samples/cpp/filestorage.cpp +*/ + +//! @{ + +/** @example samples/cpp/filestorage.cpp +A complete example using the FileStorage interface +*/ + +////////////////////////// XML & YAML I/O ////////////////////////// + +class CV_EXPORTS FileNode; +class CV_EXPORTS FileNodeIterator; + +/** @brief XML/YAML/JSON file storage class that encapsulates all the information necessary for writing or +reading data to/from a file. + */ +class CV_EXPORTS_W FileStorage +{ +public: + //! file storage mode + enum Mode + { + READ = 0, //!< value, open the file for reading + WRITE = 1, //!< value, open the file for writing + APPEND = 2, //!< value, open the file for appending + MEMORY = 4, /**< flag, read data from source or write data to the internal buffer (which is + returned by FileStorage::release) */ + FORMAT_MASK = (7<<3), //!< mask for format flags + FORMAT_AUTO = 0, //!< flag, auto format + FORMAT_XML = (1<<3), //!< flag, XML format + FORMAT_YAML = (2<<3), //!< flag, YAML format + FORMAT_JSON = (3<<3), //!< flag, JSON format + + BASE64 = 64, //!< flag, write rawdata in Base64 by default. (consider using WRITE_BASE64) + WRITE_BASE64 = BASE64 | WRITE, //!< flag, enable both WRITE and BASE64 + }; + enum State + { + UNDEFINED = 0, + VALUE_EXPECTED = 1, + NAME_EXPECTED = 2, + INSIDE_MAP = 4 + }; + + /** @brief The constructors. + + The full constructor opens the file. Alternatively you can use the default constructor and then + call FileStorage::open. 
+ */ + CV_WRAP FileStorage(); + + /** @overload + @copydoc open() + */ + CV_WRAP FileStorage(const String& filename, int flags, const String& encoding=String()); + + //! the destructor. calls release() + virtual ~FileStorage(); + + /** @brief Opens a file. + + See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release + before opening the file. + @param filename Name of the file to open or the text string to read the data from. + Extension of the file (.xml, .yml/.yaml or .json) determines its format (XML, YAML or JSON + respectively). Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both + FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify + the output file format (e.g. mydata.xml, .yml etc.). A file name can also contain parameters. + You can use this format, "*?base64" (e.g. "file.json?base64" (case sensitive)), as an alternative to + FileStorage::BASE64 flag. + @param flags Mode of operation. One of FileStorage::Mode + @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and + you should use 8-bit encoding instead of it. + */ + CV_WRAP virtual bool open(const String& filename, int flags, const String& encoding=String()); + + /** @brief Checks whether the file is opened. + + @returns true if the object is associated with the current file and false otherwise. It is a + good practice to call this method after you tried to open a file. + */ + CV_WRAP virtual bool isOpened() const; + + /** @brief Closes the file and releases all the memory buffers. + + Call this method after all I/O operations with the storage are finished. + */ + CV_WRAP virtual void release(); + + /** @brief Closes the file and releases all the memory buffers. + + Call this method after all I/O operations with the storage are finished. If the storage was + opened for writing data and FileStorage::WRITE was specified + */ + CV_WRAP virtual String releaseAndGetString(); + + /** @brief Returns the first element of the top-level mapping. + @returns The first element of the top-level mapping. + */ + CV_WRAP FileNode getFirstTopLevelNode() const; + + /** @brief Returns the top-level mapping + @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file. + However, YAML supports multiple streams and so there can be several. + @returns The top-level mapping. + */ + CV_WRAP FileNode root(int streamidx=0) const; + + /** @brief Returns the specified element of the top-level mapping. + @param nodename Name of the file node. + @returns Node with the given name. + */ + FileNode operator[](const String& nodename) const; + + /** @overload */ + CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const; + + /** + * @brief Simplified writing API to use with bindings. + * @param name Name of the written object. When writing to sequences (a.k.a. "arrays"), pass an empty string. + * @param val Value of the written object. + */ + CV_WRAP void write(const String& name, int val); + /// @overload + CV_WRAP void write(const String& name, double val); + /// @overload + CV_WRAP void write(const String& name, const String& val); + /// @overload + CV_WRAP void write(const String& name, const Mat& val); + /// @overload + CV_WRAP void write(const String& name, const std::vector& val); + + /** @brief Writes multiple numbers. + + Writes one or more numbers of the specified format to the currently written structure. 
Usually it is + more convenient to use operator `<<` instead of this method. + @param fmt Specification of each array element, see @ref format_spec "format specification" + @param vec Pointer to the written array. + @param len Number of the uchar elements to write. + */ + void writeRaw( const String& fmt, const void* vec, size_t len ); + + /** @brief Writes a comment. + + The function writes a comment into file storage. The comments are skipped when the storage is read. + @param comment The written comment, single-line or multi-line + @param append If true, the function tries to put the comment at the end of current line. + Else if the comment is multi-line, or if it does not fit at the end of the current + line, the comment starts a new line. + */ + CV_WRAP void writeComment(const String& comment, bool append = false); + + /** @brief Starts to write a nested structure (sequence or a mapping). + @param name name of the structure. When writing to sequences (a.k.a. "arrays"), pass an empty string. + @param flags type of the structure (FileNode::MAP or FileNode::SEQ (both with optional FileNode::FLOW)). + @param typeName optional name of the type you store. The effect of setting this depends on the storage format. + I.e. if the format has a specification for storing type information, this parameter is used. + */ + CV_WRAP void startWriteStruct(const String& name, int flags, const String& typeName=String()); + + /** @brief Finishes writing nested structure (should pair startWriteStruct()) + */ + CV_WRAP void endWriteStruct(); + + /** @brief Returns the normalized object name for the specified name of a file. + @param filename Name of a file + @returns The normalized object name. + */ + static String getDefaultObjectName(const String& filename); + + /** @brief Returns the current format. + * @returns The current format, see FileStorage::Mode + */ + CV_WRAP int getFormat() const; + + int state; + std::string elname; + + class Impl; + Ptr p; +}; + +/** @brief File Storage Node class. + +The node is used to store each and every element of the file storage opened for reading. When +XML/YAML file is read, it is first parsed and stored in the memory as a hierarchical collection of +nodes. Each node can be a "leaf" that is contain a single number or a string, or be a collection of +other nodes. There can be named collections (mappings) where each element has a name and it is +accessed by a name, and ordered collections (sequences) where elements do not have names but rather +accessed by index. Type of the file node can be determined using FileNode::type method. + +Note that file nodes are only used for navigating file storages opened for reading. When a file +storage is opened for writing, no data is stored in memory after it is written. + */ +class CV_EXPORTS_W_SIMPLE FileNode +{ +public: + //! type of the file storage node + enum + { + NONE = 0, //!< empty node + INT = 1, //!< an integer + REAL = 2, //!< floating-point number + FLOAT = REAL, //!< synonym or REAL + STR = 3, //!< text string in UTF-8 encoding + STRING = STR, //!< synonym for STR + SEQ = 4, //!< sequence + MAP = 5, //!< mapping + TYPE_MASK = 7, + + FLOW = 8, //!< compact representation of a sequence or mapping. Used only by YAML writer + UNIFORM = 8, //!< if set, means that all the collection elements are numbers of the same type (real's or int's). + //!< UNIFORM is used only when reading FileStorage; FLOW is used only when writing. 
So they share the same bit + EMPTY = 16, //!< empty structure (sequence or mapping) + NAMED = 32 //!< the node has a name (i.e. it is element of a mapping). + }; + /** @brief The constructors. + + These constructors are used to create a default file node, construct it from obsolete structures or + from the another file node. + */ + CV_WRAP FileNode(); + + /** @overload + @param fs Pointer to the file storage structure. + @param blockIdx Index of the memory block where the file node is stored + @param ofs Offset in bytes from the beginning of the serialized storage + + @deprecated + */ + FileNode(const FileStorage* fs, size_t blockIdx, size_t ofs); + + /** @overload + @param node File node to be used as initialization for the created file node. + */ + FileNode(const FileNode& node); + + FileNode& operator=(const FileNode& node); + + /** @brief Returns element of a mapping node or a sequence node. + @param nodename Name of an element in the mapping node. + @returns Returns the element with the given identifier. + */ + FileNode operator[](const String& nodename) const; + + /** @overload + @param nodename Name of an element in the mapping node. + */ + CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const; + + /** @overload + @param i Index of an element in the sequence node. + */ + CV_WRAP_AS(at) FileNode operator[](int i) const; + + /** @brief Returns keys of a mapping node. + @returns Keys of a mapping node. + */ + CV_WRAP std::vector keys() const; + + /** @brief Returns type of the node. + @returns Type of the node. See FileNode::Type + */ + CV_WRAP int type() const; + + //! returns true if the node is empty + CV_WRAP bool empty() const; + //! returns true if the node is a "none" object + CV_WRAP bool isNone() const; + //! returns true if the node is a sequence + CV_WRAP bool isSeq() const; + //! returns true if the node is a mapping + CV_WRAP bool isMap() const; + //! returns true if the node is an integer + CV_WRAP bool isInt() const; + //! returns true if the node is a floating-point number + CV_WRAP bool isReal() const; + //! returns true if the node is a text string + CV_WRAP bool isString() const; + //! returns true if the node has a name + CV_WRAP bool isNamed() const; + //! returns the node name or an empty string if the node is nameless + CV_WRAP std::string name() const; + //! returns the number of elements in the node, if it is a sequence or mapping, or 1 otherwise. + CV_WRAP size_t size() const; + //! returns raw size of the FileNode in bytes + CV_WRAP size_t rawSize() const; + //! returns the node content as an integer. If the node stores floating-point number, it is rounded. + operator int() const; + //! returns the node content as float + operator float() const; + //! returns the node content as double + operator double() const; + //! returns the node content as text string + inline operator std::string() const { return this->string(); } + + static bool isMap(int flags); + static bool isSeq(int flags); + static bool isCollection(int flags); + static bool isEmptyCollection(int flags); + static bool isFlow(int flags); + + uchar* ptr(); + const uchar* ptr() const; + + //! returns iterator pointing to the first node element + FileNodeIterator begin() const; + //! returns iterator pointing to the element following the last node element + FileNodeIterator end() const; + + /** @brief Reads node elements to the buffer with the specified format. + + Usually it is more convenient to use operator `>>` instead of this method. 
+ @param fmt Specification of each array element. See @ref format_spec "format specification" + @param vec Pointer to the destination array. + @param len Number of bytes to read (buffer size limit). If it is greater than number of + remaining elements then all of them will be read. + */ + void readRaw( const String& fmt, void* vec, size_t len ) const; + + /** Internal method used when reading FileStorage. + Sets the type (int, real or string) and value of the previously created node. + */ + void setValue( int type, const void* value, int len=-1 ); + + //! Simplified reading API to use with bindings. + CV_WRAP double real() const; + //! Simplified reading API to use with bindings. + CV_WRAP std::string string() const; + //! Simplified reading API to use with bindings. + CV_WRAP Mat mat() const; + + //protected: + FileNode(FileStorage::Impl* fs, size_t blockIdx, size_t ofs); + + FileStorage::Impl* fs; + size_t blockIdx; + size_t ofs; +}; + + +/** @brief used to iterate through sequences and mappings. + + A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a + sequence, stored in node. See the data reading sample in the beginning of the section. + */ +class CV_EXPORTS FileNodeIterator +{ +public: + /** @brief The constructors. + + These constructors are used to create a default iterator, set it to specific element in a file node + or construct it from another iterator. + */ + FileNodeIterator(); + + /** @overload + @param node File node - the collection to iterate over; + it can be a scalar (equivalent to 1-element collection) or "none" (equivalent to empty collection). + @param seekEnd - true if iterator needs to be set after the last element of the node; + that is: + * node.begin() => FileNodeIterator(node, false) + * node.end() => FileNodeIterator(node, true) + */ + FileNodeIterator(const FileNode& node, bool seekEnd); + + /** @overload + @param it Iterator to be used as initialization for the created iterator. + */ + FileNodeIterator(const FileNodeIterator& it); + + FileNodeIterator& operator=(const FileNodeIterator& it); + + //! returns the currently observed element + FileNode operator *() const; + + //! moves iterator to the next node + FileNodeIterator& operator ++ (); + //! moves iterator to the next node + FileNodeIterator operator ++ (int); + //! moves iterator forward by the specified offset (possibly negative) + FileNodeIterator& operator += (int ofs); + + /** @brief Reads node elements to the buffer with the specified format. + + Usually it is more convenient to use operator `>>` instead of this method. + @param fmt Specification of each array element. See @ref format_spec "format specification" + @param vec Pointer to the destination array. + @param len Number of bytes to read (buffer size limit). If it is greater than number of + remaining elements then all of them will be read. + */ + FileNodeIterator& readRaw( const String& fmt, void* vec, + size_t len=(size_t)INT_MAX ); + + //! returns the number of remaining (not read yet) elements + size_t remaining() const; + + bool equalTo(const FileNodeIterator& it) const; + +protected: + FileStorage::Impl* fs; + size_t blockIdx; + size_t ofs; + size_t blockSize; + size_t nodeNElems; + size_t idx; +}; + +//! @} core_xml + +/////////////////// XML & YAML I/O implementation ////////////////// + +//! @relates cv::FileStorage +//! 
@{
+
+CV_EXPORTS void write( FileStorage& fs, const String& name, int value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, float value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, double value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const String& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const Mat& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const SparseMat& value );
+#ifdef CV__LEGACY_PERSISTENCE
+CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<KeyPoint>& value);
+CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<DMatch>& value);
+#endif
+
+CV_EXPORTS void writeScalar( FileStorage& fs, int value );
+CV_EXPORTS void writeScalar( FileStorage& fs, float value );
+CV_EXPORTS void writeScalar( FileStorage& fs, double value );
+CV_EXPORTS void writeScalar( FileStorage& fs, const String& value );
+
+//! @}
+
+//! @relates cv::FileNode
+//! @{
+
+CV_EXPORTS void read(const FileNode& node, int& value, int default_value);
+CV_EXPORTS void read(const FileNode& node, float& value, float default_value);
+CV_EXPORTS void read(const FileNode& node, double& value, double default_value);
+CV_EXPORTS void read(const FileNode& node, std::string& value, const std::string& default_value);
+CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Mat() );
+CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() );
+#ifdef CV__LEGACY_PERSISTENCE
+CV_EXPORTS void read(const FileNode& node, std::vector<KeyPoint>& keypoints);
+CV_EXPORTS void read(const FileNode& node, std::vector<DMatch>& matches);
+#endif
+CV_EXPORTS void read(const FileNode& node, KeyPoint& value, const KeyPoint& default_value);
+CV_EXPORTS void read(const FileNode& node, DMatch& value, const DMatch& default_value);
+
+template<typename _Tp> static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Point_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Point3_<_Tp>& value, const Point3_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 3 ? default_value : Point3_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Size_<_Tp>& value, const Size_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Size_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Complex<_Tp>& value, const Complex<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Complex<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Rect_<_Tp>& value, const Rect_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Rect_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                          saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+template<typename _Tp, int cn> static inline void read(const FileNode& node, Vec<_Tp, cn>& value, const Vec<_Tp, cn>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]);
+}
+
+template<typename _Tp, int m, int n> static inline void read(const FileNode& node, Matx<_Tp, m, n>& value, const Matx<_Tp, m, n>& default_matx = Matx<_Tp, m, n>())
+{
+    Mat temp;
+    read(node, temp); // read as a Mat class
+
+    if (temp.empty())
+        value = default_matx;
+    else
+        value = Matx<_Tp, m, n>(temp);
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Scalar_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+static inline void read(const FileNode& node, Range& value, const Range& default_value)
+{
+    Point2i temp(value.start, value.end); const Point2i default_temp = Point2i(default_value.start, default_value.end);
+    read(node, temp, default_temp);
+    value.start = temp.x; value.end = temp.y;
+}
+
+//! @}
+
+/** @brief Writes string to a file storage.
+@relates cv::FileStorage
+ */
+CV_EXPORTS FileStorage& operator << (FileStorage& fs, const String& str);
+
+//! @cond IGNORED
+
+namespace internal
+{
+    class CV_EXPORTS WriteStructContext
+    {
+    public:
+        WriteStructContext(FileStorage& _fs, const String& name, int flags, const String& typeName = String());
+        ~WriteStructContext();
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp, int numflag> class VecWriterProxy
+    {
+    public:
+        VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
+        void operator()(const std::vector<_Tp>& vec) const
+        {
+            size_t count = vec.size();
+            for (size_t i = 0; i < count; i++)
+                write(*fs, vec[i]);
+        }
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp> class VecWriterProxy<_Tp, 1>
+    {
+    public:
+        VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
+        void operator()(const std::vector<_Tp>& vec) const
+        {
+            int _fmt = traits::SafeFmt<_Tp>::fmt;
+            char fmt[] = { (char)((_fmt >> 8) + '1'), (char)_fmt, '\0' };
+            fs->writeRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, vec.size() * sizeof(_Tp));
+        }
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp, int numflag> class VecReaderProxy
+    {
+    public:
+        VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
+        void operator()(std::vector<_Tp>& vec, size_t count) const
+        {
+            count = std::min(count, it->remaining());
+            vec.resize(count);
+            for (size_t i = 0; i < count; i++, ++(*it))
+                read(**it, vec[i], _Tp());
+        }
+    private:
+        FileNodeIterator* it;
+    };
+
+    template<typename _Tp> class VecReaderProxy<_Tp, 1>
+    {
+    public:
+        VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
+        void operator()(std::vector<_Tp>& vec, size_t count) const
+        {
+            size_t remaining = it->remaining();
+            size_t cn = DataType<_Tp>::channels;
+            int _fmt = traits::SafeFmt<_Tp>::fmt;
+            CV_Assert((_fmt >> 8) < 9);
+            char fmt[] = { (char)((_fmt >> 8)+'1'), (char)_fmt, '\0' };
+            CV_Assert((remaining % cn) == 0);
+            size_t remaining1 = remaining / cn;
+            count = count > remaining1 ? remaining1 : count;
+            vec.resize(count);
+            it->readRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, count*sizeof(_Tp));
+        }
+    private:
+        FileNodeIterator* it;
+    };
+
+} // internal
+
+//! @endcond
+
+//! @relates cv::FileStorage
+//!
@{
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const _Tp& value)
+{
+    write(fs, String(), value);
+}
+
+template<> inline
+void write( FileStorage& fs, const int& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const float& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const double& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const String& value )
+{
+    writeScalar(fs, value);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Point_<_Tp>& pt )
+{
+    write(fs, pt.x);
+    write(fs, pt.y);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Point3_<_Tp>& pt )
+{
+    write(fs, pt.x);
+    write(fs, pt.y);
+    write(fs, pt.z);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Size_<_Tp>& sz )
+{
+    write(fs, sz.width);
+    write(fs, sz.height);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Complex<_Tp>& c )
+{
+    write(fs, c.re);
+    write(fs, c.im);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Rect_<_Tp>& r )
+{
+    write(fs, r.x);
+    write(fs, r.y);
+    write(fs, r.width);
+    write(fs, r.height);
+}
+
+template<typename _Tp, int cn> static inline
+void write(FileStorage& fs, const Vec<_Tp, cn>& v )
+{
+    for(int i = 0; i < cn; i++)
+        write(fs, v.val[i]);
+}
+
+template<typename _Tp, int m, int n> static inline
+void write(FileStorage& fs, const Matx<_Tp, m, n>& x )
+{
+    write(fs, Mat(x)); // write as a Mat class
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Scalar_<_Tp>& s )
+{
+    write(fs, s.val[0]);
+    write(fs, s.val[1]);
+    write(fs, s.val[2]);
+    write(fs, s.val[3]);
+}
+
+static inline
+void write(FileStorage& fs, const Range& r )
+{
+    write(fs, r.start);
+    write(fs, r.end);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const std::vector<_Tp>& vec )
+{
+    cv::internal::VecWriterProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> w(&fs);
+    w(vec);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Point_<_Tp>& pt )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, pt);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Point3_<_Tp>& pt )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, pt);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Size_<_Tp>& sz )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, sz);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Complex<_Tp>& c )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, c);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Rect_<_Tp>& r )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, r);
+}
+
+template<typename _Tp, int cn> static inline
+void write(FileStorage& fs, const String& name, const Vec<_Tp, cn>& v )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, v);
+}
+
+template<typename _Tp, int m, int n> static inline
+void write(FileStorage& fs, const String& name, const Matx<_Tp, m, n>& x )
+{
+    write(fs, name, Mat(x)); // write as a Mat class
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Scalar_<_Tp>& s )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, s);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const Range& r )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, r);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const KeyPoint& kpt)
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, kpt.pt.x);
+    write(fs, kpt.pt.y);
+    write(fs, kpt.size);
+    write(fs, kpt.angle);
+    write(fs, kpt.response);
+    write(fs, kpt.octave);
+    write(fs, kpt.class_id);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const DMatch& m)
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, m.queryIdx);
+    write(fs, m.trainIdx);
+    write(fs, m.imgIdx);
+    write(fs, m.distance);
+}
+
+template<typename _Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type* = nullptr>
+static inline void write( FileStorage& fs, const String& name, const _Tp& val )
+{
+    write(fs, name, static_cast<int>(val));
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector<_Tp>& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+    write(fs, vec);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector< std::vector<_Tp> >& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ);
+    for(size_t i = 0; i < vec.size(); i++)
+    {
+        cv::internal::WriteStructContext ws_(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+        write(fs, vec[i]);
+    }
+}
+
+#ifdef CV__LEGACY_PERSISTENCE
+// This code is not needed anymore, but it is preserved here to keep source compatibility
+// Implementation is similar to template instantiations
+static inline void write(FileStorage& fs, const KeyPoint& kpt) { write(fs, String(), kpt); }
+static inline void write(FileStorage& fs, const DMatch& m) { write(fs, String(), m); }
+static inline void write(FileStorage& fs, const std::vector<KeyPoint>& vec)
+{
+    cv::internal::VecWriterProxy<KeyPoint, 0> w(&fs);
+    w(vec);
+}
+static inline void write(FileStorage& fs, const std::vector<DMatch>& vec)
+{
+    cv::internal::VecWriterProxy<DMatch, 0> w(&fs);
+    w(vec);
+}
+#endif
+
+//! @} FileStorage
+
+//! @relates cv::FileNode
+//!
@{
+
+static inline
+void read(const FileNode& node, bool& value, bool default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = temp != 0;
+}
+
+static inline
+void read(const FileNode& node, uchar& value, uchar default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<uchar>(temp);
+}
+
+static inline
+void read(const FileNode& node, schar& value, schar default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<schar>(temp);
+}
+
+static inline
+void read(const FileNode& node, ushort& value, ushort default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<ushort>(temp);
+}
+
+static inline
+void read(const FileNode& node, short& value, short default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<short>(temp);
+}
+
+template<typename _Tp> static inline
+void read( FileNodeIterator& it, std::vector<_Tp>& vec, size_t maxCount = (size_t)INT_MAX )
+{
+    cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it);
+    r(vec, maxCount);
+}
+
+template<typename _Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type* = nullptr>
+static inline void read(const FileNode& node, _Tp& value, const _Tp& default_value = static_cast<_Tp>(0))
+{
+    int temp;
+    read(node, temp, static_cast<int>(default_value));
+    value = static_cast<_Tp>(temp);
+}
+
+template<typename _Tp> static inline
+void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& default_value = std::vector<_Tp>() )
+{
+    if(node.empty())
+        vec = default_value;
+    else
+    {
+        FileNodeIterator it = node.begin();
+        read( it, vec );
+    }
+}
+
+static inline
+void read( const FileNode& node, std::vector<KeyPoint>& vec, const std::vector<KeyPoint>& default_value )
+{
+    if(node.empty())
+        vec = default_value;
+    else
+        read(node, vec);
+}
+
+static inline
+void read( const FileNode& node, std::vector<DMatch>& vec, const std::vector<DMatch>& default_value )
+{
+    if(node.empty())
+        vec = default_value;
+    else
+        read(node, vec);
+}
+
+//! @} FileNode
+
+//! @relates cv::FileStorage
+//! @{
+
+/** @brief Writes data to a file storage.
+ */
+template<typename _Tp> static inline
+FileStorage& operator << (FileStorage& fs, const _Tp& value)
+{
+    if( !fs.isOpened() )
+        return fs;
+    if( fs.state == FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP )
+        CV_Error( Error::StsError, "No element name has been given" );
+    write( fs, fs.elname, value );
+    if( fs.state & FileStorage::INSIDE_MAP )
+        fs.state = FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP;
+    return fs;
+}
+
+/** @brief Writes data to a file storage.
+ */
+static inline
+FileStorage& operator << (FileStorage& fs, const char* str)
+{
+    return (fs << String(str));
+}
+
+/** @brief Writes data to a file storage.
+ */
+static inline
+FileStorage& operator << (FileStorage& fs, char* value)
+{
+    return (fs << String(value));
+}
+
+//! @} FileStorage
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+FileNodeIterator& operator >> (FileNodeIterator& it, _Tp& value)
+{
+    read( *it, value, _Tp());
+    return ++it;
+}
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+FileNodeIterator& operator >> (FileNodeIterator& it, std::vector<_Tp>& vec)
+{
+    cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it);
+    r(vec, (size_t)INT_MAX);
+    return it;
+}
+
+//! @} FileNodeIterator
+
+//! @relates cv::FileNode
+//! @{
+
+/** @brief Reads data from a file storage.
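+
+     A minimal sketch (names are illustrative, assuming the file was written earlier with matching types):
+     ```
+     cv::FileStorage fs("settings.yml", cv::FileStorage::READ);
+     int iterations = 0;
+     cv::Mat R;
+     fs["iterations"] >> iterations; // dispatches to read(const FileNode&, int&, int)
+     fs["R"] >> R;                   // dispatches to read(const FileNode&, Mat&, const Mat&)
+     ```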
+ */
+template<typename _Tp> static inline
+void operator >> (const FileNode& n, _Tp& value)
+{
+    read( n, value, _Tp());
+}
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+void operator >> (const FileNode& n, std::vector<_Tp>& vec)
+{
+    FileNodeIterator it = n.begin();
+    it >> vec;
+}
+
+/** @brief Reads KeyPoint from a file storage.
+*/
+// It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, KeyPoint& kpt)
+{
+    FileNodeIterator it = n.begin();
+    it >> kpt.pt.x >> kpt.pt.y >> kpt.size >> kpt.angle >> kpt.response >> kpt.octave >> kpt.class_id;
+}
+
+#ifdef CV__LEGACY_PERSISTENCE
+static inline
+void operator >> (const FileNode& n, std::vector<KeyPoint>& vec)
+{
+    read(n, vec);
+}
+static inline
+void operator >> (const FileNode& n, std::vector<DMatch>& vec)
+{
+    read(n, vec);
+}
+#endif
+
+/** @brief Reads DMatch from a file storage.
+*/
+// It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, DMatch& m)
+{
+    FileNodeIterator it = n.begin();
+    it >> m.queryIdx >> m.trainIdx >> m.imgIdx >> m.distance;
+}
+
+//! @} FileNode
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+CV_EXPORTS bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2);
+CV_EXPORTS bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2);
+
+static inline
+ptrdiff_t operator - (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it2.remaining() - it1.remaining();
+}
+
+static inline
+bool operator < (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it1.remaining() > it2.remaining();
+}
+
+//! @} FileNodeIterator
+
+} // cv
+
+#endif // OPENCV_CORE_PERSISTENCE_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/quaternion.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/quaternion.hpp
new file mode 100644
index 0000000..ae35fd1
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/quaternion.hpp
@@ -0,0 +1,1696 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+//                       License Agreement
+//              For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//         http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong
+//         Longbu Wang
+#ifndef OPENCV_CORE_QUATERNION_HPP
+#define OPENCV_CORE_QUATERNION_HPP
+
+#include
+#include
+#include
+namespace cv
+{
+//! @addtogroup core
+//! @{
+
+//! Unit quaternion flag
+enum QuatAssumeType
+{
+    /**
+     * This flag is specified by default.
+     * If this flag is specified, the input quaternions are assumed to be not unit quaternions.
+     * This guarantees the correctness of the calculations,
+     * although the computation will be slower than with the flag QUAT_ASSUME_UNIT.
+     */
+    QUAT_ASSUME_NOT_UNIT,
+    /**
+     * If this flag is specified, the input quaternions are assumed to be unit quaternions, which
+     * will save some computations. However, if this flag is specified for quaternions that are not
+     * unit, the correctness of the result is not guaranteed.
+     */
+    QUAT_ASSUME_UNIT
+};
+
+class QuatEnum
+{
+public:
+    /** @brief Enum of Euler angles type.
+     *
+     * Without considering the possibility of using two different conventions for the definition of the rotation axes,
+     * there exist twelve possible sequences of rotation axes, divided into two groups:
+     * - Proper Euler angles (Z-X-Z, X-Y-X, Y-Z-Y, Z-Y-Z, X-Z-X, Y-X-Y)
+     * - Tait–Bryan angles (X-Y-Z, Y-Z-X, Z-X-Y, X-Z-Y, Z-Y-X, Y-X-Z).
+     *
+     * The three elemental rotations may be [extrinsic](https://en.wikipedia.org/wiki/Euler_angles#Definition_by_extrinsic_rotations)
+     * (rotations about the axes *xyz* of the original coordinate system, which is assumed to remain motionless),
+     * or [intrinsic](https://en.wikipedia.org/wiki/Euler_angles#Definition_by_intrinsic_rotations) (rotations about the axes of the rotating coordinate system *XYZ*, solidary with the moving body, which changes its orientation after each elemental rotation).
+     *
+     * Extrinsic and intrinsic rotations are related: composing intrinsic rotations in a given order
+     * is equivalent to composing extrinsic rotations about the same axes in the reverse order.
+     *
+     * The Euler angles are defined as follows:
+     * - \f$\theta_1 \f$ represents the first rotation angle,
+     * - \f$\theta_2 \f$ represents the second rotation angle,
+     * - \f$\theta_3 \f$ represents the third rotation angle.
+     *
+     * For intrinsic rotations in the order of X-Y-Z, the rotation matrix R can be calculated by:\f[R = X(\theta_1) Y(\theta_2) Z(\theta_3) \f]
+     * For extrinsic rotations in the order of X-Y-Z, the rotation matrix R can be calculated by:\f[R = Z({\theta_3}) Y({\theta_2}) X({\theta_1})\f]
+     * where
+     * \f[X({\theta_1})={\begin{bmatrix}1&0&0\\0&\cos{\theta_1}&-\sin{\theta_1}\\0&\sin{\theta_1}&\cos{\theta_1}\end{bmatrix}},
+     * Y({\theta_2})={\begin{bmatrix}\cos\theta_{2}&0&\sin\theta_{2}\\0&1&0\\-\sin\theta_{2}&0&\cos\theta_{2}\end{bmatrix}},
+     * Z({\theta_3})={\begin{bmatrix}\cos\theta_{3}&-\sin\theta_{3}&0\\\sin\theta_{3}&\cos\theta_{3}&0\\0&0&1\end{bmatrix}}.
+     * \f]
+     *
+     * The function is designed according to this set of conventions:
+     * - [Right handed](https://en.wikipedia.org/wiki/Right_hand_rule) reference frames are adopted, and the [right hand rule](https://en.wikipedia.org/wiki/Right_hand_rule) is used to determine the sign of angles.
+     * - Each matrix is meant to represent an [active rotation](https://en.wikipedia.org/wiki/Active_and_passive_transformation) (the composing and composed matrices
+     *   are supposed to act on the coordinates of vectors defined in the initial fixed reference frame and give as a result the coordinates of a rotated vector defined in the same reference frame).
+     * - For \f$\theta_1\f$ and \f$\theta_3\f$, the valid range is (−π, π].
+     *
+     *   For \f$\theta_2\f$, the valid range is [−π/2, π/2] or [0, π].
+     *
+     * For Tait–Bryan angles, the valid range of \f$\theta_2\f$ is [−π/2, π/2]. When transforming a quaternion to Euler angles, the solution is unique when \f$\theta_2 \in (−π/2, π/2)\f$.
+     * If \f$\theta_2 = −π/2\f$ or \f$\theta_2 = π/2\f$, there are infinite solutions. The common name for this situation is gimbal lock.
+     * For Proper Euler angles, the valid range of \f$\theta_2\f$ is [0, π]. The solutions of Euler angles are unique when \f$\theta_2 \in (0, π)\f$. If \f$\theta_2 = 0\f$ or \f$\theta_2 = π\f$,
+     * there are infinite solutions and gimbal lock will occur.
+     */
+    enum EulerAnglesType
+    {
+        INT_XYZ, ///< Intrinsic rotations with the Euler angles type X-Y-Z
+        INT_XZY, ///< Intrinsic rotations with the Euler angles type X-Z-Y
+        INT_YXZ, ///< Intrinsic rotations with the Euler angles type Y-X-Z
+        INT_YZX, ///< Intrinsic rotations with the Euler angles type Y-Z-X
+        INT_ZXY, ///< Intrinsic rotations with the Euler angles type Z-X-Y
+        INT_ZYX, ///< Intrinsic rotations with the Euler angles type Z-Y-X
+        INT_XYX, ///< Intrinsic rotations with the Euler angles type X-Y-X
+        INT_XZX, ///< Intrinsic rotations with the Euler angles type X-Z-X
+        INT_YXY, ///< Intrinsic rotations with the Euler angles type Y-X-Y
+        INT_YZY, ///< Intrinsic rotations with the Euler angles type Y-Z-Y
+        INT_ZXZ, ///< Intrinsic rotations with the Euler angles type Z-X-Z
+        INT_ZYZ, ///< Intrinsic rotations with the Euler angles type Z-Y-Z
+
+        EXT_XYZ, ///< Extrinsic rotations with the Euler angles type X-Y-Z
+        EXT_XZY, ///< Extrinsic rotations with the Euler angles type X-Z-Y
+        EXT_YXZ, ///< Extrinsic rotations with the Euler angles type Y-X-Z
+        EXT_YZX, ///< Extrinsic rotations with the Euler angles type Y-Z-X
+        EXT_ZXY, ///< Extrinsic rotations with the Euler angles type Z-X-Y
+        EXT_ZYX, ///< Extrinsic rotations with the Euler angles type Z-Y-X
+        EXT_XYX, ///< Extrinsic rotations with the Euler angles type X-Y-X
+        EXT_XZX, ///< Extrinsic rotations with the Euler angles type X-Z-X
+        EXT_YXY, ///< Extrinsic rotations with the Euler angles type Y-X-Y
+        EXT_YZY, ///< Extrinsic rotations with the Euler angles type Y-Z-Y
+        EXT_ZXZ, ///< Extrinsic rotations with the Euler angles type Z-X-Z
+        EXT_ZYZ, ///< Extrinsic rotations with the Euler angles type Z-Y-Z
+        #ifndef CV_DOXYGEN
+        EULER_ANGLES_MAX_VALUE
+        #endif
+    };
+
+};
+
+template <typename _Tp> class Quat;
+template <typename _Tp> std::ostream& operator<<(std::ostream&, const Quat<_Tp>&);
+
+/**
+ * Quaternion is a number system that extends the complex numbers. It can be expressed as a
+ * rotation in three-dimensional space.
+ * A quaternion is generally represented in the form:
+ *      \f[q = w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\f]
+ *      \f[q = [w, x, y, z]\f]
+ *      \f[q = [w, \boldsymbol{v}] \f]
+ *      \f[q = ||q||[\cos\psi, u_x\sin\psi, u_y\sin\psi, u_z\sin\psi].\f]
+ *      \f[q = ||q||[\cos\psi, \boldsymbol{u}\sin\psi]\f]
+ * where \f$\psi = \frac{\theta}{2}\f$, \f$\theta\f$ represents the rotation angle,
+ * \f$\boldsymbol{u} = [u_x, u_y, u_z]\f$ represents the normalized rotation axis,
+ * and \f$||q||\f$ represents the norm of \f$q\f$.
+ *
+ * A unit quaternion usually represents a rotation, and has the form:
+ *      \f[q = [\cos\psi, u_x\sin\psi, u_y\sin\psi, u_z\sin\psi].\f]
+ *
+ * To create a quaternion representing the rotation around the axis \f$\boldsymbol{u}\f$
+ * with angle \f$\theta\f$, you can use
+ * ```
+ * using namespace cv;
+ * double angle = CV_PI;
+ * Vec3d axis = {0, 0, 1};
+ * Quatd q = Quatd::createFromAngleAxis(angle, axis);
+ * ```
+ *
+ * You can simply use four numbers of the same type to create a quaternion
+ * ```
+ * Quatd q(1, 2, 3, 4);
+ * ```
+ * Or use a Vec4d or Vec4f vector.
+ * ``` + * Vec4d vec{1, 2, 3, 4}; + * Quatd q(vec); + * ``` + * + * ``` + * Vec4f vec{1, 2, 3, 4}; + * Quatf q(vec); + * ``` + * + * If you already have a 3x3 rotation matrix R, then you can use + * ``` + * Quatd q = Quatd::createFromRotMat(R); + * ``` + * + * If you already have a rotation vector rvec which has the form of `angle * axis`, then you can use + * ``` + * Quatd q = Quatd::createFromRvec(rvec); + * ``` + * + * To extract the rotation matrix from quaternion, see toRotMat3x3() + * + * To extract the Vec4d or Vec4f, see toVec() + * + * To extract the rotation vector, see toRotVec() + * + * If there are two quaternions \f$q_0, q_1\f$ are needed to interpolate, you can use nlerp(), slerp() or spline() + * ``` + * Quatd::nlerp(q0, q1, t) + * + * Quatd::slerp(q0, q1, t) + * + * Quatd::spline(q0, q0, q1, q1, t) + * ``` + * spline can smoothly connect rotations of multiple quaternions + * + * Three ways to get an element in Quaternion + * ``` + * Quatf q(1,2,3,4); + * std::cout << q.w << std::endl; // w=1, x=2, y=3, z=4 + * std::cout << q[0] << std::endl; // q[0]=1, q[1]=2, q[2]=3, q[3]=4 + * std::cout << q.at(0) << std::endl; + * ``` + */ +template +class Quat +{ + static_assert(std::is_floating_point<_Tp>::value, "Quaternion only make sense with type of float or double"); + using value_type = _Tp; +public: + static constexpr _Tp CV_QUAT_EPS = (_Tp)1.e-6; + static constexpr _Tp CV_QUAT_CONVERT_THRESHOLD = (_Tp)1.e-6; + + Quat(); + + /** + * @brief From Vec4d or Vec4f. + */ + explicit Quat(const Vec<_Tp, 4> &coeff); + + /** + * @brief from four numbers. + */ + Quat(_Tp w, _Tp x, _Tp y, _Tp z); + + /** + * @brief from an angle, axis. Axis will be normalized in this function. And + * it generates + * \f[q = [\cos\psi, u_x\sin\psi,u_y\sin\psi, u_z\sin\psi].\f] + * where \f$\psi = \frac{\theta}{2}\f$, \f$\theta\f$ is the rotation angle. + */ + static Quat<_Tp> createFromAngleAxis(const _Tp angle, const Vec<_Tp, 3> &axis); + + /** + * @brief from a 3x3 rotation matrix. + */ + static Quat<_Tp> createFromRotMat(InputArray R); + + /** + * @brief from a rotation vector + * \f$r\f$ has the form \f$\theta \cdot \boldsymbol{u}\f$, where \f$\theta\f$ + * represents rotation angle and \f$\boldsymbol{u}\f$ represents normalized rotation axis. + * + * Angle and axis could be easily derived as: + * \f[ + * \begin{equation} + * \begin{split} + * \psi &= ||r||\\ + * \boldsymbol{u} &= \frac{r}{\theta} + * \end{split} + * \end{equation} + * \f] + * Then a quaternion can be calculated by + * \f[q = [\cos\psi, \boldsymbol{u}\sin\psi]\f] + * where \f$\psi = \theta / 2 \f$ + */ + static Quat<_Tp> createFromRvec(InputArray rvec); + + /** + * @brief + * from Euler angles + * + * A quaternion can be generated from Euler angles by combining the quaternion representations of the Euler rotations. + * + * For example, if we use intrinsic rotations in the order of X-Y-Z,\f$\theta_1 \f$ is rotation around the X-axis, \f$\theta_2 \f$ is rotation around the Y-axis, + * \f$\theta_3 \f$ is rotation around the Z-axis. The final quaternion q can be calculated by + * + * \f[ {q} = q_{X, \theta_1} q_{Y, \theta_2} q_{Z, \theta_3}\f] + * where \f$ q_{X, \theta_1} \f$ is created from @ref createFromXRot, \f$ q_{Y, \theta_2} \f$ is created from @ref createFromYRot, + * \f$ q_{Z, \theta_3} \f$ is created from @ref createFromZRot. 
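+     *
+     * A minimal sketch (illustrative values; a rotation of π/2 about the X-axis):
+     * ```
+     * Vec3d angles{CV_PI / 2, 0, 0};
+     * Quatd q = Quatd::createFromEulerAngles(angles, QuatEnum::INT_XYZ);
+     * ```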
+ * @param angles the Euler angles in a vector of length 3 + * @param eulerAnglesType the convertion Euler angles type + */ + static Quat<_Tp> createFromEulerAngles(const Vec<_Tp, 3> &angles, QuatEnum::EulerAnglesType eulerAnglesType); + + /** + * @brief get a quaternion from a rotation about the Y-axis by \f$\theta\f$ . + * \f[q = \cos(\theta/2)+0 i+ sin(\theta/2) j +0k \f] + */ + static Quat<_Tp> createFromYRot(const _Tp theta); + + /** + * @brief get a quaternion from a rotation about the X-axis by \f$\theta\f$ . + * \f[q = \cos(\theta/2)+sin(\theta/2) i +0 j +0 k \f] + */ + static Quat<_Tp> createFromXRot(const _Tp theta); + + /** + * @brief get a quaternion from a rotation about the Z-axis by \f$\theta\f$. + * \f[q = \cos(\theta/2)+0 i +0 j +sin(\theta/2) k \f] + */ + static Quat<_Tp> createFromZRot(const _Tp theta); + + /** + * @brief a way to get element. + * @param index over a range [0, 3]. + * + * A quaternion q + * + * q.at(0) is equivalent to q.w, + * + * q.at(1) is equivalent to q.x, + * + * q.at(2) is equivalent to q.y, + * + * q.at(3) is equivalent to q.z. + */ + _Tp at(size_t index) const; + + /** + * @brief return the conjugate of this quaternion. + * \f[q.conjugate() = (w, -x, -y, -z).\f] + */ + Quat<_Tp> conjugate() const; + + /** + * + * @brief return the value of exponential value. + * \f[\exp(q) = e^w (\cos||\boldsymbol{v}||+ \frac{v}{||\boldsymbol{v}||})\sin||\boldsymbol{v}||\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example: + * ``` + * Quatd q{1,2,3,4}; + * cout << exp(q) << endl; + * ``` + */ + template + friend Quat exp(const Quat &q); + + /** + * @brief return the value of exponential value. + * \f[\exp(q) = e^w (\cos||\boldsymbol{v}||+ \frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * cout << q.exp() << endl; + * ``` + */ + Quat<_Tp> exp() const; + + /** + * @brief return the value of logarithm function. + * \f[\ln(q) = \ln||q|| + \frac{\boldsymbol{v}}{||\boldsymbol{v}||}\arccos\frac{w}{||q||}.\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q1{1,2,3,4}; + * cout << log(q1) << endl; + * ``` + */ + template + friend Quat log(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of logarithm function. + * \f[\ln(q) = \ln||q|| + \frac{\boldsymbol{v}}{||\boldsymbol{v}||}\arccos\frac{w}{||q||}\f]. + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.log(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * Quatd q1(1,2,3,4); + * q1.normalize().log(assumeUnit); + * ``` + */ + Quat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of power function with index \f$x\f$. + * \f[q^x = ||q||(cos(x\theta) + \boldsymbol{u}sin(x\theta))).\f] + * @param q a quaternion. + * @param x index of exponentiation. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * power(q, 2.0); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * double angle = CV_PI; + * Vec3d axis{0, 0, 1}; + * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle + * power(q1, 2.0, assumeUnit);//This assumeUnit means q1 is a unit quaternion. + * ``` + * @note the type of the index should be the same as the quaternion. + */ + template + friend Quat power(const Quat &q, const T x, QuatAssumeType assumeUnit); + + /** + * @brief return the value of power function with index \f$x\f$. + * \f[q^x = ||q||(\cos(x\theta) + \boldsymbol{u}\sin(x\theta))).\f] + * @param x index of exponentiation. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.power(2.0); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * double angle = CV_PI; + * Vec3d axis{0, 0, 1}; + * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle + * q1.power(2.0, assumeUnit); //This assumeUnt means q1 is a unit quaternion + * ``` + */ + Quat<_Tp> power(const _Tp x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return \f$\sqrt{q}\f$. + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatf q(1,2,3,4); + * sqrt(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = {1,0,0,0}; + * sqrt(q, assumeUnit); //This assumeUnit means q is a unit quaternion. + * ``` + */ + template + friend Quat sqrt(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return \f$\sqrt{q}\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatf q(1,2,3,4); + * q.sqrt(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = {1,0,0,0}; + * q.sqrt(assumeUnit); //This assumeUnit means q is a unit quaternion + * ``` + */ + Quat<_Tp> sqrt(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of power function with quaternion \f$q\f$. + * \f[p^q = e^{q\ln(p)}.\f] + * @param p base quaternion of power function. + * @param q index quaternion of power function. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion \f$p\f$ assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd p(1,2,3,4); + * Quatd q(5,6,7,8); + * power(p, q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * p = p.normalize(); + * power(p, q, assumeUnit); //This assumeUnit means p is a unit quaternion + * ``` + */ + template + friend Quat power(const Quat &p, const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of power function with quaternion \f$q\f$. + * \f[p^q = e^{q\ln(p)}.\f] + * @param q index quaternion of power function. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and this function will save some computations. 
+ * + * For example + * ``` + * Quatd p(1,2,3,4); + * Quatd q(5,6,7,8); + * p.power(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * p = p.normalize(); + * p.power(q, assumeUnit); //This assumeUnit means p is a unit quaternion + * ``` + */ + Quat<_Tp> power(const Quat<_Tp> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the crossProduct between \f$p = (a, b, c, d) = (a, \boldsymbol{u})\f$ and \f$q = (w, x, y, z) = (w, \boldsymbol{v})\f$. + * \f[p \times q = \frac{pq- qp}{2}\f] + * \f[p \times q = \boldsymbol{u} \times \boldsymbol{v}\f] + * \f[p \times q = (cz-dy)i + (dx-bz)j + (by-xc)k \f] + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * Quatd p{5,6,7,8}; + * crossProduct(p, q); + * ``` + */ + template + friend Quat crossProduct(const Quat &p, const Quat &q); + + /** + * @brief return the crossProduct between \f$p = (a, b, c, d) = (a, \boldsymbol{u})\f$ and \f$q = (w, x, y, z) = (w, \boldsymbol{v})\f$. + * \f[p \times q = \frac{pq- qp}{2}.\f] + * \f[p \times q = \boldsymbol{u} \times \boldsymbol{v}.\f] + * \f[p \times q = (cz-dy)i + (dx-bz)j + (by-xc)k. \f] + * + * For example + * ``` + * Quatd q{1,2,3,4}; + * Quatd p{5,6,7,8}; + * p.crossProduct(q) + * ``` + */ + Quat<_Tp> crossProduct(const Quat<_Tp> &q) const; + + /** + * @brief return the norm of quaternion. + * \f[||q|| = \sqrt{w^2 + x^2 + y^2 + z^2}.\f] + */ + _Tp norm() const; + + /** + * @brief return a normalized \f$p\f$. + * \f[p = \frac{q}{||q||}\f] + * where \f$p\f$ satisfies \f$(p.x)^2 + (p.y)^2 + (p.z)^2 + (p.w)^2 = 1.\f$ + */ + Quat<_Tp> normalize() const; + + /** + * @brief return \f$q^{-1}\f$ which is an inverse of \f$q\f$ + * which satisfies \f$q * q^{-1} = 1\f$. + * @param q a quaternion. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * inv(q); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = q.normalize(); + * inv(q, assumeUnit);//This assumeUnit means p is a unit quaternion + * ``` + */ + template + friend Quat inv(const Quat &q, QuatAssumeType assumeUnit); + + /** + * @brief return \f$q^{-1}\f$ which is an inverse of \f$q\f$ + * satisfying \f$q * q^{-1} = 1\f$. + * @param assumeUnit if QUAT_ASSUME_UNIT, quaternion q assume to be a unit quaternion and this function will save some computations. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.inv(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q = q.normalize(); + * q.inv(assumeUnit); //assumeUnit means p is a unit quaternion + * ``` + */ + Quat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return sinh value of quaternion q, sinh could be calculated as: + * \f[\sinh(p) = \sin(w)\cos(||\boldsymbol{v}||) + \cosh(w)\frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * sinh(q); + * ``` + */ + template + friend Quat sinh(const Quat &q); + + /** + * @brief return sinh value of this quaternion, sinh could be calculated as: + * \f$\sinh(p) = \sin(w)\cos(||\boldsymbol{v}||) + \cosh(w)\frac{v}{||\boldsymbol{v}||}\sin||\boldsymbol{v}||\f$ + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.sinh(); + * ``` + */ + Quat<_Tp> sinh() const; + + /** + * @brief return cosh value of quaternion q, cosh could be calculated as: + * \f[\cosh(p) = \cosh(w) * \cos(||\boldsymbol{v}||) + \sinh(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sin(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * cosh(q); + * ``` + */ + template + friend Quat cosh(const Quat &q); + + /** + * @brief return cosh value of this quaternion, cosh could be calculated as: + * \f[\cosh(p) = \cosh(w) * \cos(||\boldsymbol{v}||) + \sinh(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}sin(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.cosh(); + * ``` + */ + Quat<_Tp> cosh() const; + + /** + * @brief return tanh value of quaternion q, tanh could be calculated as: + * \f[ \tanh(q) = \frac{\sinh(q)}{\cosh(q)}.\f] + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * tanh(q); + * ``` + * @sa sinh, cosh + */ + template + friend Quat tanh(const Quat &q); + + /** + * @brief return tanh value of this quaternion, tanh could be calculated as: + * \f[ \tanh(q) = \frac{\sinh(q)}{\cosh(q)}.\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.tanh(); + * ``` + * @sa sinh, cosh + */ + Quat<_Tp> tanh() const; + + /** + * @brief return tanh value of quaternion q, sin could be calculated as: + * \f[\sin(p) = \sin(w) * \cosh(||\boldsymbol{v}||) + \cos(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * sin(q); + * ``` + */ + template + friend Quat sin(const Quat &q); + + /** + * @brief return sin value of this quaternion, sin could be calculated as: + * \f[\sin(p) = \sin(w) * \cosh(||\boldsymbol{v}||) + \cos(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.sin(); + * ``` + */ + Quat<_Tp> sin() const; + + /** + * @brief return sin value of quaternion q, cos could be calculated as: + * \f[\cos(p) = \cos(w) * \cosh(||\boldsymbol{v}||) - \sin(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * cos(q); + * ``` + */ + template + friend Quat cos(const Quat &q); + + /** + * @brief return cos value of this quaternion, cos could be calculated as: + * \f[\cos(p) = \cos(w) * \cosh(||\boldsymbol{v}||) - \sin(w)\frac{\boldsymbol{v}}{||\boldsymbol{v}||}\sinh(||\boldsymbol{v}||)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.cos(); + * ``` + */ + Quat<_Tp> cos() const; + + /** + * @brief return tan value of quaternion q, tan could be calculated as: + * \f[\tan(q) = \frac{\sin(q)}{\cos(q)}.\f] + * @param q a quaternion. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * tan(q); + * ``` + */ + template + friend Quat tan(const Quat &q); + + /** + * @brief return tan value of this quaternion, tan could be calculated as: + * \f[\tan(q) = \frac{\sin(q)}{\cos(q)}.\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.tan(); + * ``` + */ + Quat<_Tp> tan() const; + + /** + * @brief return arcsin value of quaternion q, arcsin could be calculated as: + * \f[\arcsin(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arcsinh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * asin(q); + * ``` + */ + template + friend Quat asin(const Quat &q); + + /** + * @brief return arcsin value of this quaternion, arcsin could be calculated as: + * \f[\arcsin(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arcsinh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.asin(); + * ``` + */ + Quat<_Tp> asin() const; + + /** + * @brief return arccos value of quaternion q, arccos could be calculated as: + * \f[\arccos(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arccosh(q)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * acos(q); + * ``` + */ + template + friend Quat acos(const Quat &q); + + /** + * @brief return arccos value of this quaternion, arccos could be calculated as: + * \f[\arccos(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arccosh(q)\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.acos(); + * ``` + */ + Quat<_Tp> acos() const; + + /** + * @brief return arctan value of quaternion q, arctan could be calculated as: + * \f[\arctan(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arctanh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * atan(q); + * ``` + */ + template + friend Quat atan(const Quat &q); + + /** + * @brief return arctan value of this quaternion, arctan could be calculated as: + * \f[\arctan(q) = -\frac{\boldsymbol{v}}{||\boldsymbol{v}||}arctanh(q\frac{\boldsymbol{v}}{||\boldsymbol{v}||})\f] + * where \f$\boldsymbol{v} = [x, y, z].\f$ + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.atan(); + * ``` + */ + Quat<_Tp> atan() const; + + /** + * @brief return arcsinh value of quaternion q, arcsinh could be calculated as: + * \f[arcsinh(q) = \ln(q + \sqrt{q^2 + 1})\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * asinh(q); + * ``` + */ + template + friend Quat asinh(const Quat &q); + + /** + * @brief return arcsinh value of this quaternion, arcsinh could be calculated as: + * \f[arcsinh(q) = \ln(q + \sqrt{q^2 + 1})\f]. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.asinh(); + * ``` + */ + Quat<_Tp> asinh() const; + + /** + * @brief return arccosh value of quaternion q, arccosh could be calculated as: + * \f[arccosh(q) = \ln(q + \sqrt{q^2 - 1})\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * acosh(q); + * ``` + */ + template + friend Quat acosh(const Quat &q); + + /** + * @brief return arccosh value of this quaternion, arccosh could be calculated as: + * \f[arcosh(q) = \ln(q + \sqrt{q^2 - 1})\f]. 
+ * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.acosh(); + * ``` + */ + Quat<_Tp> acosh() const; + + /** + * @brief return arctanh value of quaternion q, arctanh could be calculated as: + * \f[arctanh(q) = \frac{\ln(q + 1) - \ln(1 - q)}{2}\f]. + * @param q a quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * atanh(q); + * ``` + */ + template + friend Quat atanh(const Quat &q); + + /** + * @brief return arctanh value of this quaternion, arctanh could be calculated as: + * \f[arcsinh(q) = \frac{\ln(q + 1) - \ln(1 - q)}{2}\f]. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.atanh(); + * ``` + */ + Quat<_Tp> atanh() const; + + /** + * @brief return true if this quaternion is a unit quaternion. + * @param eps tolerance scope of normalization. The eps could be defined as + * + * \f[eps = |1 - dotValue|\f] where \f[dotValue = (this.w^2 + this.x^2 + this,y^2 + this.z^2).\f] + * And this function will consider it is normalized when the dotValue over a range \f$[1-eps, 1+eps]\f$. + */ + bool isNormal(_Tp eps=CV_QUAT_EPS) const; + + /** + * @brief to throw an error if this quaternion is not a unit quaternion. + * @param eps tolerance scope of normalization. + * @sa isNormal + */ + void assertNormal(_Tp eps=CV_QUAT_EPS) const; + + /** + * @brief transform a quaternion to a 3x3 rotation matrix. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. Otherwise, this function will normalize this + * quaternion at first then do the transformation. + * + * @note Matrix A which is to be rotated should have the form + * \f[\begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n + * \end{bmatrix}\f] + * where the same subscript represents a point. The shape of A assume to be [3, n] + * The points matrix A can be rotated by toRotMat3x3() * A. + * The result has 3 rows and n columns too. + + * For example + * ``` + * double angle = CV_PI; + * Vec3d axis{0,0,1}; + * Quatd q_unit = Quatd::createFromAngleAxis(angle, axis); //quaternion could also be get by interpolation by two or more quaternions. + * + * //assume there is two points (1,0,0) and (1,0,1) to be rotated + * Mat pointsA = (Mat_(2, 3) << 1,0,0,1,0,1); + * //change the shape + * pointsA = pointsA.t(); + * // rotate 180 degrees around the z axis + * Mat new_point = q_unit.toRotMat3x3() * pointsA; + * // print two points + * cout << new_point << endl; + * ``` + */ + Matx<_Tp, 3, 3> toRotMat3x3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief transform a quaternion to a 4x4 rotation matrix. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. Otherwise, this function will normalize this + * quaternion at first then do the transformation. + * + * The operations is similar as toRotMat3x3 + * except that the points matrix should have the form + * \f[\begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n\\ + * 0&0&0&...&0 + * \end{bmatrix}\f] + * + * @sa toRotMat3x3 + */ + + Matx<_Tp, 4, 4> toRotMat4x4(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief transform the this quaternion to a Vec. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.toVec(); + * ``` + */ + Vec<_Tp, 4> toVec() const; + + /** + * @brief transform this quaternion to a Rotation vector. 
+ * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * Rotation vector rVec is defined as: + * \f[ rVec = [\theta v_x, \theta v_y, \theta v_z]\f] + * where \f$\theta\f$ represents rotation angle, and \f$\boldsymbol{v}\f$ represents the normalized rotation axis. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.toRotVec(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().toRotVec(assumeUnit); //answer is same as q.toRotVec(). + * ``` + */ + Vec<_Tp, 3> toRotVec(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief get the angle of quaternion, it returns the rotation angle. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * \f[\psi = 2 *arccos(\frac{w}{||q||})\f] + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.getAngle(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().getAngle(assumeUnit);//same as q.getAngle(). + * ``` + * @note It always return the value between \f$[0, 2\pi]\f$. + */ + _Tp getAngle(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief get the axis of quaternion, it returns a vector of length 3. + * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion assume to be a unit quaternion and + * this function will save some computations. + * + * the unit axis \f$\boldsymbol{u}\f$ is defined by + * \f[\begin{equation} + * \begin{split} + * \boldsymbol{v} + * &= \boldsymbol{u} ||\boldsymbol{v}||\\ + * &= \boldsymbol{u}||q||sin(\frac{\theta}{2}) + * \end{split} + * \end{equation}\f] + * where \f$v=[x, y ,z]\f$ and \f$\theta\f$ represents rotation angle. + * + * + * For example + * ``` + * Quatd q(1,2,3,4); + * q.getAxis(); + * + * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; + * q.normalize().getAxis(assumeUnit);//same as q.getAxis() + * ``` + */ + Vec<_Tp, 3> getAxis(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the dot between quaternion \f$q\f$ and this quaternion. + * + * dot(p, q) is a good metric of how close the quaternions are. + * Indeed, consider the unit quaternion difference \f$p^{-1} * q\f$, its real part is dot(p, q). + * At the same time its real part is equal to \f$\cos(\beta/2)\f$ where \f$\beta\f$ is + * an angle of rotation between p and q, i.e., + * Therefore, the closer dot(p, q) to 1, + * the smaller rotation between them. + * \f[p \cdot q = p.w \cdot q.w + p.x \cdot q.x + p.y \cdot q.y + p.z \cdot q.z\f] + * @param q the other quaternion. + * + * For example + * ``` + * Quatd q(1,2,3,4); + * Quatd p(5,6,7,8); + * p.dot(q); + * ``` + */ + _Tp dot(Quat<_Tp> q) const; + + /** + * @brief To calculate the interpolation from \f$q_0\f$ to \f$q_1\f$ by Linear Interpolation(Nlerp) + * For two quaternions, this interpolation curve can be displayed as: + * \f[Lerp(q_0, q_1, t) = (1 - t)q_0 + tq_1.\f] + * Obviously, the lerp will interpolate along a straight line if we think of \f$q_0\f$ and \f$q_1\f$ as a vector + * in a two-dimensional space. When \f$t = 0\f$, it returns \f$q_0\f$ and when \f$t= 1\f$, it returns \f$q_1\f$. + * \f$t\f$ should to be ranged in \f$[0, 1]\f$ normally. + * @param q0 a quaternion used in linear interpolation. + * @param q1 a quaternion used in linear interpolation. + * @param t percent of vector \f$\overrightarrow{q_0q_1}\f$ over a range [0, 1]. + * @note it returns a non-unit quaternion. 
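+     *
+     * A small sketch (t = 0.5 yields the unnormalized midpoint):
+     * ```
+     * Quatd q0{1, 0, 0, 0};
+     * Quatd q1{0, 1, 0, 0};
+     * Quatd mid = Quatd::lerp(q0, q1, 0.5); // [0.5, 0.5, 0, 0]
+     * ```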
+     */
+    static Quat<_Tp> lerp(const Quat<_Tp> &q0, const Quat<_Tp> &q1, const _Tp t);
+
+    /**
+     * @brief To calculate the interpolation from \f$q_0\f$ to \f$q_1\f$ by Normalized Linear Interpolation (Nlerp).
+     * It returns a normalized quaternion of Linear Interpolation (Lerp).
+     * \f[ Nlerp(q_0, q_1, t) = \frac{(1 - t)q_0 + tq_1}{||(1 - t)q_0 + tq_1||}.\f]
+     * The interpolation will always choose the shortest path, but constant speed is not guaranteed.
+     * @param q0 a quaternion used in normalized linear interpolation.
+     * @param q1 a quaternion used in normalized linear interpolation.
+     * @param t percent of vector \f$\overrightarrow{q_0q_1}\f$ over a range [0, 1].
+     * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions are assumed to be unit quaternions. Otherwise, all input
+     * quaternions will be normalized inside the function.
+     * @sa lerp
+     */
+    static Quat<_Tp> nlerp(const Quat<_Tp> &q0, const Quat<_Tp> &q1, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+    @brief To calculate the interpolation between \f$q_0\f$ and \f$q_1\f$ by Spherical Linear
+    Interpolation (Slerp), which can be defined as:
+    \f[ Slerp(q_0, q_1, t) = \frac{\sin((1-t)\theta)}{\sin(\theta)}q_0 + \frac{\sin(t\theta)}{\sin(\theta)}q_1\f]
+    where \f$\theta\f$ can be calculated as:
+    \f[\theta = \cos^{-1}(q_0\cdot q_1)\f]
+    provided that both quaternions have unit norm.
+    @param q0 a quaternion used in Slerp.
+    @param q1 a quaternion used in Slerp.
+    @param t percent of angle between \f$q_0\f$ and \f$q_1\f$ over a range [0, 1].
+    @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions are assumed to be unit quaternions. Otherwise, all input
+    quaternions will be normalized inside the function.
+    @param directChange if true, the interpolation will choose the nearest path.
+    @note If the interpolation angle is small, the error between Nlerp and Slerp is not large. To improve efficiency and
+    avoid zero-division error, we use Nlerp instead of Slerp.
+    */
+    static Quat<_Tp> slerp(const Quat<_Tp> &q0, const Quat<_Tp> &q1, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT, bool directChange=true);
+
+    /**
+     * @brief To calculate the interpolation between \f$q_0\f$, \f$q_1\f$, \f$q_2\f$, \f$q_3\f$ by Spherical and quadrangle (Squad). This could be defined as:
+     * \f[Squad(q_i, s_i, s_{i+1}, q_{i+1}, t) = Slerp(Slerp(q_i, q_{i+1}, t), Slerp(s_i, s_{i+1}, t), 2t(1-t))\f]
+     * where
+     * \f[s_i = q_i\exp(-\frac{\log(q^*_iq_{i+1}) + \log(q^*_iq_{i-1})}{4})\f]
+     *
+     * The Squad expression is analogous to the \f$B\acute{e}zier\f$ curve, but involves spherical linear
+     * interpolation instead of simple linear interpolation. Each \f$s_i\f$ needs to be calculated from three
+     * quaternions.
+     *
+     * @param q0 the first quaternion.
+     * @param s0 the second quaternion.
+     * @param s1 the third quaternion.
+     * @param q1 the fourth quaternion.
+     * @param t interpolation parameter of quadratic and linear interpolation over a range \f$[0, 1]\f$.
+     * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions are assumed to be unit quaternions. Otherwise, all input
+     * quaternions will be normalized inside the function.
+     * @param directChange if true, squad will find the nearest path to interpolate.
+     * @sa interPoint, spline
+     */
+    static Quat<_Tp> squad(const Quat<_Tp> &q0, const Quat<_Tp> &s0,
+                           const Quat<_Tp> &s1, const Quat<_Tp> &q1,
+                           const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT,
+                           bool directChange=true);
+
+    /**
+     * @brief This is a partial calculation used by squad.
+    /**
+     * @brief This is a part of the calculation of squad.
+     * It computes the intermediate quaternion \f$s_i\f$ from each three consecutive quaternions:
+     * \f[s_i = q_i\exp(-\frac{\log(q^*_iq_{i+1}) + \log(q^*_iq_{i-1})}{4}).\f]
+     * @param q0 the first quaternion.
+     * @param q1 the second quaternion.
+     * @param q2 the third quaternion.
+     * @param assumeUnit if QUAT_ASSUME_UNIT, all input quaternions are assumed to be unit quaternions. Otherwise, all input
+     * quaternions will be normalized inside the function.
+     * @sa squad
+     */
+    static Quat<_Tp> interPoint(const Quat<_Tp> &q0, const Quat<_Tp> &q1,
+                                const Quat<_Tp> &q2, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief to calculate a quaternion which is the result of a \f$C^1\f$ continuous
+     * spline curve constructed by squad at the ratio t. Here, the interpolation values are
+     * between \f$q_1\f$ and \f$q_2\f$; \f$q_0\f$ and \f$q_3\f$ are used to ensure the \f$C^1\f$
+     * continuity. If t = 0, it returns \f$q_1\f$; if t = 1, it returns \f$q_2\f$.
+     * @param q0 the first input quaternion to ensure \f$C^1\f$ continuity.
+     * @param q1 the second input quaternion.
+     * @param q2 the third input quaternion.
+     * @param q3 the fourth input quaternion, serving the same purpose as \f$q_0\f$.
+     * @param t ratio over a range [0, 1].
+     * @param assumeUnit if QUAT_ASSUME_UNIT, \f$q_0, q_1, q_2, q_3\f$ are assumed to be unit quaternions. Otherwise, all input
+     * quaternions will be normalized inside the function.
+     *
+     * For example:
+     *
+     * Suppose there are three quaternions \f$v_0, v_1, v_2\f$ of type Quatd to be interpolated.
+     *
+     * Interpolation between \f$v_0\f$ and \f$v_1\f$ with a ratio \f$t_0\f$ could be calculated as
+     * ```
+     * Quatd::spline(v0, v0, v1, v2, t0);
+     * ```
+     * Interpolation between \f$v_1\f$ and \f$v_2\f$ with a ratio \f$t_0\f$ could be calculated as
+     * ```
+     * Quatd::spline(v0, v1, v2, v2, t0);
+     * ```
+     * @sa squad, slerp
+     */
+    static Quat<_Tp> spline(const Quat<_Tp> &q0, const Quat<_Tp> &q1,
+                            const Quat<_Tp> &q2, const Quat<_Tp> &q3,
+                            const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief Return the opposite quaternion \f$-p\f$,
+     * which satisfies \f$p + (-p) = 0.\f$
+     *
+     * For example
+     * ```
+     * Quatd q{1, 2, 3, 4};
+     * std::cout << -q << std::endl; // [-1, -2, -3, -4]
+     * ```
+     */
+    Quat<_Tp> operator-() const;
+
+    /**
+     * @brief return true if two quaternions p and q are nearly equal, i.e. when the absolute
+     * value of the difference between each \f$p_i\f$ and \f$q_i\f$ is less than CV_QUAT_EPS.
+     */
+    bool operator==(const Quat<_Tp>&) const;
+
+    /**
+     * @brief Addition operator of two quaternions p and q.
+     * It returns a new quaternion in which each component is the sum of \f$p_i\f$ and \f$q_i\f$.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p + q << std::endl; //[6, 8, 10, 12]
+     * ```
+     */
+    Quat<_Tp> operator+(const Quat<_Tp>&) const;
+
+    /**
+     * @brief Addition assignment operator of two quaternions p and q.
+     * It adds the right operand to the left operand and assigns the result to the left operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p += q; // equivalent to p = p + q
+     * std::cout << p << std::endl; //[6, 8, 10, 12]
+     * ```
+     */
+    Quat<_Tp>& operator+=(const Quat<_Tp>&);
+
+    /**
+     * @brief Subtraction operator of two quaternions p and q.
+     * It returns a new quaternion in which each component is the difference of \f$p_i\f$ and \f$q_i\f$.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p - q << std::endl; //[-4, -4, -4, -4]
+     * ```
+     */
+    Quat<_Tp> operator-(const Quat<_Tp>&) const;
+
+    /**
+     * @brief Subtraction assignment operator of two quaternions p and q.
+     * It subtracts the right operand from the left operand and assigns the result to the left operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p -= q; // equivalent to p = p - q
+     * std::cout << p << std::endl; //[-4, -4, -4, -4]
+     * ```
+     */
+    Quat<_Tp>& operator-=(const Quat<_Tp>&);
+
+    /**
+     * @brief Multiplication assignment operator of two quaternions p and q.
+     * It multiplies the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion multiplication:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\
+     * &= [p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u} + \boldsymbol{u}\times \boldsymbol{v}].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$\cdot\f$ means dot product and \f$\times\f$ means cross product.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p *= q; // equivalent to p = p * q
+     * std::cout << p << std::endl; //[-60, 12, 30, 24]
+     * ```
+     */
+    Quat<_Tp>& operator*=(const Quat<_Tp>&);
+
+    /**
+     * @brief Multiplication assignment operator of a quaternion and a scalar.
+     * It multiplies the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z] * s\\
+     * &= [w * s, x * s, y * s, z * s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * p *= s; // equivalent to p = p * s
+     * std::cout << p << std::endl; //[2.0, 4.0, 6.0, 8.0]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    Quat<_Tp>& operator*=(const _Tp s);
+
+    /**
+     * @brief Multiplication operator of two quaternions p and q.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of quaternion multiplication:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\
+     * &= [p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u} + \boldsymbol{u}\times \boldsymbol{v}].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$\cdot\f$ means dot product and \f$\times\f$ means cross product.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p * q << std::endl; //[-60, 12, 30, 24]
+     * ```
+     */
+    Quat<_Tp> operator*(const Quat<_Tp>&) const;
+
+    /**
+     * @brief Division operator of a quaternion and a scalar.
+     * It divides the left operand by the right operand and returns the result as a new quaternion.
+     *
+     * Rule of quaternion division with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z] / s\\
+     * &= [w/s, x/s, y/s, z/s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * std::cout << p / s << std::endl; //[0.5, 1, 1.5, 2]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    Quat<_Tp> operator/(const _Tp s) const;
+
+    /**
+     * @brief Division operator of two quaternions p and q.
+     * Divides the left hand operand by the right hand operand.
+     *
+     * Rule of quaternion division with a quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p / q << std::endl; // equivalent to p * q.inv()
+     * ```
+     */
+    Quat<_Tp> operator/(const Quat<_Tp>&) const;
+
+    /**
+     * @brief Division assignment operator of a quaternion and a scalar.
+     * It divides the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion division with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z] / s\\
+     * &= [w / s, x / s, y / s, z / s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * p /= s; // equivalent to p = p / s
+     * std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    Quat<_Tp>& operator/=(const _Tp s);
+
+    /**
+     * @brief Division assignment operator of two quaternions p and q.
+     * It divides the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion division with a quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p /= q; // equivalent to p = p * q.inv()
+     * std::cout << p << std::endl;
+     * ```
+     */
+    Quat<_Tp>& operator/=(const Quat<_Tp>&);
+
+    _Tp& operator[](std::size_t n);
+
+    const _Tp& operator[](std::size_t n) const;
+
+    /**
+     * @brief Subtraction operator of a scalar and a quaternion.
+     * Subtracts the right hand operand from the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    template <typename T>
+    friend Quat<T> cv::operator-(const T s, const Quat<T>&);
+
+    /**
+     * @brief Subtraction operator of a quaternion and a scalar.
+     * Subtracts the right hand operand from the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << p - scalar << std::endl; //[-1.0, 2, 3, 4]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    template <typename T>
+    friend Quat<T> cv::operator-(const Quat<T>&, const T s);
+
+    /**
+     * @brief Addition operator of a scalar and a quaternion.
+     * Adds the right hand operand to the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    template <typename T>
+    friend Quat<T> cv::operator+(const T s, const Quat<T>&);
+
+    /**
+     * @brief Addition operator of a quaternion and a scalar.
+     * Adds the right hand operand to the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    template <typename T>
+    friend Quat<T> cv::operator+(const Quat<T>&, const T s);
+
+    /**
+     * @brief Multiplication operator of a scalar and a quaternion.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z] * s\\
+     * &= [w * s, x * s, y * s, z * s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * std::cout << s * p << std::endl; //[2.0, 4.0, 6.0, 8.0]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    template <typename T>
+    friend Quat<T> cv::operator*(const T s, const Quat<T>&);
+
+    /**
+     * @brief Multiplication operator of a quaternion and a scalar.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z] * s\\
+     * &= [w * s, x * s, y * s, z * s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * std::cout << p * s << std::endl; //[2.0, 4.0, 6.0, 8.0]
+     * ```
+     * @note the scalar type should match the element type of the quaternion.
+     */
+    template <typename T>
+    friend Quat<T> cv::operator*(const Quat<T>&, const T s);
+
+    template <typename S>
+    friend std::ostream& cv::operator<<(std::ostream&, const Quat<S>&);
+
+    /**
+     * @brief Transform a quaternion q to Euler angles.
+     *
+     * When transforming a quaternion \f$q = w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\f$ to Euler angles, the rotation matrix M can be calculated by:
+     * \f[ \begin{aligned} {M} &={\begin{bmatrix}1-2(y^{2}+z^{2})&2(xy-zw)&2(xz+yw)\\2(xy+zw)&1-2(x^{2}+z^{2})&2(yz-xw)\\2(xz-yw)&2(yz+xw)&1-2(x^{2}+y^{2})\end{bmatrix}}\end{aligned}.\f]
+     * On the other hand, the rotation matrix can be obtained from Euler angles.
+     * Using intrinsic rotations with the Euler angles type XYZ as an example, where \f$\theta_1\f$, \f$\theta_2\f$, \f$\theta_3\f$ are the three Euler angles, the rotation matrix R can be calculated by:
+     * \f[R = X(\theta_1)Y(\theta_2)Z(\theta_3)
+     * = {\begin{bmatrix}\cos\theta_{2}\cos\theta_{3}&-\cos\theta_{2}\sin\theta_{3}&\sin\theta_{2}\\\cos\theta_{1}\sin\theta_{3}+\cos\theta_{3}\sin\theta_{1}\sin\theta_{2}&\cos\theta_{1}\cos\theta_{3}-\sin\theta_{1}\sin\theta_{2}\sin\theta_{3}&-\cos\theta_{2}\sin\theta_{1}\\\sin\theta_{1}\sin\theta_{3}-\cos\theta_{1}\cos\theta_{3}\sin\theta_{2}&\cos\theta_{3}\sin\theta_{1}+\cos\theta_{1}\sin\theta_{2}\sin\theta_{3}&\cos\theta_{1}\cos\theta_{2}\end{bmatrix}}\f]
+     * The rotation matrices M and R are equal. As long as \f$ s_{2} \neq \pm 1 \f$, by comparing each element of the two matrices, the solution is
+     * \f$\begin{cases} \theta_1 = \arctan2(-m_{23},m_{33})\\\theta_2 = \arcsin(m_{13}) \\\theta_3 = \arctan2(-m_{12},m_{11}) \end{cases}\f$.
+     *
+     * When \f$ s_{2} = 1\f$ or \f$ s_{2} = -1\f$, gimbal lock occurs. The function will then warn: "WARNING: Gimbal Lock will occur. Euler angles are non-unique. For intrinsic rotations, we set the third angle to 0; for extrinsic rotations, we set the first angle to 0.".
+     *
+     * When \f$ s_{2} = 1\f$,
+     * the rotation matrix R is \f$R = {\begin{bmatrix}0&0&1\\\sin(\theta_1+\theta_3)&\cos(\theta_1+\theta_3)&0\\-\cos(\theta_1+\theta_3)&\sin(\theta_1+\theta_3)&0\end{bmatrix}}\f$.
+     *
+     * The number of solutions is infinite, subject to the condition \f$\begin{cases} \theta_1+\theta_3 = \arctan2(m_{21},m_{22})\\ \theta_2 = \pi/2 \end{cases}\f$.
+     *
+     * We set \f$ \theta_3 = 0\f$, so the solution is \f$\begin{cases} \theta_1 = \arctan2(m_{21},m_{22})\\ \theta_2 = \pi/2\\ \theta_3 = 0 \end{cases}\f$.
+     *
+     * When \f$ s_{2} = -1\f$,
+     * the rotation matrix R is \f$X_{1}Y_{2}Z_{3} = {\begin{bmatrix}0&0&-1\\-\sin(\theta_1-\theta_3)&\cos(\theta_1-\theta_3)&0\\\cos(\theta_1-\theta_3)&\sin(\theta_1-\theta_3)&0\end{bmatrix}}\f$.
+     *
+     * The number of solutions is infinite, subject to the condition \f$\begin{cases} \theta_1-\theta_3 = \arctan2(m_{32},m_{22})\\ \theta_2 = -\pi/2 \end{cases}\f$.
+     *
+     * We set \f$ \theta_3 = 0\f$, so the solution is \f$ \begin{cases}\theta_1 = \arctan2(m_{32},m_{22}) \\ \theta_2 = -\pi/2\\ \theta_3 = 0\end{cases}\f$.
+     *
+     * Since \f$ \sin\theta \in [-1, 1] \f$ and \f$ \cos\theta \in [-1, 1] \f$, an unnormalized quaternion would cause numerical trouble. For this reason, this function normalizes the quaternion first, and @ref QuatAssumeType is not needed.
+     *
+     * When gimbal lock occurs, we set \f$\theta_3 = 0\f$ for intrinsic rotations or \f$\theta_1 = 0\f$ for extrinsic rotations.
+     *
+     * As a result, for every Euler angles type, we can get the solution as shown in the following table.
+     * EulerAnglesType | Ordinary | \f$\theta_2 = \pi/2\f$ | \f$\theta_2 = -\pi/2\f$
+     * ------------- | -------------| -------------| -------------
+     * INT_XYZ|\f$ \theta_1 = \arctan2(-m_{23},m_{33})\\\theta_2 = \arcsin(m_{13}) \\\theta_3= \arctan2(-m_{12},m_{11}) \f$|\f$ \theta_1=\arctan2(m_{21},m_{22})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(m_{32},m_{22})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$
+     * INT_XZY|\f$ \theta_1 = \arctan2(m_{32},m_{22})\\\theta_2 = -\arcsin(m_{12}) \\\theta_3= \arctan2(m_{13},m_{11}) \f$|\f$ \theta_1=\arctan2(m_{31},m_{33})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(-m_{23},m_{33})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$
+     * INT_YXZ|\f$ \theta_1 = \arctan2(m_{13},m_{33})\\\theta_2 = -\arcsin(m_{23}) \\\theta_3= \arctan2(m_{21},m_{22}) \f$|\f$ \theta_1=\arctan2(m_{12},m_{11})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(-m_{12},m_{11})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$
+     * INT_YZX|\f$ \theta_1 = \arctan2(-m_{31},m_{11})\\\theta_2 = \arcsin(m_{21}) \\\theta_3= \arctan2(-m_{23},m_{22}) \f$|\f$ \theta_1=\arctan2(m_{13},m_{33})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(m_{13},m_{12})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$
+     * INT_ZXY|\f$ \theta_1 = \arctan2(-m_{12},m_{22})\\\theta_2 = \arcsin(m_{32}) \\\theta_3= \arctan2(-m_{31},m_{33}) \f$|\f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$
+     * INT_ZYX|\f$ \theta_1 = \arctan2(m_{21},m_{11})\\\theta_2 = \arcsin(-m_{31}) \\\theta_3= \arctan2(m_{32},m_{33}) \f$|\f$ \theta_1=\arctan2(m_{23},m_{22})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(-m_{12},m_{22})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$
+     * EXT_XYZ|\f$ \theta_1 = \arctan2(m_{32},m_{33})\\\theta_2 = \arcsin(-m_{31}) \\\ \theta_3 = \arctan2(m_{21},m_{11})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{23},m_{22}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(-m_{12},m_{22}) \f$
+     * EXT_XZY|\f$ \theta_1 = \arctan2(-m_{23},m_{22})\\\theta_2 = \arcsin(m_{21}) \\\theta_3= \arctan2(-m_{31},m_{11})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{13},m_{33}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(m_{13},m_{12}) \f$
+     * EXT_YXZ|\f$ \theta_1 = \arctan2(-m_{31},m_{33}) \\\theta_2 = \arcsin(m_{32}) \\\theta_3= \arctan2(-m_{12},m_{22})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{21},m_{11}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(m_{21},m_{11}) \f$
+     * EXT_YZX|\f$ \theta_1 = \arctan2(m_{13},m_{11})\\\theta_2 = -\arcsin(m_{12}) \\\theta_3= \arctan2(m_{32},m_{22})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{31},m_{33}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(-m_{23},m_{33}) \f$
+     * EXT_ZXY|\f$ \theta_1 = \arctan2(m_{21},m_{22})\\\theta_2 = -\arcsin(m_{23}) \\\theta_3= \arctan2(m_{13},m_{33})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{12},m_{11}) \f$|\f$ \theta_1= 0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(-m_{12},m_{11}) \f$
+     * EXT_ZYX|\f$ \theta_1 = \arctan2(-m_{12},m_{11})\\\theta_2 = \arcsin(m_{13}) \\\theta_3= \arctan2(-m_{23},m_{33})\f$|\f$ \theta_1=0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{21},m_{22}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(m_{32},m_{22}) \f$
+     *
+     * EulerAnglesType | Ordinary | \f$\theta_2 = 0\f$ | \f$\theta_2 = \pi\f$
+     * ------------- | -------------| -------------| -------------
+     * INT_XYX| \f$ \theta_1 = \arctan2(m_{21},-m_{31})\\\theta_2 =\arccos(m_{11}) \\\theta_3 = \arctan2(m_{12},m_{13}) \f$| \f$ \theta_1=\arctan2(m_{32},m_{33})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{23},m_{22})\\ \theta_2=\pi\\ \theta_3=0 \f$
+     * INT_XZX| \f$ \theta_1 = \arctan2(m_{31},m_{21})\\\theta_2 = \arccos(m_{11}) \\\theta_3 = \arctan2(m_{13},-m_{12}) \f$| \f$ \theta_1=\arctan2(m_{32},m_{33})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(-m_{32},m_{33})\\ \theta_2=\pi\\ \theta_3=0 \f$
+     * INT_YXY| \f$ \theta_1 = \arctan2(m_{12},m_{32})\\\theta_2 = \arccos(m_{22}) \\\theta_3 = \arctan2(m_{21},-m_{23}) \f$| \f$ \theta_1=\arctan2(m_{13},m_{11})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(-m_{31},m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$
+     * INT_YZY| \f$ \theta_1 = \arctan2(m_{32},-m_{12})\\\theta_2 = \arccos(m_{22}) \\\theta_3 =\arctan2(m_{23},m_{21}) \f$| \f$ \theta_1=\arctan2(m_{13},m_{11})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{13},-m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$
+     * INT_ZXZ| \f$ \theta_1 = \arctan2(-m_{13},m_{23})\\\theta_2 = \arccos(m_{33}) \\\theta_3 =\arctan2(m_{31},m_{32}) \f$| \f$ \theta_1=\arctan2(m_{21},m_{22})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$
+     * INT_ZYZ| \f$ \theta_1 = \arctan2(m_{23},m_{13})\\\theta_2 = \arccos(m_{33}) \\\theta_3 = \arctan2(m_{32},-m_{31}) \f$| \f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$
+     * EXT_XYX| \f$ \theta_1 = \arctan2(m_{12},m_{13}) \\\theta_2 = \arccos(m_{11}) \\\theta_3 = \arctan2(m_{21},-m_{31})\f$| \f$ \theta_1=0\\ \theta_2=0\\ \theta_3=\arctan2(m_{32},m_{33}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3= \arctan2(m_{23},m_{22}) \f$
+     * EXT_XZX| \f$ \theta_1 = \arctan2(m_{13},-m_{12})\\\theta_2 = \arccos(m_{11}) \\\theta_3 = \arctan2(m_{31},m_{21})\f$| \f$ \theta_1= 0\\ \theta_2=0\\ \theta_3=\arctan2(m_{32},m_{33}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(-m_{32},m_{33}) \f$
+     * EXT_YXY| \f$ \theta_1 = \arctan2(m_{21},-m_{23})\\\theta_2 = \arccos(m_{22}) \\\theta_3 = \arctan2(m_{12},m_{32}) \f$| \f$ \theta_1= 0\\ \theta_2=0\\ \theta_3=\arctan2(m_{13},m_{11}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(-m_{31},m_{11}) \f$
+     * EXT_YZY| \f$ \theta_1 = \arctan2(m_{23},m_{21}) \\\theta_2 = \arccos(m_{22}) \\\theta_3 = \arctan2(m_{32},-m_{12}) \f$| \f$ \theta_1= 0\\ \theta_2=0\\ \theta_3=\arctan2(m_{13},m_{11}) \f$| \f$ \theta_1=0\\ \theta_2=\pi\\ \theta_3=\arctan2(m_{13},-m_{11}) \f$
+     * EXT_ZXZ| \f$ \theta_1 = \arctan2(m_{31},m_{32}) \\\theta_2 = \arccos(m_{33}) \\\theta_3 = \arctan2(-m_{13},m_{23})\f$| \f$ \theta_1=0\\ \theta_2=0\\ \theta_3=\arctan2(m_{21},m_{22}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(m_{21},m_{11}) \f$
+     * EXT_ZYZ| \f$ \theta_1 = \arctan2(m_{32},-m_{31})\\\theta_2 = \arccos(m_{33}) \\\theta_3 = \arctan2(m_{23},m_{13}) \f$| \f$ \theta_1=0\\ \theta_2=0\\ \theta_3=\arctan2(m_{21},m_{11}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(m_{21},m_{11}) \f$
+     *
+     * @param eulerAnglesType the Euler angles type used for the conversion
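+     *
+     * For example, an illustrative round trip (Quatd and QuatEnum as declared above):
+     * ```
+     * Quatd q = Quatd::createFromEulerAngles({0.1, 0.2, 0.3}, QuatEnum::INT_XYZ);
+     * Vec3d angles = q.toEulerAngles(QuatEnum::INT_XYZ); // recovers {0.1, 0.2, 0.3}
+     * ```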
+     */
+    Vec<_Tp, 3> toEulerAngles(QuatEnum::EulerAnglesType eulerAnglesType);
+
+    _Tp w, x, y, z;
+
+};
+
+template <typename T>
+Quat<T> inv(const Quat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+template <typename T>
+Quat<T> sinh(const Quat<T> &q);
+
+template <typename T>
+Quat<T> cosh(const Quat<T> &q);
+
+template <typename T>
+Quat<T> tanh(const Quat<T> &q);
+
+template <typename T>
+Quat<T> sin(const Quat<T> &q);
+
+template <typename T>
+Quat<T> cos(const Quat<T> &q);
+
+template <typename T>
+Quat<T> tan(const Quat<T> &q);
+
+template <typename T>
+Quat<T> asinh(const Quat<T> &q);
+
+template <typename T>
+Quat<T> acosh(const Quat<T> &q);
+
+template <typename T>
+Quat<T> atanh(const Quat<T> &q);
+
+template <typename T>
+Quat<T> asin(const Quat<T> &q);
+
+template <typename T>
+Quat<T> acos(const Quat<T> &q);
+
+template <typename T>
+Quat<T> atan(const Quat<T> &q);
+
+template <typename T>
+Quat<T> power(const Quat<T> &q, const Quat<T> &p, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+template <typename T>
+Quat<T> exp(const Quat<T> &q);
+
+template <typename T>
+Quat<T> log(const Quat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+template <typename T>
+Quat<T> power(const Quat<T>& q, const T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+template <typename T>
+Quat<T> crossProduct(const Quat<T> &p, const Quat<T> &q);
+
+template <typename T>
+Quat<T> sqrt(const Quat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+template <typename T>
+Quat<T> operator*(const T, const Quat<T>&);
+
+template <typename T>
+Quat<T> operator*(const Quat<T>&, const T);
+
+template <typename T>
+std::ostream& operator<<(std::ostream&, const Quat<T>&);
+
+using Quatd = Quat<double>;
+using Quatf = Quat<float>;
+
+//! @} core
+}
+
+#include "opencv2/core/quaternion.inl.hpp"
+
+#endif /* OPENCV_CORE_QUATERNION_HPP */
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/quaternion.inl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/quaternion.inl.hpp
new file mode 100644
index 0000000..29a16d9
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/quaternion.inl.hpp
@@ -0,0 +1,1063 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong
+//         Longbu Wang
+
+#ifndef OPENCV_CORE_QUATERNION_INL_HPP
+#define OPENCV_CORE_QUATERNION_INL_HPP
+
+#ifndef OPENCV_CORE_QUATERNION_HPP
+#error This is not a standalone header. Include quaternion.hpp instead.
+#endif
+
+//@cond IGNORE
+///////////////////////////////////////////////////////////////////////////////////////
+//Implementation
+namespace cv {
+
+template <typename T>
+Quat<T>::Quat() : w(0), x(0), y(0), z(0) {}
+
+template <typename T>
+Quat<T>::Quat(const Vec<T, 4> &coeff) : w(coeff[0]), x(coeff[1]), y(coeff[2]), z(coeff[3]) {}
+
+template <typename T>
+Quat<T>::Quat(const T qw, const T qx, const T qy, const T qz) : w(qw), x(qx), y(qy), z(qz) {}
+
+template <typename T>
+Quat<T> Quat<T>::createFromAngleAxis(const T angle, const Vec<T, 3> &axis)
+{
+    T w, x, y, z;
+    T vNorm = std::sqrt(axis.dot(axis));
+    if (vNorm < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "this quaternion does not represent a rotation");
+    }
+    const T angle_half = angle * T(0.5);
+    w = std::cos(angle_half);
+    const T sin_v = std::sin(angle_half);
+    const T sin_norm = sin_v / vNorm;
+    x = sin_norm * axis[0];
+    y = sin_norm * axis[1];
+    z = sin_norm * axis[2];
+    return Quat<T>(w, x, y, z);
+}
+
+template <typename T>
+Quat<T> Quat<T>::createFromRotMat(InputArray _R)
+{
+    CV_CheckTypeEQ(_R.type(), cv::traits::Type<T>::value, "");
+    if (_R.rows() != 3 || _R.cols() != 3)
+    {
+        CV_Error(Error::StsBadArg, "Cannot convert matrix to quaternion: rotation matrix should be a 3x3 matrix");
+    }
+    Matx<T, 3, 3> R;
+    _R.copyTo(R);
+
+    T S, w, x, y, z;
+    T trace = R(0, 0) + R(1, 1) + R(2, 2);
+    if (trace > 0)
+    {
+        S = std::sqrt(trace + 1) * T(2);
+        x = (R(1, 2) - R(2, 1)) / S;
+        y = (R(2, 0) - R(0, 2)) / S;
+        z = (R(0, 1) - R(1, 0)) / S;
+        w = -T(0.25) * S;
+    }
+    else if (R(0, 0) > R(1, 1) && R(0, 0) > R(2, 2))
+    {
+        S = std::sqrt(T(1.0) + R(0, 0) - R(1, 1) - R(2, 2)) * T(2);
+        x = -T(0.25) * S;
+        y = -(R(1, 0) + R(0, 1)) / S;
+        z = -(R(0, 2) + R(2, 0)) / S;
+        w = (R(1, 2) - R(2, 1)) / S;
+    }
+    else if (R(1, 1) > R(2, 2))
+    {
+        S = std::sqrt(T(1.0) - R(0, 0) + R(1, 1) - R(2, 2)) * T(2);
+        x = (R(0, 1) + R(1, 0)) / S;
+        y = T(0.25) * S;
+        z = (R(1, 2) + R(2, 1)) / S;
+        w = (R(0, 2) - R(2, 0)) / S;
+    }
+    else
+    {
+        S = std::sqrt(T(1.0) - R(0, 0) - R(1, 1) + R(2, 2)) * T(2);
+        x = (R(0, 2) + R(2, 0)) / S;
+        y = (R(1, 2) + R(2, 1)) / S;
+        z = T(0.25) * S;
+        w = -(R(0, 1) - R(1, 0)) / S;
+    }
+    return Quat<T>(w, x, y, z);
+}
+
+template <typename T>
+Quat<T> Quat<T>::createFromRvec(InputArray _rvec)
+{
+    if (!((_rvec.cols() == 1 && _rvec.rows() == 3) || (_rvec.cols() == 3 && _rvec.rows() == 1))) {
+        CV_Error(Error::StsBadArg, "Cannot convert rotation vector to quaternion: The length of rotation vector should be 3");
+    }
+    Vec<T, 3> rvec;
+    _rvec.copyTo(rvec);
+    T psi = std::sqrt(rvec.dot(rvec));
+    if (abs(psi) < CV_QUAT_EPS) {
+        return Quat<T>(1, 0, 0, 0);
+    }
+    Vec<T, 3> axis = rvec / psi;
+    return createFromAngleAxis(psi, axis);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator-() const
+{
+    return Quat<T>(-w, -x, -y, -z);
+}
+
+template <typename T>
+inline bool Quat<T>::operator==(const Quat<T> &q) const
+{
+    return (abs(w - q.w) < CV_QUAT_EPS && abs(x - q.x) < CV_QUAT_EPS && abs(y - q.y) < CV_QUAT_EPS && abs(z - q.z) < CV_QUAT_EPS);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator+(const Quat<T> &q1) const
+{
+    return Quat<T>(w + q1.w, x + q1.x, y + q1.y, z + q1.z);
+}
+
+template <typename T>
+inline Quat<T> operator+(const T a, const Quat<T>& q)
+{
+    return Quat<T>(q.w + a, q.x, q.y, q.z);
+}
+
+template <typename T>
+inline Quat<T> operator+(const Quat<T>& q, const T a)
+{
+    return Quat<T>(q.w + a, q.x, q.y, q.z);
+}
+
+template <typename T>
+inline Quat<T> operator-(const T a, const Quat<T>& q)
+{
+    return Quat<T>(a - q.w, -q.x, -q.y, -q.z);
+}
+
+template <typename T>
+inline Quat<T> operator-(const Quat<T>& q, const T a)
+{
+    return Quat<T>(q.w - a, q.x, q.y, q.z);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator-(const Quat<T> &q1) const
+{
+    return Quat<T>(w - q1.w, x - q1.x, y - q1.y, z - q1.z);
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator+=(const Quat<T> &q1)
+{
+    w += q1.w;
+    x += q1.x;
+    y += q1.y;
+    z += q1.z;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator-=(const Quat<T> &q1)
+{
+    w -= q1.w;
+    x -= q1.x;
+    y -= q1.y;
+    z -= q1.z;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator*(const Quat<T> &q1) const
+{
+    Vec<T, 4> q{w, x, y, z};
+    Vec<T, 4> q2{q1.w, q1.x, q1.y, q1.z};
+    return Quat<T>(q * q2);
+}
+
+template <typename T>
+Quat<T> operator*(const Quat<T> &q1, const T a)
+{
+    return Quat<T>(a * q1.w, a * q1.x, a * q1.y, a * q1.z);
+}
+
+template <typename T>
+Quat<T> operator*(const T a, const Quat<T> &q1)
+{
+    return Quat<T>(a * q1.w, a * q1.x, a * q1.y, a * q1.z);
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator*=(const Quat<T> &q1)
+{
+    T qw, qx, qy, qz;
+    qw = w * q1.w - x * q1.x - y * q1.y - z * q1.z;
+    qx = x * q1.w + w * q1.x + y * q1.z - z * q1.y;
+    qy = y * q1.w + w * q1.y + z * q1.x - x * q1.z;
+    qz = z * q1.w + w * q1.z + x * q1.y - y * q1.x;
+    w = qw;
+    x = qx;
+    y = qy;
+    z = qz;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator/=(const Quat<T> &q1)
+{
+    Quat<T> q(*this * q1.inv());
+    w = q.w;
+    x = q.x;
+    y = q.y;
+    z = q.z;
+    return *this;
+}
+
+template <typename T>
+Quat<T>& Quat<T>::operator*=(const T q1)
+{
+    w *= q1;
+    x *= q1;
+    y *= q1;
+    z *= q1;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T>& Quat<T>::operator/=(const T a)
+{
+    const T a_inv = 1.0 / a;
+    w *= a_inv;
+    x *= a_inv;
+    y *= a_inv;
+    z *= a_inv;
+    return *this;
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator/(const T a) const
+{
+    const T a_inv = T(1.0) / a;
+    return Quat<T>(w * a_inv, x * a_inv, y * a_inv, z * a_inv);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::operator/(const Quat<T> &q) const
+{
+    return *this * q.inv();
+}
+
+template <typename T>
+inline const T& Quat<T>::operator[](std::size_t n) const
+{
+    switch (n) {
+        case 0:
+            return w;
+        case 1:
+            return x;
+        case 2:
+            return y;
+        case 3:
+            return z;
+        default:
+            CV_Error(Error::StsOutOfRange, "subscript exceeds the index range");
+    }
+}
+
+template <typename T>
+inline T& Quat<T>::operator[](std::size_t n)
+{
+    switch (n) {
+        case 0:
+            return w;
+        case 1:
+            return x;
+        case 2:
+            return y;
+        case 3:
+            return z;
+        default:
+            CV_Error(Error::StsOutOfRange, "subscript exceeds the index range");
+    }
+}
+
+template <typename T>
+std::ostream & operator<<(std::ostream &os, const Quat<T> &q)
+{
+    os << "Quat " << Vec<T, 4>{q.w, q.x, q.y, q.z};
+    return os;
+}
+
+template <typename T>
+inline T Quat<T>::at(size_t index) const
+{
+    return (*this)[index];
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::conjugate() const
+{
+    return Quat<T>(w, -x, -y, -z);
+}
+
+template <typename T>
+inline T Quat<T>::norm() const
+{
+    return std::sqrt(dot(*this));
+}
+
+template <typename T>
+Quat<T> exp(const Quat<T> &q)
+{
+    return q.exp();
+}
+
+template <typename T>
+Quat<T> Quat<T>::exp() const
+{
+    Vec<T, 3> v{x, y, z};
+    T normV = std::sqrt(v.dot(v));
+    T k = normV < CV_QUAT_EPS ? 1 : std::sin(normV) / normV;
+    return std::exp(w) * Quat<T>(std::cos(normV), v[0] * k, v[1] * k, v[2] * k);
+}
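+
+// Illustrative note: exp() above realizes the quaternion exponential
+// exp(q) = e^w * (cos||v|| + (v / ||v||) * sin||v||) for q = w + v, and log()
+// below is its inverse on the principal branch; q.log().exp() recovers q up to
+// numerical precision whenever the logarithm is well defined.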
+
+template <typename T>
+Quat<T> log(const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return q.log(assumeUnit);
+}
+
+template <typename T>
+Quat<T> Quat<T>::log(QuatAssumeType assumeUnit) const
+{
+    Vec<T, 3> v{x, y, z};
+    T vNorm = std::sqrt(v.dot(v));
+    if (assumeUnit)
+    {
+        T k = vNorm < CV_QUAT_EPS ? 1 : std::acos(w) / vNorm;
+        return Quat<T>(0, v[0] * k, v[1] * k, v[2] * k);
+    }
+    T qNorm = norm();
+    if (qNorm < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "Cannot apply this quaternion to log function: undefined");
+    }
+    T k = vNorm < CV_QUAT_EPS ? 1 : std::acos(w / qNorm) / vNorm;
+    return Quat<T>(std::log(qNorm), v[0] * k, v[1] * k, v[2] * k);
+}
+
+template <typename T>
+inline Quat<T> power(const Quat<T> &q1, const T alpha, QuatAssumeType assumeUnit)
+{
+    return q1.power(alpha, assumeUnit);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::power(const T alpha, QuatAssumeType assumeUnit) const
+{
+    if (x * x + y * y + z * z > CV_QUAT_EPS)
+    {
+        T angle = getAngle(assumeUnit);
+        Vec<T, 3> axis = getAxis(assumeUnit);
+        if (assumeUnit)
+        {
+            return createFromAngleAxis(alpha * angle, axis);
+        }
+        return std::pow(norm(), alpha) * createFromAngleAxis(alpha * angle, axis);
+    }
+    else
+    {
+        return std::pow(norm(), alpha) * Quat<T>(w, x, y, z);
+    }
+}
+
+template <typename T>
+inline Quat<T> sqrt(const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return q.sqrt(assumeUnit);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::sqrt(QuatAssumeType assumeUnit) const
+{
+    return power(0.5, assumeUnit);
+}
+
+template <typename T>
+inline Quat<T> power(const Quat<T> &p, const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return p.power(q, assumeUnit);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::power(const Quat<T> &q, QuatAssumeType assumeUnit) const
+{
+    return cv::exp(q * log(assumeUnit));
+}
+
+template <typename T>
+inline T Quat<T>::dot(Quat<T> q1) const
+{
+    return w * q1.w + x * q1.x + y * q1.y + z * q1.z;
+}
+
+template <typename T>
+inline Quat<T> crossProduct(const Quat<T> &p, const Quat<T> &q)
+{
+    return p.crossProduct(q);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::crossProduct(const Quat<T> &q) const
+{
+    return Quat<T>(0, y * q.z - z * q.y, z * q.x - x * q.z, x * q.y - q.x * y);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::normalize() const
+{
+    T normVal = norm();
+    if (normVal < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "Cannot normalize this quaternion: the norm is too small.");
+    }
+    return Quat<T>(w / normVal, x / normVal, y / normVal, z / normVal);
+}
+
+template <typename T>
+inline Quat<T> inv(const Quat<T> &q, QuatAssumeType assumeUnit)
+{
+    return q.inv(assumeUnit);
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::inv(QuatAssumeType assumeUnit) const
+{
+    if (assumeUnit)
+    {
+        return conjugate();
+    }
+    T norm2 = dot(*this);
+    if (norm2 < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "This quaternion does not have an inverse");
+    }
+    return conjugate() / norm2;
+}
+
+template <typename T>
+inline Quat<T> sinh(const Quat<T> &q)
+{
+    return q.sinh();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::sinh() const
+{
+    Vec<T, 3> v{x, y, z};
+    T vNorm = std::sqrt(v.dot(v));
+    T k = vNorm < CV_QUAT_EPS ? 1 : std::cosh(w) * std::sin(vNorm) / vNorm;
+    return Quat<T>(std::sinh(w) * std::cos(vNorm), v[0] * k, v[1] * k, v[2] * k);
+}
+
+template <typename T>
+inline Quat<T> cosh(const Quat<T> &q)
+{
+    return q.cosh();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::cosh() const
+{
+    Vec<T, 3> v{x, y, z};
+    T vNorm = std::sqrt(v.dot(v));
+    T k = vNorm < CV_QUAT_EPS ? 1 : std::sinh(w) * std::sin(vNorm) / vNorm;
+    return Quat<T>(std::cosh(w) * std::cos(vNorm), v[0] * k, v[1] * k, v[2] * k);
+}
+
+template <typename T>
+inline Quat<T> tanh(const Quat<T> &q)
+{
+    return q.tanh();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::tanh() const
+{
+    return sinh() * cosh().inv();
+}
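+
+// Illustrative note: the trigonometric functions below follow the same pattern
+// as the hyperbolic ones above; for q = w + v they combine the real sin/cos of
+// w with cosh/sinh of ||v|| along the unit axis v / ||v||.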
+
+template <typename T>
+inline Quat<T> sin(const Quat<T> &q)
+{
+    return q.sin();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::sin() const
+{
+    Vec<T, 3> v{x, y, z};
+    T vNorm = std::sqrt(v.dot(v));
+    T k = vNorm < CV_QUAT_EPS ? 1 : std::cos(w) * std::sinh(vNorm) / vNorm;
+    return Quat<T>(std::sin(w) * std::cosh(vNorm), v[0] * k, v[1] * k, v[2] * k);
+}
+
+template <typename T>
+inline Quat<T> cos(const Quat<T> &q)
+{
+    return q.cos();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::cos() const
+{
+    Vec<T, 3> v{x, y, z};
+    T vNorm = std::sqrt(v.dot(v));
+    T k = vNorm < CV_QUAT_EPS ? 1 : std::sin(w) * std::sinh(vNorm) / vNorm;
+    return Quat<T>(std::cos(w) * std::cosh(vNorm), -v[0] * k, -v[1] * k, -v[2] * k);
+}
+
+template <typename T>
+inline Quat<T> tan(const Quat<T> &q)
+{
+    return q.tan();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::tan() const
+{
+    return sin() * cos().inv();
+}
+
+template <typename T>
+inline Quat<T> asinh(const Quat<T> &q)
+{
+    return q.asinh();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::asinh() const
+{
+    return cv::log(*this + cv::power(*this * *this + Quat<T>(1, 0, 0, 0), 0.5));
+}
+
+template <typename T>
+inline Quat<T> acosh(const Quat<T> &q)
+{
+    return q.acosh();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::acosh() const
+{
+    return cv::log(*this + cv::power(*this * *this - Quat<T>(1, 0, 0, 0), 0.5));
+}
+
+template <typename T>
+inline Quat<T> atanh(const Quat<T> &q)
+{
+    return q.atanh();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::atanh() const
+{
+    Quat<T> ident(1, 0, 0, 0);
+    Quat<T> c1 = (ident + *this).log();
+    Quat<T> c2 = (ident - *this).log();
+    return 0.5 * (c1 - c2);
+}
+
+template <typename T>
+inline Quat<T> asin(const Quat<T> &q)
+{
+    return q.asin();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::asin() const
+{
+    Quat<T> v(0, x, y, z);
+    T vNorm = v.norm();
+    T k = vNorm < CV_QUAT_EPS ? 1 : vNorm;
+    return -v / k * (*this * v / k).asinh();
+}
+
+template <typename T>
+inline Quat<T> acos(const Quat<T> &q)
+{
+    return q.acos();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::acos() const
+{
+    Quat<T> v(0, x, y, z);
+    T vNorm = v.norm();
+    T k = vNorm < CV_QUAT_EPS ? 1 : vNorm;
+    return -v / k * acosh();
+}
+
+template <typename T>
+inline Quat<T> atan(const Quat<T> &q)
+{
+    return q.atan();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::atan() const
+{
+    Quat<T> v(0, x, y, z);
+    T vNorm = v.norm();
+    T k = vNorm < CV_QUAT_EPS ? 1 : vNorm;
+    return -v / k * (*this * v / k).atanh();
+}
+
+template <typename T>
+inline T Quat<T>::getAngle(QuatAssumeType assumeUnit) const
+{
+    if (assumeUnit)
+    {
+        return 2 * std::acos(w);
+    }
+    if (norm() < CV_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "This quaternion does not represent a rotation");
+    }
+    return 2 * std::acos(w / norm());
+}
+
+template <typename T>
+inline Vec<T, 3> Quat<T>::getAxis(QuatAssumeType assumeUnit) const
+{
+    T angle = getAngle(assumeUnit);
+    const T sin_v = std::sin(angle * 0.5);
+    if (assumeUnit)
+    {
+        return Vec<T, 3>{x, y, z} / sin_v;
+    }
+    return Vec<T, 3>{x, y, z} / (norm() * sin_v);
+}
+
+template <typename T>
+Matx<T, 4, 4> Quat<T>::toRotMat4x4(QuatAssumeType assumeUnit) const
+{
+    T a = w, b = x, c = y, d = z;
+    if (!assumeUnit)
+    {
+        Quat<T> qTemp = normalize();
+        a = qTemp.w;
+        b = qTemp.x;
+        c = qTemp.y;
+        d = qTemp.z;
+    }
+    Matx<T, 4, 4> R{
+        1 - 2 * (c * c + d * d), 2 * (b * c - a * d)    , 2 * (b * d + a * c)    , 0,
+        2 * (b * c + a * d)    , 1 - 2 * (b * b + d * d), 2 * (c * d - a * b)    , 0,
+        2 * (b * d - a * c)    , 2 * (c * d + a * b)    , 1 - 2 * (b * b + c * c), 0,
+        0                      , 0                      , 0                      , 1,
+    };
+    return R;
+}
+
+template <typename T>
+Matx<T, 3, 3> Quat<T>::toRotMat3x3(QuatAssumeType assumeUnit) const
+{
+    T a = w, b = x, c = y, d = z;
+    if (!assumeUnit)
+    {
+        Quat<T> qTemp = normalize();
+        a = qTemp.w;
+        b = qTemp.x;
+        c = qTemp.y;
+        d = qTemp.z;
+    }
+    Matx<T, 3, 3> R{
+        1 - 2 * (c * c + d * d), 2 * (b * c - a * d)    , 2 * (b * d + a * c),
+        2 * (b * c + a * d)    , 1 - 2 * (b * b + d * d), 2 * (c * d - a * b),
+        2 * (b * d - a * c)    , 2 * (c * d + a * b)    , 1 - 2 * (b * b + c * c)
+    };
+    return R;
+}
+
+template <typename T>
+Vec<T, 3> Quat<T>::toRotVec(QuatAssumeType assumeUnit) const
+{
+    T angle = getAngle(assumeUnit);
+    Vec<T, 3> axis = getAxis(assumeUnit);
+    return angle * axis;
+}
+
+template <typename T>
+Vec<T, 4> Quat<T>::toVec() const
+{
+    return Vec<T, 4>{w, x, y, z};
+}
+
+template <typename T>
+Quat<T> Quat<T>::lerp(const Quat<T> &q0, const Quat<T> &q1, const T t)
+{
+    return (1 - t) * q0 + t * q1;
+}
+
+template <typename T>
+Quat<T> Quat<T>::slerp(const Quat<T> &q0, const Quat<T> &q1, const T t, QuatAssumeType assumeUnit, bool directChange)
+{
+    Quatd v0(q0);
+    Quatd v1(q1);
+    if (!assumeUnit)
+    {
+        v0 = v0.normalize();
+        v1 = v1.normalize();
+    }
+    T cosTheta = v0.dot(v1);
+    constexpr T DOT_THRESHOLD = 0.995;
+    // Near-parallel quaternions: fall back to Nlerp to avoid division by a
+    // vanishing sin(theta).
+    if (cosTheta > DOT_THRESHOLD)
+    {
+        return nlerp(v0, v1, t, QUAT_ASSUME_UNIT);
+    }
+
+    if (directChange && cosTheta < 0)
+    {
+        v0 = -v0;
+        cosTheta = -cosTheta;
+    }
+    T sinTheta = std::sqrt(1 - cosTheta * cosTheta);
+    T angle = atan2(sinTheta, cosTheta);
+    return (std::sin((1 - t) * angle) / (sinTheta) * v0 + std::sin(t * angle) / (sinTheta) * v1).normalize();
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::nlerp(const Quat<T> &q0, const Quat<T> &q1, const T t, QuatAssumeType assumeUnit)
+{
+    Quat<T> v0(q0), v1(q1);
+    if (v1.dot(v0) < 0)
+    {
+        v0 = -v0;
+    }
+    if (assumeUnit)
+    {
+        return ((1 - t) * v0 + t * v1).normalize();
+    }
+    v0 = v0.normalize();
+    v1 = v1.normalize();
+    return ((1 - t) * v0 + t * v1).normalize();
+}
+
+template <typename T>
+inline bool Quat<T>::isNormal(T eps) const
+{
+    double normVar = norm();
+    if ((normVar > 1 - eps) && (normVar < 1 + eps))
+        return true;
+    return false;
+}
+
+template <typename T>
+inline void Quat<T>::assertNormal(T eps) const
+{
+    if (!isNormal(eps))
+        CV_Error(Error::StsBadArg, "Quaternion should be normalized");
+}
+
+template <typename T>
+inline Quat<T> Quat<T>::squad(const Quat<T> &q0, const Quat<T> &q1,
+                              const Quat<T> &q2, const Quat<T> &q3,
+                              const T t, QuatAssumeType assumeUnit,
+                              bool directChange)
+{
+    Quat<T> v0(q0), v1(q1), v2(q2), v3(q3);
+    if (!assumeUnit)
+    {
+        v0 = v0.normalize();
+        v1 = v1.normalize();
+        v2 = v2.normalize();
+        v3 = v3.normalize();
+    }
+
+    Quat<T> c0 = slerp(v0, v3, t, assumeUnit, directChange);
+    Quat<T> c1 = slerp(v1, v2, t, assumeUnit, directChange);
+    return slerp(c0, c1, 2 * t * (1 - t), assumeUnit, directChange);
+}
+
+template <typename T>
+Quat<T> Quat<T>::interPoint(const Quat<T> &q0, const Quat<T> &q1,
+                            const Quat<T> &q2, QuatAssumeType assumeUnit)
+{
+    Quat<T> v0(q0), v1(q1), v2(q2);
+    if (!assumeUnit)
+    {
+        v0 = v0.normalize();
+        v1 = v1.normalize();
+        v2 = v2.normalize();
+    }
+    return v1 * cv::exp(-(cv::log(v1.conjugate() * v0, assumeUnit) + (cv::log(v1.conjugate() * v2, assumeUnit))) / 4);
+}
+
+template <typename T>
+Quat<T> Quat<T>::spline(const Quat<T> &q0, const Quat<T> &q1, const Quat<T> &q2, const Quat<T> &q3, const T t, QuatAssumeType assumeUnit)
+{
+    Quatd v0(q0), v1(q1), v2(q2), v3(q3);
+    if (!assumeUnit)
+    {
+        v0 = v0.normalize();
+        v1 = v1.normalize();
+        v2 = v2.normalize();
+        v3 = v3.normalize();
+    }
+    T cosTheta;
+    std::vector<Quat<T>> vec{v0, v1, v2, v3};
+    for (size_t i = 0; i < 3; ++i)
+    {
+        cosTheta = vec[i].dot(vec[i + 1]);
+        if (cosTheta < 0)
+        {
+            vec[i + 1] = -vec[i + 1];
+        }
+    }
+    Quat<T> s1 = interPoint(vec[0], vec[1], vec[2], QUAT_ASSUME_UNIT);
+    Quat<T> s2 = interPoint(vec[1], vec[2], vec[3], QUAT_ASSUME_UNIT);
+    return squad(vec[1], s1, s2, vec[2], t, assumeUnit, QUAT_ASSUME_NOT_UNIT);
+}
+
+namespace detail {
+
+template <typename T> static
+Quat<T> createFromAxisRot(int axis, const T theta)
+{
+    if (axis == 0)
+        return Quat<T>::createFromXRot(theta);
+    if (axis == 1)
+        return Quat<T>::createFromYRot(theta);
+    if (axis == 2)
+        return Quat<T>::createFromZRot(theta);
+    CV_Assert(0);
+}
+
+inline bool isIntAngleType(QuatEnum::EulerAnglesType eulerAnglesType)
+{
+    return eulerAnglesType < QuatEnum::EXT_XYZ;
+}
+
+// Note: returns true for the proper Euler sequences (X-Y-X and the like), for
+// which the middle angle is recovered with acos rather than asin below.
+inline bool isTaitBryan(QuatEnum::EulerAnglesType eulerAnglesType)
+{
+    return eulerAnglesType/6 == 1 || eulerAnglesType/6 == 3;
+}
+} // namespace detail
+
+template <typename T>
+Quat<T> Quat<T>::createFromYRot(const T theta)
+{
+    return Quat<T>{std::cos(theta * 0.5f), 0, std::sin(theta * 0.5f), 0};
+}
+
+template <typename T>
+Quat<T> Quat<T>::createFromXRot(const T theta){
+    return Quat<T>{std::cos(theta * 0.5f), std::sin(theta * 0.5f), 0, 0};
+}
+
+template <typename T>
+Quat<T> Quat<T>::createFromZRot(const T theta){
+    return Quat<T>{std::cos(theta * 0.5f), 0, 0, std::sin(theta * 0.5f)};
+}
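+
+// Illustrative only: intrinsic sequences compose the axis rotations left to
+// right (q1*q2*q3) in createFromEulerAngles below, extrinsic sequences right
+// to left (q3*q2*q1), so the following two quaternions represent the same
+// rotation:
+//   Quatd a = Quatd::createFromEulerAngles({0.1, 0.2, 0.3}, QuatEnum::INT_XYZ);
+//   Quatd b = Quatd::createFromEulerAngles({0.3, 0.2, 0.1}, QuatEnum::EXT_ZYX);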
+
+template <typename T>
+Quat<T> Quat<T>::createFromEulerAngles(const Vec<T, 3> &angles, QuatEnum::EulerAnglesType eulerAnglesType) {
+    CV_Assert(eulerAnglesType < QuatEnum::EulerAnglesType::EULER_ANGLES_MAX_VALUE);
+    static const int rotationAxis[24][3] = {
+        {0, 1, 2}, ///< Intrinsic rotations with the Euler angles type X-Y-Z
+        {0, 2, 1}, ///< Intrinsic rotations with the Euler angles type X-Z-Y
+        {1, 0, 2}, ///< Intrinsic rotations with the Euler angles type Y-X-Z
+        {1, 2, 0}, ///< Intrinsic rotations with the Euler angles type Y-Z-X
+        {2, 0, 1}, ///< Intrinsic rotations with the Euler angles type Z-X-Y
+        {2, 1, 0}, ///< Intrinsic rotations with the Euler angles type Z-Y-X
+        {0, 1, 0}, ///< Intrinsic rotations with the Euler angles type X-Y-X
+        {0, 2, 0}, ///< Intrinsic rotations with the Euler angles type X-Z-X
+        {1, 0, 1}, ///< Intrinsic rotations with the Euler angles type Y-X-Y
+        {1, 2, 1}, ///< Intrinsic rotations with the Euler angles type Y-Z-Y
+        {2, 0, 2}, ///< Intrinsic rotations with the Euler angles type Z-X-Z
+        {2, 1, 2}, ///< Intrinsic rotations with the Euler angles type Z-Y-Z
+        {0, 1, 2}, ///< Extrinsic rotations with the Euler angles type X-Y-Z
+        {0, 2, 1}, ///< Extrinsic rotations with the Euler angles type X-Z-Y
+        {1, 0, 2}, ///< Extrinsic rotations with the Euler angles type Y-X-Z
+        {1, 2, 0}, ///< Extrinsic rotations with the Euler angles type Y-Z-X
+        {2, 0, 1}, ///< Extrinsic rotations with the Euler angles type Z-X-Y
+        {2, 1, 0}, ///< Extrinsic rotations with the Euler angles type Z-Y-X
+        {0, 1, 0}, ///< Extrinsic rotations with the Euler angles type X-Y-X
+        {0, 2, 0}, ///< Extrinsic rotations with the Euler angles type X-Z-X
+        {1, 0, 1}, ///< Extrinsic rotations with the Euler angles type Y-X-Y
+        {1, 2, 1}, ///< Extrinsic rotations with the Euler angles type Y-Z-Y
+        {2, 0, 2}, ///< Extrinsic rotations with the Euler angles type Z-X-Z
+        {2, 1, 2}  ///< Extrinsic rotations with the Euler angles type Z-Y-Z
+    };
+    Quat<T> q1 = detail::createFromAxisRot(rotationAxis[eulerAnglesType][0], angles(0));
+    Quat<T> q2 = detail::createFromAxisRot(rotationAxis[eulerAnglesType][1], angles(1));
+    Quat<T> q3 = detail::createFromAxisRot(rotationAxis[eulerAnglesType][2], angles(2));
+    if (detail::isIntAngleType(eulerAnglesType))
+    {
+        return q1 * q2 * q3;
+    }
+    else // (!detail::isIntAngleType(eulerAnglesType))
+    {
+        return q3 * q2 * q1;
+    }
+}
+
+template <typename T>
+Vec<T, 3> Quat<T>::toEulerAngles(QuatEnum::EulerAnglesType eulerAnglesType){
+    CV_Assert(eulerAnglesType < QuatEnum::EulerAnglesType::EULER_ANGLES_MAX_VALUE);
+    Matx33d R = toRotMat3x3();
+    enum {
+        C_ZERO,
+        C_PI,
+        C_PI_2,
+        N_CONSTANTS,
+        R_0_0 = N_CONSTANTS, R_0_1, R_0_2,
+        R_1_0, R_1_1, R_1_2,
+        R_2_0, R_2_1, R_2_2
+    };
+    static const T constants_[N_CONSTANTS] = {
+        0,                 // C_ZERO
+        (T)CV_PI,          // C_PI
+        (T)(CV_PI * 0.5)   // C_PI_2, -C_PI_2
+    };
+    static const int rotationR_[24][12] = {
+        {+R_0_2, +R_1_0, +R_1_1, C_PI_2, +R_2_1, +R_1_1, -C_PI_2, -R_1_2, +R_2_2, +R_0_2, -R_0_1, +R_0_0}, // INT_XYZ
+        {+R_0_1, -R_1_2, +R_2_2, -C_PI_2, +R_2_0, +R_2_2, C_PI_2, +R_2_1, +R_1_1, -R_0_1, +R_0_2, +R_0_0}, // INT_XZY
+        {+R_1_2, -R_0_1, +R_0_0, -C_PI_2, +R_0_1, +R_0_0, C_PI_2, +R_0_2, +R_2_2, -R_1_2, +R_1_0, +R_1_1}, // INT_YXZ
+        {+R_1_0, +R_0_2, +R_2_2, C_PI_2, +R_0_2, +R_0_1, -C_PI_2, -R_2_0, +R_0_0, +R_1_0, -R_1_2, +R_1_1}, // INT_YZX
+        {+R_2_1, +R_1_0, +R_0_0, C_PI_2, +R_1_0, +R_0_0, -C_PI_2, -R_0_1, +R_1_1, +R_2_1, -R_2_0, +R_2_2}, // INT_ZXY
+        {+R_2_0, -R_0_1, +R_1_1, -C_PI_2, +R_1_2, +R_1_1, C_PI_2, +R_1_0, +R_0_0, -R_2_0, +R_2_1, +R_2_2}, // INT_ZYX
+        {+R_0_0, +R_2_1, +R_2_2, C_ZERO, +R_1_2, +R_1_1, C_PI, +R_1_0, -R_2_0, +R_0_0, +R_0_1, +R_0_2}, // INT_XYX
+        {+R_0_0, +R_2_1, +R_2_2, C_ZERO, -R_2_1, +R_2_2, C_PI, +R_2_0, +R_1_0, +R_0_0, +R_0_2, -R_0_1}, // INT_XZX
+        {+R_1_1, +R_0_2, +R_0_0, C_ZERO, -R_2_0, +R_0_0, C_PI, +R_0_1, +R_2_1, +R_1_1, +R_1_0, -R_1_2}, // INT_YXY
+        {+R_1_1, +R_0_2, +R_0_0, C_ZERO, +R_0_2, -R_0_0, C_PI, +R_2_1, -R_0_1, +R_1_1, +R_1_2, +R_1_0}, // INT_YZY
+        {+R_2_2, +R_1_0, +R_1_1, C_ZERO, +R_1_0, +R_0_0, C_PI, +R_0_2, -R_1_2, +R_2_2, +R_2_0, +R_2_1}, // INT_ZXZ
+        {+R_2_2, +R_1_0, +R_0_0, C_ZERO, +R_1_0, +R_0_0, C_PI, +R_1_2, +R_0_2, +R_2_2, +R_2_1, -R_2_0}, // INT_ZYZ
+
+        {+R_2_0, -C_PI_2, -R_0_1, +R_1_1, C_PI_2, +R_1_2, +R_1_1, +R_2_1, +R_2_2, -R_2_0, +R_1_0, +R_0_0}, // EXT_XYZ
+        {+R_1_0, C_PI_2, +R_0_2, +R_2_2, -C_PI_2, +R_0_2, +R_0_1, -R_1_2, +R_1_1, +R_1_0, -R_2_0, +R_0_0}, // EXT_XZY
+        {+R_2_1, C_PI_2, +R_1_0, +R_0_0, -C_PI_2, +R_1_0, +R_0_0, -R_2_0, +R_2_2, +R_2_1, -R_0_1, +R_1_1}, // EXT_YXZ
+        {+R_0_2, -C_PI_2, -R_1_2, +R_2_2, C_PI_2, +R_2_0, +R_2_2, +R_0_2, +R_0_0, -R_0_1, +R_2_1, +R_1_1}, // EXT_YZX
+        {+R_1_2, -C_PI_2, -R_0_1, +R_0_0, C_PI_2, +R_0_1, +R_0_0, +R_1_0, +R_1_1, -R_1_2, +R_0_2, +R_2_2}, // EXT_ZXY
+        {+R_0_2, C_PI_2, +R_1_0, +R_1_1, -C_PI_2, +R_2_1, +R_1_1, -R_0_1, +R_0_0, +R_0_2, -R_1_2, +R_2_2}, // EXT_ZYX
+        {+R_0_0, C_ZERO, +R_2_1, +R_2_2, C_PI, +R_1_2, +R_1_1, +R_0_1, +R_0_2, +R_0_0, +R_1_0, -R_2_0}, // EXT_XYX
+        {+R_0_0, C_ZERO, +R_2_1, +R_2_2, C_PI, +R_2_1, +R_2_2, +R_0_2, -R_0_1, +R_0_0, +R_2_0, +R_1_0}, // EXT_XZX
+        {+R_1_1, C_ZERO, +R_0_2, +R_0_0, C_PI, -R_2_0, +R_0_0, +R_1_0, -R_1_2, +R_1_1, +R_0_1, +R_2_1}, // EXT_YXY
+        {+R_1_1, C_ZERO, +R_0_2, +R_0_0, C_PI, +R_0_2, -R_0_0, +R_1_2, +R_1_0, +R_1_1, +R_2_1, -R_0_1}, // EXT_YZY
+        {+R_2_2, C_ZERO, +R_1_0, +R_1_1, C_PI, +R_1_0, +R_0_0, +R_2_0, +R_2_1, +R_2_2, +R_0_2, -R_1_2}, // EXT_ZXZ
+        {+R_2_2, C_ZERO, +R_1_0, +R_0_0, C_PI, +R_1_0, +R_0_0, +R_2_1, -R_2_0, +R_2_2, +R_1_2, +R_0_2}, // EXT_ZYZ
+    };
+    T rotationR[12];
+    for (int i = 0; i < 12; i++)
+    {
+        int id = rotationR_[eulerAnglesType][i];
+        unsigned idx = std::abs(id);
+        T value = 0.0f;
+        if (idx < N_CONSTANTS)
+        {
+            value = constants_[idx];
+        }
+        else
+        {
+            unsigned r_idx = idx - N_CONSTANTS;
+            CV_DbgAssert(r_idx < 9);
+            value = R.val[r_idx];
+        }
+        bool isNegative = id < 0;
+        if (isNegative)
+            value = -value;
+        rotationR[i] = value;
+    }
+    Vec<T, 3> angles;
+    if (detail::isIntAngleType(eulerAnglesType))
+    {
+        if (abs(rotationR[0] - 1) < CV_QUAT_CONVERT_THRESHOLD)
+        {
+            CV_LOG_WARNING(NULL, "Gimbal Lock occurs. Euler angles are non-unique, we set the third angle to 0");
+            angles = {std::atan2(rotationR[1], rotationR[2]), rotationR[3], 0};
+            return angles;
+        }
+        else if (abs(rotationR[0] + 1) < CV_QUAT_CONVERT_THRESHOLD)
+        {
+            CV_LOG_WARNING(NULL, "Gimbal Lock occurs. Euler angles are non-unique, we set the third angle to 0");
+            angles = {std::atan2(rotationR[4], rotationR[5]), rotationR[6], 0};
+            return angles;
+        }
+    }
+    else // (!detail::isIntAngleType(eulerAnglesType))
+    {
+        if (abs(rotationR[0] - 1) < CV_QUAT_CONVERT_THRESHOLD)
+        {
+            CV_LOG_WARNING(NULL, "Gimbal Lock occurs. Euler angles are non-unique, we set the first angle to 0");
+            angles = {0, rotationR[1], std::atan2(rotationR[2], rotationR[3])};
+            return angles;
+        }
+        else if (abs(rotationR[0] + 1) < CV_QUAT_CONVERT_THRESHOLD)
+        {
+            CV_LOG_WARNING(NULL, "Gimbal Lock occurs. Euler angles are non-unique, we set the first angle to 0");
+            angles = {0, rotationR[4], std::atan2(rotationR[5], rotationR[6])};
+            return angles;
+        }
+    }
+
+    angles(0) = std::atan2(rotationR[7], rotationR[8]);
+    if (detail::isTaitBryan(eulerAnglesType))
+        angles(1) = std::acos(rotationR[9]);
+    else
+        angles(1) = std::asin(rotationR[9]);
+    angles(2) = std::atan2(rotationR[10], rotationR[11]);
+    return angles;
+}
+
+} // namespace cv
+//! @endcond
+
+#endif /*OPENCV_CORE_QUATERNION_INL_HPP*/
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/saturate.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/saturate.hpp
new file mode 100644
index 0000000..8127e3d
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/saturate.hpp
@@ -0,0 +1,179 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_SATURATE_HPP
+#define OPENCV_CORE_SATURATE_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/fast_math.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+/////////////// saturate_cast (used in image & signal processing) ///////////////////
+
+/** @brief Template function for accurate conversion from one primitive type to another.
+
+ The function saturate_cast resembles the standard C++ cast operations, such as static_cast\<T\>()
+ and others. It performs an efficient and accurate conversion from one primitive type to another
+ (see the introduction chapter). "saturate" in the name means that when the input value v is out of the
+ range of the target type, the result is not formed just by taking the low bits of the input, but instead
+ the value is clipped. For example:
+ @code
+ uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN)
+ short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX)
+ @endcode
+ Such clipping is done when the target type is unsigned char, signed char, unsigned short or
+ signed short. For 32-bit integers, no clipping is done.
+
+ When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit),
+ the floating-point value is first rounded to the nearest integer and then clipped if needed (when
+ the target type is 8- or 16-bit).
+
+ @param v Function parameter.
+ @sa add, subtract, multiply, divide, Mat::convertTo
+ */
+template<typename _Tp> static inline _Tp saturate_cast(uchar v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(schar v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(ushort v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(short v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(unsigned v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int v)      { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(float v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(double v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int64 v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(uint64 v)   { return _Tp(v); }
+
+template<> inline uchar saturate_cast<uchar>(schar v)    { return (uchar)std::max((int)v, 0); }
+template<> inline uchar saturate_cast<uchar>(ushort v)   { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(int v)      { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(short v)    { return saturate_cast<uchar>((int)v); }
+template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(float v)    { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(double v)   { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(int64 v)    { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(uint64 v)   { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
+
+template<> inline schar saturate_cast<schar>(uchar v)    { return (schar)std::min((int)v, SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(ushort v)   { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(int v)      { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(short v)    { return saturate_cast<schar>((int)v); }
+template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(float v)    { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(double v)   { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(int64 v)    { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(uint64 v)   { return (schar)std::min(v, (uint64)SCHAR_MAX); }
USHRT_MAX : 0); } +template<> inline ushort saturate_cast(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } +template<> inline ushort saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline ushort saturate_cast(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } +template<> inline ushort saturate_cast(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); } + +template<> inline short saturate_cast(ushort v) { return (short)std::min((int)v, SHRT_MAX); } +template<> inline short saturate_cast(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } +template<> inline short saturate_cast(float v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(double v) { int iv = cvRound(v); return saturate_cast(iv); } +template<> inline short saturate_cast(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } +template<> inline short saturate_cast(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } + +template<> inline int saturate_cast(unsigned v) { return (int)std::min(v, (unsigned)INT_MAX); } +template<> inline int saturate_cast(int64 v) { return (int)((uint64)(v - INT_MIN) <= (uint64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); } +template<> inline int saturate_cast(uint64 v) { return (int)std::min(v, (uint64)INT_MAX); } +template<> inline int saturate_cast(float v) { return cvRound(v); } +template<> inline int saturate_cast(double v) { return cvRound(v); } + +template<> inline unsigned saturate_cast(schar v) { return (unsigned)std::max(v, (schar)0); } +template<> inline unsigned saturate_cast(short v) { return (unsigned)std::max(v, (short)0); } +template<> inline unsigned saturate_cast(int v) { return (unsigned)std::max(v, (int)0); } +template<> inline unsigned saturate_cast(int64 v) { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); } +template<> inline unsigned saturate_cast(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); } +// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. 
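+
+// Illustrative sketch (not part of the original header): how the saturating
+// casts above behave on out-of-range input. Recall that cvRound() ties go to
+// the nearest even integer.
+//   uchar    a = cv::saturate_cast<uchar>(-100);     // a = 0     (clipped to UCHAR_MIN)
+//   uchar    b = cv::saturate_cast<uchar>(127.9f);   // b = 128   (rounded, in range)
+//   short    c = cv::saturate_cast<short>(40000);    // c = 32767 (clipped to SHRT_MAX)
+//   unsigned d = cv::saturate_cast<unsigned>(-5);    // d = 0     (negative int clipped)
+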
+// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+template<> inline unsigned saturate_cast<unsigned>(float v)  { return static_cast<unsigned>(cvRound(v)); }
+template<> inline unsigned saturate_cast<unsigned>(double v) { return static_cast<unsigned>(cvRound(v)); }
+
+template<> inline uint64 saturate_cast<uint64>(schar v)      { return (uint64)std::max(v, (schar)0); }
+template<> inline uint64 saturate_cast<uint64>(short v)      { return (uint64)std::max(v, (short)0); }
+template<> inline uint64 saturate_cast<uint64>(int v)        { return (uint64)std::max(v, (int)0); }
+template<> inline uint64 saturate_cast<uint64>(int64 v)      { return (uint64)std::max(v, (int64)0); }
+
+template<> inline int64 saturate_cast<int64>(uint64 v)       { return (int64)std::min(v, (uint64)LLONG_MAX); }
+
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); }
+
+// in theory, we could use a LUT for 8u/8s->16f conversion,
+// but with hardware support for FP32->FP16 conversion the current approach is preferable
+template<> inline float16_t saturate_cast<float16_t>(uchar v)    { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(schar v)    { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(ushort v)   { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(short v)    { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(unsigned v) { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(int v)      { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(uint64 v)   { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(int64 v)    { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(float v)    { return float16_t(v); }
+template<> inline float16_t saturate_cast<float16_t>(double v)   { return float16_t((float)v); }
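+
+// Illustrative sketch (not part of the original header): narrowing a double to
+// half precision via the specializations above; float16_t round-trips through
+// float, so the stored value is the nearest representable FP16 number.
+//   cv::float16_t h = cv::saturate_cast<cv::float16_t>(0.1);
+//   float f = (float)h; // ~0.0999755859375, the closest FP16 value to 0.1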
+
+//! @}
+
+} // cv
+
+#endif // OPENCV_CORE_SATURATE_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/simd_intrinsics.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/simd_intrinsics.hpp
new file mode 100644
index 0000000..2658d92
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/simd_intrinsics.hpp
@@ -0,0 +1,87 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_SIMD_INTRINSICS_HPP
+#define OPENCV_CORE_SIMD_INTRINSICS_HPP
+
+/**
+Helper header to support SIMD intrinsics (universal intrinsics) in user code.
+Intrinsics documentation: https://docs.opencv.org/4.x/df/d91/group__core__hal__intrin.html
+
+
+Checks of target CPU instruction set based on compiler definitions don't work well enough.
+More reliable solutions require utilization of configuration systems (like CMake).
+
+So, probably you need to specify your own configuration.
+
+You can do that via CMake in this way:
+    add_definitions(/DOPENCV_SIMD_CONFIG_HEADER=opencv_simd_config_custom.hpp)
+or
+    add_definitions(/DOPENCV_SIMD_CONFIG_INCLUDE_DIR=1)
+
+Additionally you may need to add include directory to your files:
+    include_directories("${CMAKE_CURRENT_LIST_DIR}/opencv_config_${MYTARGET}")
+
+These files can be pre-generated for target configurations of your application
+or generated by CMake on the fly (use CMAKE_BINARY_DIR for that).
+
+Notes:
+- H/W capability checks are still responsibility of your application
+- runtime dispatching is not covered by this helper header
+*/
+
+#ifdef __OPENCV_BUILD
+#error "Use core/hal/intrin.hpp during OpenCV build"
+#endif
+
+#ifdef OPENCV_HAL_INTRIN_HPP
+#error "core/simd_intrinsics.hpp must be included before core/hal/intrin.hpp"
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+#ifdef OPENCV_SIMD_CONFIG_HEADER
+#include CVAUX_STR(OPENCV_SIMD_CONFIG_HEADER)
+#elif defined(OPENCV_SIMD_CONFIG_INCLUDE_DIR)
+#include "opencv_simd_config.hpp" // corresponding directory should be added via -I compiler parameter
+#else // custom config headers
+
+#if (!defined(CV_AVX_512F) || !CV_AVX_512F) && (defined(__AVX512__) || defined(__AVX512F__))
+# include <immintrin.h>
+# undef CV_AVX_512F
+# define CV_AVX_512F 1
+# ifndef OPENCV_SIMD_DONT_ASSUME_SKX // Skylake-X with AVX-512F/CD/BW/DQ/VL
+# undef CV_AVX512_SKX
+# define CV_AVX512_SKX 1
+# undef CV_AVX_512CD
+# define CV_AVX_512CD 1
+# undef CV_AVX_512BW
+# define CV_AVX_512BW 1
+# undef CV_AVX_512DQ
+# define CV_AVX_512DQ 1
+# undef CV_AVX_512VL
+# define CV_AVX_512VL 1
+# endif
+#endif // AVX512
+
+// GCC/Clang: -mavx2
+// MSVC: /arch:AVX2
+#if defined __AVX2__
+# include <immintrin.h>
+# undef CV_AVX2
+# define CV_AVX2 1
+# if defined __F16C__
+# undef CV_FP16
+# define CV_FP16 1
+# endif
+#endif
+
+#endif
+
+// SSE / NEON / VSX is handled by cv_cpu_dispatch.h compatibility block
+#include "cv_cpu_dispatch.h"
+
+#include "hal/intrin.hpp"
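+
+// Illustrative sketch (not part of the original header): scaling a float array
+// with the universal intrinsics this header exposes. Assumes an OpenCV 4.x
+// build where CV_SIMD ends up enabled; v_float32, vx_load, vx_setall_f32 and
+// v_store are the wide universal-intrinsic types and loads from hal/intrin.hpp.
+//   void scale(const float* src, float* dst, int n, float k) {
+//       int i = 0;
+//   #if CV_SIMD
+//       const int step = cv::v_float32::nlanes;
+//       for (; i <= n - step; i += step)                      // vectorized body
+//           cv::v_store(dst + i, cv::vx_load(src + i) * cv::vx_setall_f32(k));
+//   #endif
+//       for (; i < n; i++) dst[i] = src[i] * k;               // scalar tail
+//   }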
+
+#endif // OPENCV_CORE_SIMD_INTRINSICS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/softfloat.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/softfloat.hpp
new file mode 100644
index 0000000..485e15c
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/softfloat.hpp
@@ -0,0 +1,514 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This file is based on files from package issued with the following license:
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3c, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#pragma once
+#ifndef softfloat_h
+#define softfloat_h 1
+
+#include "cvdef.h"
+
+namespace cv
+{
+
+/** @addtogroup core_utils_softfloat
+
+    [SoftFloat](http://www.jhauser.us/arithmetic/SoftFloat.html) is a software implementation
+    of floating-point calculations according to IEEE 754 standard.
+    All calculations are done in integers, which is why they are machine-independent and bit-exact.
+    This library can be useful in accuracy-critical parts like look-up tables generation, tests, etc.
+    OpenCV contains a subset of SoftFloat partially rewritten to C++.
+
+    ### Types
+
+    There are two basic types: @ref softfloat and @ref softdouble.
+    These types are binary compatible with float and double types respectively
+    and support conversions to/from them.
+    Other types from original SoftFloat library like fp16 or fp128 were thrown away
+    as well as quiet/signaling NaN support, on-the-fly rounding mode switch
+    and exception flags (though exceptions can be implemented in the future).
+
+    ### Operations
+
+    Both types support the following:
+    - Construction from signed and unsigned 32-bit and 64-bit integers,
+      float/double or raw binary representation
+    - Conversions between each other, to float or double and to int
+      using @ref cvRound, @ref cvTrunc, @ref cvFloor, @ref cvCeil or a bunch of
+      saturate_cast functions
+    - Add, subtract, multiply, divide, remainder, square root, FMA with absolute precision
+    - Comparison operations
+    - Explicit sign, exponent and significand manipulation through get/set methods,
+      number state indicators (isInf, isNan, isSubnormal)
+    - Type-specific constants like eps, minimum/maximum value, best pi approximation, etc.
+    - min(), max(), abs(), exp(), log() and pow() functions
+
+*/
+//! @{
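+
+// Illustrative sketch (not part of the original header): the two construction
+// paths described above, by value and bitwise from a raw representation.
+//   cv::softfloat a(1.5f);                                  // binary-compatible with float
+//   cv::softfloat b = cv::softfloat::fromRaw(0x3FC00000);   // same bit pattern, same value
+//   bool same = (a == b);                                   // true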
+
+struct softfloat;
+struct softdouble;
+
+struct CV_EXPORTS softfloat
+{
+public:
+    /** @brief Default constructor */
+    softfloat() { v = 0; }
+    /** @brief Copy constructor */
+    softfloat( const softfloat& c) { v = c.v; }
+    /** @brief Assign constructor */
+    softfloat& operator=( const softfloat& c )
+    {
+        if(&c != this) v = c.v;
+        return *this;
+    }
+    /** @brief Construct from raw
+
+    Builds new value from raw binary representation
+    */
+    static const softfloat fromRaw( const uint32_t a ) { softfloat x; x.v = a; return x; }
+
+    /** @brief Construct from integer */
+    explicit softfloat( const uint32_t );
+    explicit softfloat( const uint64_t );
+    explicit softfloat( const int32_t );
+    explicit softfloat( const int64_t );
+
+#ifdef CV_INT32_T_IS_LONG_INT
+    // for platforms with int32_t = long int
+    explicit softfloat( const int a ) { *this = softfloat(static_cast<int32_t>(a)); }
+#endif
+
+    /** @brief Construct from float */
+    explicit softfloat( const float a ) { Cv32suf s; s.f = a; v = s.u; }
+
+    /** @brief Type casts */
+    operator softdouble() const;
+    operator float() const { Cv32suf s; s.u = v; return s.f; }
+
+    /** @brief Basic arithmetic operations */
+    softfloat operator + (const softfloat&) const;
+    softfloat operator - (const softfloat&) const;
+    softfloat operator * (const softfloat&) const;
+    softfloat operator / (const softfloat&) const;
+    softfloat operator - () const { softfloat x; x.v = v ^ (1U << 31); return x; }
+
+    /** @brief Remainder operator
+
+    A quote from original SoftFloat manual:
+
+    > The IEEE Standard remainder operation computes the value
+    > a - n * b, where n is the integer closest to a / b.
+    > If a / b is exactly halfway between two integers, n is the even integer
+    > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding.
+    > Depending on the relative magnitudes of the operands, the remainder functions
+    > can take considerably longer to execute than the other SoftFloat functions.
+    > This is an inherent characteristic of the remainder operation itself and is not a flaw
+    > in the SoftFloat implementation.
+    */
+    softfloat operator % (const softfloat&) const;
+
+    softfloat& operator += (const softfloat& a) { *this = *this + a; return *this; }
+    softfloat& operator -= (const softfloat& a) { *this = *this - a; return *this; }
+    softfloat& operator *= (const softfloat& a) { *this = *this * a; return *this; }
+    softfloat& operator /= (const softfloat& a) { *this = *this / a; return *this; }
+    softfloat& operator %= (const softfloat& a) { *this = *this % a; return *this; }
+
+    /** @brief Comparison operations
+
+     - Any operation with NaN produces false
+       + The only exception is when x is NaN: x != y for any y.
+     - Positive and negative zeros are equal
+    */
+    bool operator == ( const softfloat& ) const;
+    bool operator != ( const softfloat& ) const;
+    bool operator >  ( const softfloat& ) const;
+    bool operator >= ( const softfloat& ) const;
+    bool operator <  ( const softfloat& ) const;
+    bool operator <= ( const softfloat& ) const;
+
+    /** @brief NaN state indicator */
+    inline bool isNaN() const { return (v & 0x7fffffff) > 0x7f800000; }
+    /** @brief Inf state indicator */
+    inline bool isInf() const { return (v & 0x7fffffff) == 0x7f800000; }
+    /** @brief Subnormal number indicator */
+    inline bool isSubnormal() const { return ((v >> 23) & 0xFF) == 0; }
+
+    /** @brief Get sign bit */
+    inline bool getSign() const { return (v >> 31) != 0; }
+    /** @brief Construct a copy with new sign bit */
+    inline softfloat setSign(bool sign) const { softfloat x; x.v = (v & ((1U << 31) - 1)) | ((uint32_t)sign << 31); return x; }
+    /** @brief Get 0-based exponent */
+    inline int getExp() const { return ((v >> 23) & 0xFF) - 127; }
+    /** @brief Construct a copy with new 0-based exponent */
+    inline softfloat setExp(int e) const { softfloat x; x.v = (v & 0x807fffff) | (((e + 127) & 0xFF) << 23 ); return x; }
+
+    /** @brief Get a fraction part
+
+    Returns a number 1 <= x < 2 with the same significand
+    */
+    inline softfloat getFrac() const
+    {
+        uint_fast32_t vv = (v & 0x007fffff) | (127 << 23);
+        return softfloat::fromRaw(vv);
+    }
+    /** @brief Construct a copy with provided significand
+
+    Constructs a copy of a number with significand taken from parameter
+    */
+    inline softfloat setFrac(const softfloat& s) const
+    {
+        softfloat x;
+        x.v = (v & 0xff800000) | (s.v & 0x007fffff);
+        return x;
+    }
+
+    /** @brief Zero constant */
+    static softfloat zero() { return softfloat::fromRaw( 0 ); }
+    /** @brief Positive infinity constant */
+    static softfloat inf()  { return softfloat::fromRaw( 0xFF << 23 ); }
+    /** @brief Default NaN constant */
+    static softfloat nan()  { return softfloat::fromRaw( 0x7fffffff ); }
+    /** @brief One constant */
+    static softfloat one()  { return softfloat::fromRaw( 127 << 23 ); }
+    /** @brief Smallest normalized value */
+    static softfloat min()  { return softfloat::fromRaw( 0x01 << 23 ); }
+    /** @brief Difference between 1 and next representable value */
+    static softfloat eps()  { return softfloat::fromRaw( (127 - 23) << 23 ); }
+    /** @brief Biggest finite value */
+    static softfloat max()  { return softfloat::fromRaw( (0xFF << 23) - 1 ); }
+    /** @brief Correct pi approximation */
+    static softfloat pi()   { return softfloat::fromRaw( 0x40490fdb ); }
+
+    uint32_t v;
+};
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+struct CV_EXPORTS softdouble
+{
+public:
+    /** @brief Default constructor */
+    softdouble() : v(0) { }
+    /** @brief Copy constructor */
+    softdouble( const softdouble& c) { v = c.v; }
+    /** @brief Assign constructor */
+    softdouble& operator=( const softdouble& c )
+    {
+        if(&c != this) v = c.v;
+        return *this;
+    }
+    /** @brief Construct from raw
+
+    Builds new value from raw binary representation
+    */
+    static softdouble fromRaw( const uint64_t a ) { softdouble x; x.v = a; return x; }
+
+    /** @brief Construct from integer */
+    explicit softdouble( const uint32_t );
+    explicit softdouble( const uint64_t );
+    explicit softdouble( const int32_t );
+    explicit softdouble( const int64_t );
+
+#ifdef CV_INT32_T_IS_LONG_INT
+    // for platforms with int32_t = long int
+    explicit softdouble( const int a ) { *this = softdouble(static_cast<int32_t>(a)); }
+#endif
+
+    /** @brief Construct from double */
+    explicit softdouble( const double a ) { Cv64suf s; s.f = a; v = s.u; }
+
+    /** @brief Type casts */
+    operator softfloat() const;
+    operator double() const { Cv64suf s; s.u = v; return s.f; }
+
+    /** @brief Basic arithmetic operations */
+    softdouble operator + (const softdouble&) const;
+    softdouble operator - (const softdouble&) const;
+    softdouble operator * (const softdouble&) const;
+    softdouble operator / (const softdouble&) const;
+    softdouble operator - () const { softdouble x; x.v = v ^ (1ULL << 63); return x; }
+
+    /** @brief Remainder operator
+
+    A quote from original SoftFloat manual:
+
+    > The IEEE Standard remainder operation computes the value
+    > a - n * b, where n is the integer closest to a / b.
+    > If a / b is exactly halfway between two integers, n is the even integer
+    > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding.
+    > Depending on the relative magnitudes of the operands, the remainder functions
+    > can take considerably longer to execute than the other SoftFloat functions.
+    > This is an inherent characteristic of the remainder operation itself and is not a flaw
+    > in the SoftFloat implementation.
+    */
+    softdouble operator % (const softdouble&) const;
+
+    softdouble& operator += (const softdouble& a) { *this = *this + a; return *this; }
+    softdouble& operator -= (const softdouble& a) { *this = *this - a; return *this; }
+    softdouble& operator *= (const softdouble& a) { *this = *this * a; return *this; }
+    softdouble& operator /= (const softdouble& a) { *this = *this / a; return *this; }
+    softdouble& operator %= (const softdouble& a) { *this = *this % a; return *this; }
+
+    /** @brief Comparison operations
+
+     - Any operation with NaN produces false
+       + The only exception is when x is NaN: x != y for any y.
+     - Positive and negative zeros are equal
+    */
+    bool operator == ( const softdouble& ) const;
+    bool operator != ( const softdouble& ) const;
+    bool operator >  ( const softdouble& ) const;
+    bool operator >= ( const softdouble& ) const;
+    bool operator <  ( const softdouble& ) const;
+    bool operator <= ( const softdouble& ) const;
+
+    /** @brief NaN state indicator */
+    inline bool isNaN() const { return (v & 0x7fffffffffffffff) > 0x7ff0000000000000; }
+    /** @brief Inf state indicator */
+    inline bool isInf() const { return (v & 0x7fffffffffffffff) == 0x7ff0000000000000; }
+    /** @brief Subnormal number indicator */
+    inline bool isSubnormal() const { return ((v >> 52) & 0x7FF) == 0; }
+
+    /** @brief Get sign bit */
+    inline bool getSign() const { return (v >> 63) != 0; }
+    /** @brief Construct a copy with new sign bit */
+    softdouble setSign(bool sign) const { softdouble x; x.v = (v & ((1ULL << 63) - 1)) | ((uint_fast64_t)(sign) << 63); return x; }
+    /** @brief Get 0-based exponent */
+    inline int getExp() const { return ((v >> 52) & 0x7FF) - 1023; }
+    /** @brief Construct a copy with new 0-based exponent */
+    inline softdouble setExp(int e) const
+    {
+        softdouble x;
+        x.v = (v & 0x800FFFFFFFFFFFFF) | ((uint_fast64_t)((e + 1023) & 0x7FF) << 52);
+        return x;
+    }
+
+    /** @brief Get a fraction part
+
+    Returns a number 1 <= x < 2 with the same significand
+    */
+    inline softdouble getFrac() const
+    {
+        uint_fast64_t vv = (v & 0x000FFFFFFFFFFFFF) | ((uint_fast64_t)(1023) << 52);
+        return softdouble::fromRaw(vv);
+    }
+    /** @brief Construct a copy with provided significand
+
+    Constructs a copy of a number with significand taken from parameter
+    */
+    inline softdouble setFrac(const softdouble& s) const
+    {
+        softdouble x;
+        x.v = (v & 0xFFF0000000000000) | (s.v & 0x000FFFFFFFFFFFFF);
+        return x;
+    }
+
+    /** @brief Zero constant */
+    static softdouble zero() { return softdouble::fromRaw( 0 ); }
+    /** @brief Positive infinity constant */
+    static softdouble inf()  { return softdouble::fromRaw( (uint_fast64_t)(0x7FF) << 52 ); }
+    /** @brief Default NaN constant */
+    static softdouble nan()  { return softdouble::fromRaw( CV_BIG_INT(0x7FFFFFFFFFFFFFFF) ); }
+    /** @brief One constant */
+    static softdouble one()  { return softdouble::fromRaw( (uint_fast64_t)( 1023) << 52 ); }
+    /** @brief Smallest normalized value */
+    static softdouble min()  { return softdouble::fromRaw( (uint_fast64_t)( 0x01) << 52 ); }
+    /** @brief Difference between 1 and next representable value */
+    static softdouble eps()  { return softdouble::fromRaw( (uint_fast64_t)( 1023 - 52 ) << 52 ); }
+    /** @brief Biggest finite value */
+    static softdouble max()  { return softdouble::fromRaw( ((uint_fast64_t)(0x7FF) << 52) - 1 ); }
+    /** @brief Correct pi approximation */
+    static softdouble pi()   { return softdouble::fromRaw( CV_BIG_INT(0x400921FB54442D18) ); }
+
+    uint64_t v;
+};
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+/** @brief Fused Multiplication and Addition
+
+Computes (a*b)+c with single rounding
+*/
+CV_EXPORTS softfloat  mulAdd( const softfloat&  a, const softfloat&  b, const softfloat & c);
+CV_EXPORTS softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c);
+
+/** @brief Square root */
+CV_EXPORTS softfloat  sqrt( const softfloat& a );
+CV_EXPORTS softdouble sqrt( const softdouble& a );
+}
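+
+// Illustrative sketch (not part of the original header): softdouble arithmetic
+// is pure-integer under the hood, so this sum is bit-identical on every
+// platform and compiler, unlike native double.
+//   cv::softdouble sum = cv::softdouble::zero();
+//   for (int i = 1; i <= 10; i++)
+//       sum += cv::softdouble::one() / cv::softdouble(i); // harmonic number H_10
+//   double h10 = (double)sum;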
+
+/*----------------------------------------------------------------------------
+| Ported from OpenCV and added for usability
+*----------------------------------------------------------------------------*/
+
+/** @brief Truncates number to integer with minimum magnitude */
+CV_EXPORTS int cvTrunc(const cv::softfloat& a);
+CV_EXPORTS int cvTrunc(const cv::softdouble& a);
+
+/** @brief Rounds a number to nearest even integer */
+CV_EXPORTS int cvRound(const cv::softfloat& a);
+CV_EXPORTS int cvRound(const cv::softdouble& a);
+
+/** @brief Rounds a number to nearest even long long integer */
+CV_EXPORTS int64_t cvRound64(const cv::softdouble& a);
+
+/** @brief Rounds a number down to integer */
+CV_EXPORTS int cvFloor(const cv::softfloat& a);
+CV_EXPORTS int cvFloor(const cv::softdouble& a);
+
+/** @brief Rounds number up to integer */
+CV_EXPORTS int cvCeil(const cv::softfloat& a);
+CV_EXPORTS int cvCeil(const cv::softdouble& a);
+
+namespace cv
+{
+/** @brief Saturate casts */
+template<typename _Tp> static inline _Tp saturate_cast(softfloat a)  { return _Tp(a); }
+template<typename _Tp> static inline _Tp saturate_cast(softdouble a) { return _Tp(a); }
+
+template<> inline uchar saturate_cast<uchar>(softfloat a)  { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); }
+template<> inline uchar saturate_cast<uchar>(softdouble a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); }
+
+template<> inline schar saturate_cast<schar>(softfloat a)  { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(softdouble a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); }
+
+template<> inline ushort saturate_cast<ushort>(softfloat a)  { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); }
+template<> inline ushort saturate_cast<ushort>(softdouble a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); }
+
+template<> inline short saturate_cast<short>(softfloat a)  { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); }
+template<> inline short saturate_cast<short>(softdouble a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); }
+
+template<> inline int saturate_cast<int>(softfloat a)  { return cvRound(a); }
+template<> inline int saturate_cast<int>(softdouble a) { return cvRound(a); }
+
+template<> inline int64_t saturate_cast<int64_t>(softfloat a)  { return cvRound(a); }
+template<> inline int64_t saturate_cast<int64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Saturate cast to unsigned integer and unsigned long long integer
+We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+*/
+template<> inline unsigned saturate_cast<unsigned>(softfloat a)  { return cvRound(a); }
+template<> inline unsigned saturate_cast<unsigned>(softdouble a) { return cvRound(a); }
+
+template<> inline uint64_t saturate_cast<uint64_t>(softfloat a)  { return cvRound(a); }
+template<> inline uint64_t saturate_cast<uint64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Min and Max functions */
+inline softfloat  min(const softfloat&  a, const softfloat&  b) { return (a > b) ? b : a; }
+inline softdouble min(const softdouble& a, const softdouble& b) { return (a > b) ? b : a; }
+
+inline softfloat  max(const softfloat&  a, const softfloat&  b) { return (a > b) ? a : b; }
+inline softdouble max(const softdouble& a, const softdouble& b) { return (a > b) ? a : b; }
+
+/** @brief Absolute value */
+inline softfloat  abs( softfloat a)  { softfloat  x; x.v = a.v & ((1U << 31) - 1); return x; }
+inline softdouble abs( softdouble a) { softdouble x; x.v = a.v & ((1ULL << 63) - 1); return x; }
+
+/** @brief Exponent
+
+Special cases:
+- exp(NaN) is NaN
+- exp(-Inf) == 0
+- exp(+Inf) == +Inf
+*/
+CV_EXPORTS softfloat  exp( const softfloat& a);
+CV_EXPORTS softdouble exp( const softdouble& a);
+
+/** @brief Natural logarithm
+
+Special cases:
+- log(NaN), log(x < 0) are NaN
+- log(0) == -Inf
+*/
+CV_EXPORTS softfloat  log( const softfloat& a );
+CV_EXPORTS softdouble log( const softdouble& a );
+
+/** @brief Raising to the power
+
+Special cases:
+- x**NaN is NaN for any x
+- ( |x| == 1 )**Inf is NaN
+- ( |x| > 1 )**+Inf or ( |x| < 1 )**-Inf is +Inf
+- ( |x| > 1 )**-Inf or ( |x| < 1 )**+Inf is 0
+- x ** 0 == 1 for any x
+- x ** 1 == x for any x
+- NaN ** y is NaN for any other y
+- Inf**(y < 0) == 0
+- Inf ** y is +Inf for any other y
+- (x < 0)**y is NaN for any other y if x can't be correctly rounded to integer
+- 0 ** 0 == 1
+- 0 ** (y < 0) is +Inf
+- 0 ** (y > 0) is 0
+*/
+CV_EXPORTS softfloat  pow( const softfloat& a, const softfloat& b);
+CV_EXPORTS softdouble pow( const softdouble& a, const softdouble& b);
+
+/** @brief Cube root
+
+Special cases:
+- cbrt(NaN) is NaN
+- cbrt(+/-Inf) is +/-Inf
+*/
+CV_EXPORTS softfloat cbrt( const softfloat& a );
+
+/** @brief Sine
+
+Special cases:
+- sin(Inf) or sin(NaN) is NaN
+- sin(x) == x when sin(x) is close to zero
+*/
+CV_EXPORTS softdouble sin( const softdouble& a );
+
+/** @brief Cosine
+ *
+Special cases:
+- cos(Inf) or cos(NaN) is NaN
+- cos(x) == +/- 1 when cos(x) is close to +/- 1
+*/
+CV_EXPORTS softdouble cos( const softdouble& a );
+
+//! @} core_utils_softfloat
+
+} // cv::
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/sse_utils.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/sse_utils.hpp
new file mode 100644
index 0000000..0906583
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/sse_utils.hpp
@@ -0,0 +1,652 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_SSE_UTILS_HPP +#define OPENCV_CORE_SSE_UTILS_HPP + +#ifndef __cplusplus +# error sse_utils.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cvdef.h" + +//! @addtogroup core_utils_sse +//! @{ + +#if CV_SSE2 + +inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0); + __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0); + __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1); + __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3); + __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3); + + __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3); + __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3); + + __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2); + __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2); + __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3); + __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3); + + v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2); + v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2); + v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3); + v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3); +} + +inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1); + __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1); + __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0); + __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0); + __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1); + __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5); + __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5); + + __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3); + __m128i 
layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5); + __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5); + + __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3); + __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3); + __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4); + __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4); + __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5); + __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5); + + v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3); + v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3); + v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4); + v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4); + v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5); + v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5); +} + +inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0); + __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0); + __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1); + __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1); + __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0); + __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0); + __m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1); + __m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7); + __m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7); + + __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7); + __m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7); + + __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4); + __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4); + __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5); + __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5); + __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6); + __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6); + __m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7); + __m128i layer4_chunk7 = 
_mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7); + + v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4); + v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4); + v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5); + v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5); + v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6); + v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6); + v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7); + v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7); +} + +inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i v_mask = _mm_set1_epi16(0x00ff); + + __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); + __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); + + __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); + __m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); + __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); + + __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); + __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); + + __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); + __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); + + v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); + v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); +} + +inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i v_mask = _mm_set1_epi16(0x00ff); + + __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); + __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); + __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer4_chunk5 
= _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8)); + + __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); + __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); + __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8)); + + __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); + __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); + __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8)); + + __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); + __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); + __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8)); + + v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); + v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); + v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8)); +} + +inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i v_mask = _mm_set1_epi16(0x00ff); + + __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8)); + __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8)); + __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), 
_mm_srli_epi16(v_b1, 8)); + __m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask)); + __m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8)); + + __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8)); + __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8)); + __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask)); + __m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8)); + __m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask)); + __m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8)); + + __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8)); + __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8)); + __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8)); + __m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask)); + __m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8)); + + __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8)); + __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8)); + __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8)); + __m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask)); + __m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8)); + + v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8)); + v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8)); + v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_a0 = 
_mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8)); + v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask)); + v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8)); +} + +inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0); + __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0); + __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1); + __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2); + __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3); + __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3); + + __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2); + __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3); + __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3); + + v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2); + v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2); + v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3); + v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3); +} + +inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1); + __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1); + __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0); + __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0); + __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1); + __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3); + __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4); + __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5); + __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5); + + __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3); + __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4); + __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5); + __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5); + + v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3); + v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3); + v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4); + v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4); + v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5); + v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5); +} + +inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0); + __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0); + __m128i layer1_chunk2 = 
_mm_unpacklo_epi16(v_r1, v_b1); + __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1); + __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0); + __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0); + __m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1); + __m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1); + + __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4); + __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5); + __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6); + __m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7); + __m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7); + + __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4); + __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5); + __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6); + __m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7); + __m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7); + + v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4); + v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4); + v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5); + v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5); + v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6); + v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6); + v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7); + v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7); +} + +#if CV_SSE4_1 + +inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1) +{ + __m128i v_mask = _mm_set1_epi32(0x0000ffff); + + __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); + __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); + + __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); + __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); + + __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); + __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); + + v_r0 = 
_mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); + v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); +} + +inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, + __m128i & v_g1, __m128i & v_b0, __m128i & v_b1) +{ + __m128i v_mask = _mm_set1_epi32(0x0000ffff); + + __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); + __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); + __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16)); + + __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); + __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); + __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16)); + + __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); + __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); + __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16)); + + v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); + v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); + v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16)); +} + +inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1, + __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1) +{ + __m128i v_mask = _mm_set1_epi32(0x0000ffff); + + __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask)); + __m128i layer3_chunk4 = 
_mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16)); + __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask)); + __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16)); + __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask)); + __m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16)); + __m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask)); + __m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16)); + + __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask)); + __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16)); + __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask)); + __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16)); + __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask)); + __m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16)); + __m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask)); + __m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16)); + + __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask)); + __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16)); + __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask)); + __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16)); + __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask)); + __m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16)); + __m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask)); + __m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16)); + + v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask)); + v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16)); + v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask)); + v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16)); + v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask)); + v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16)); + v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask)); + v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16)); +} + +#endif // CV_SSE4_1 + +inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) +{ + __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0); + __m128 layer1_chunk1 = 
_mm_unpackhi_ps(v_r0, v_g0); + __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1); + __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1); + + __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2); + __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2); + __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3); + __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk3); + + v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk2); + v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk2); + v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk3); + v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk3); +} + +inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, + __m128 & v_g1, __m128 & v_b0, __m128 & v_b1) +{ + __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1); + __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1); + __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0); + __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0); + __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1); + __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1); + + __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3); + __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3); + __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4); + __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4); + __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5); + __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5); + + v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3); + v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3); + v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4); + v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4); + v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5); + v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5); +} + +inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, + __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1) +{ + __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0); + __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0); + __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1); + __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1); + __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0); + __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0); + __m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1); + __m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1); + + __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4); + __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4); + __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5); + __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5); + __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6); + __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6); + __m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7); + __m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7); + + v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4); + v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4); + v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5); + v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5); + v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6); + v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6); + v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7); + v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7); +} + +inline void _mm_interleave_ps(__m128 & 
v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) +{ + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; + + __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); + __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); + __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo); + __m128 layer2_chunk3 = _mm_shuffle_ps(v_g0, v_g1, mask_hi); + + __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo); + __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi); + __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo); + __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi); + + v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo); + v_g0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi); + v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo); + v_g1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi); +} + +inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, + __m128 & v_g1, __m128 & v_b0, __m128 & v_b1) +{ + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; + + __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); + __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); + __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo); + __m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi); + __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo); + __m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi); + + __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo); + __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi); + __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo); + __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi); + __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo); + __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi); + + v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo); + v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi); + v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo); + v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi); + v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo); + v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi); +} + +inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, + __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1) +{ + enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) }; + + __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); + __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); + __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo); + __m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi); + __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo); + __m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi); + __m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo); + __m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi); + + __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo); + __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi); + __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo); + __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi); + __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, 
mask_lo); + __m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi); + __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo); + __m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi); + + v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo); + v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi); + v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo); + v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi); + v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo); + v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi); + v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo); + v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi); +} + +#endif // CV_SSE2 + +//! @} + +#endif //OPENCV_CORE_SSE_UTILS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/traits.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/traits.hpp new file mode 100644 index 0000000..52ab083 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/traits.hpp @@ -0,0 +1,417 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#ifndef OPENCV_CORE_TRAITS_HPP
+#define OPENCV_CORE_TRAITS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+namespace cv
+{
+
+//#define OPENCV_TRAITS_ENABLE_DEPRECATED
+
+//! @addtogroup core_basic
+//! @{
+
+/** @brief Template "trait" class for OpenCV primitive data types.
+
+@note Deprecated. This is replaced by "single purpose" traits: traits::Type and traits::Depth
+
+A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed
+short, int, float, double, or a tuple of values of one of these types, where all the values in the
+tuple have the same type. Any primitive type from the list can be defined by an identifier in the
+form CV_\<bit-depth\>{U|S|F}C(\<number_of_channels\>), for example: uchar \~ CV_8UC1, 3-element
+floating-point tuple \~ CV_32FC3, and so on. A universal OpenCV structure that is able to store a
+single instance of such a primitive data type is Vec. Multiple instances of such a type can be
+stored in a std::vector, Mat, Mat_, SparseMat, SparseMat_, or any other container that is able to
+store Vec instances.
+
+The DataType class is basically used to provide a description of such primitive data types without
+adding any fields or methods to the corresponding classes (and it is actually impossible to add
+anything to primitive C/C++ data types). This technique is known in C++ as class traits. It is not
+DataType itself that is used but its specialized versions, such as:
+@code
+    template<> class DataType<uchar>
+    {
+        typedef uchar value_type;
+        typedef int work_type;
+        typedef uchar channel_type;
+        enum { channel_type = CV_8U, channels = 1, fmt='u', type = CV_8U };
+    };
+    ...
+    template<typename _Tp> DataType<std::complex<_Tp> >
+    {
+        typedef std::complex<_Tp> value_type;
+        typedef std::complex<_Tp> work_type;
+        typedef _Tp channel_type;
+        // DataDepth is another helper trait class
+        enum { depth = DataDepth<_Tp>::value, channels=2,
+               fmt=(channels-1)*256+DataDepth<_Tp>::fmt,
+               type=CV_MAKETYPE(depth, channels) };
+    };
+    ...
+@endcode
+The main purpose of this class is to convert compilation-time type information to an
+OpenCV-compatible data type identifier, for example:
+@code
+    // allocates a 30x40 floating-point matrix
+    Mat A(30, 40, DataType<float>::type);
+
+    Mat B = Mat_<std::complex<double> >(3, 3);
+    // the statement below will print 6, 2, that is depth == CV_64F, channels == 2
+    cout << B.depth() << ", " << B.channels() << endl;
+@endcode
+So, such traits are used to tell OpenCV which data type you are working with, even if such a type is
+not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV
+defines the proper specialized template class DataType\<std::complex\<double\> \>. This mechanism is also
+useful (and used in OpenCV this way) for generic algorithms implementations.
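+
+With the deprecation note above in mind, new code would normally query the "single purpose"
+traits directly; a minimal sketch:
+@code
+    int t = traits::Type<float>::value;   // == CV_32F (single-channel)
+    int d = traits::Depth<double>::value; // == CV_64F
+@endcode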
+
+@note Default values were dropped to stop confusing developers about using of unsupported types (see #7599)
+*/
+template<typename _Tp> class DataType
+{
+public:
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+    typedef _Tp value_type;
+    typedef value_type work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 1,
+           depth = -1,
+           channels = 1,
+           fmt = 0,
+           type = CV_MAKETYPE(depth, channels)
+         };
+#endif
+};
+
+template<> class DataType<bool>
+{
+public:
+    typedef bool value_type;
+    typedef int work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_8U,
+           channels = 1,
+           fmt = (int)'u',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<uchar>
+{
+public:
+    typedef uchar value_type;
+    typedef int work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_8U,
+           channels = 1,
+           fmt = (int)'u',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<schar>
+{
+public:
+    typedef schar value_type;
+    typedef int work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_8S,
+           channels = 1,
+           fmt = (int)'c',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<char>
+{
+public:
+    typedef schar value_type;
+    typedef int work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_8S,
+           channels = 1,
+           fmt = (int)'c',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<ushort>
+{
+public:
+    typedef ushort value_type;
+    typedef int work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_16U,
+           channels = 1,
+           fmt = (int)'w',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<short>
+{
+public:
+    typedef short value_type;
+    typedef int work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_16S,
+           channels = 1,
+           fmt = (int)'s',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<int>
+{
+public:
+    typedef int value_type;
+    typedef value_type work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_32S,
+           channels = 1,
+           fmt = (int)'i',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<float>
+{
+public:
+    typedef float value_type;
+    typedef value_type work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_32F,
+           channels = 1,
+           fmt = (int)'f',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<double>
+{
+public:
+    typedef double value_type;
+    typedef value_type work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_64F,
+           channels = 1,
+           fmt = (int)'d',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<float16_t>
+{
+public:
+    typedef float16_t value_type;
+    typedef float work_type;
+    typedef value_type channel_type;
+    typedef value_type vec_type;
+    enum { generic_type = 0,
+           depth = CV_16F,
+           channels = 1,
+           fmt = (int)'h',
+           type = CV_MAKETYPE(depth, channels)
+         };
+};
+
+/** @brief A helper class for cv::DataType
+
+The class is specialized for each fundamental numerical data type supported by OpenCV. It provides
+DataDepth<T>::value constant.
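+
+For example (a minimal sketch):
+@code
+    int d = DataDepth<float>::value; // == CV_32F
+@endcode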
+*/
+template<typename _Tp> class DataDepth
+{
+public:
+    enum
+    {
+        value = DataType<_Tp>::depth,
+        fmt   = DataType<_Tp>::fmt
+    };
+};
+
+
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+
+template<int _depth> class TypeDepth
+{
+#ifdef OPENCV_TRAITS_ENABLE_LEGACY_DEFAULTS
+    enum { depth = CV_USRTYPE1 };
+    typedef void value_type;
+#endif
+};
+
+template<> class TypeDepth<CV_8U>
+{
+    enum { depth = CV_8U };
+    typedef uchar value_type;
+};
+
+template<> class TypeDepth<CV_8S>
+{
+    enum { depth = CV_8S };
+    typedef schar value_type;
+};
+
+template<> class TypeDepth<CV_16U>
+{
+    enum { depth = CV_16U };
+    typedef ushort value_type;
+};
+
+template<> class TypeDepth<CV_16S>
+{
+    enum { depth = CV_16S };
+    typedef short value_type;
+};
+
+template<> class TypeDepth<CV_32S>
+{
+    enum { depth = CV_32S };
+    typedef int value_type;
+};
+
+template<> class TypeDepth<CV_32F>
+{
+    enum { depth = CV_32F };
+    typedef float value_type;
+};
+
+template<> class TypeDepth<CV_64F>
+{
+    enum { depth = CV_64F };
+    typedef double value_type;
+};
+
+template<> class TypeDepth<CV_16F>
+{
+    enum { depth = CV_16F };
+    typedef float16_t value_type;
+};
+
+#endif
+
+//! @}
+
+namespace traits {
+
+namespace internal {
+#define CV_CREATE_MEMBER_CHECK(X) \
+template<typename T> class CheckMember_##X { \
+    struct Fallback { int X; }; \
+    struct Derived : T, Fallback { }; \
+    template<typename U, U> struct Check; \
+    typedef char CV_NO[1]; \
+    typedef char CV_YES[2]; \
+    template<typename U> static CV_NO & func(Check<int Fallback::*, &U::X> *); \
+    template<typename U> static CV_YES & func(...); \
+public: \
+    typedef CheckMember_##X type; \
+    enum { value = sizeof(func<Derived>(0)) == sizeof(CV_YES) }; \
+};
+
+CV_CREATE_MEMBER_CHECK(fmt)
+CV_CREATE_MEMBER_CHECK(type)
+
+} // namespace internal
+
+
+template<typename T>
+struct Depth
+{ enum { value = DataType<T>::depth }; };
+
+template<typename T>
+struct Type
+{ enum { value = DataType<T>::type }; };
+
+/** Similar to traits::Type but has value = -1 in case of unknown type (instead of compiler error) */
+template<typename T, bool available = internal::CheckMember_type< DataType<T> >::value >
+struct SafeType {};
+
+template<typename T>
+struct SafeType<T, false>
+{ enum { value = -1 }; };
+
+template<typename T>
+struct SafeType<T, true>
+{ enum { value = Type<T>::value }; };
+
+
+template<typename T, bool available = internal::CheckMember_fmt< DataType<T> >::value >
+struct SafeFmt {};
+
+template<typename T>
+struct SafeFmt<T, false>
+{ enum { fmt = 0 }; };
+
+template<typename T>
+struct SafeFmt<T, true>
+{ enum { fmt = DataType<T>::fmt }; };
+
+
+} // namespace
+
+} // cv
+
+#endif // OPENCV_CORE_TRAITS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/types.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/types.hpp
new file mode 100644
index 0000000..2867520
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/types.hpp
@@ -0,0 +1,2439 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TYPES_HPP
+#define OPENCV_CORE_TYPES_HPP
+
+#ifndef __cplusplus
+#  error types.hpp header must be compiled as C++
+#endif
+
+#include <climits>
+#include <cfloat>
+#include <vector>
+#include <limits>
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/matx.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+//////////////////////////////// Complex //////////////////////////////
+
+/** @brief  A complex number class.
+
+  The template class is similar and compatible with std::complex, however it provides slightly
+  more convenient access to the real and imaginary parts through simple field access, as opposed
+  to std::complex::real() and std::complex::imag().
+*/
+template<typename _Tp> class Complex
+{
+public:
+
+    //! default constructor
+    Complex();
+    Complex( _Tp _re, _Tp _im = 0 );
+
+    //! conversion to another data type
+    template<typename T2> operator Complex<T2>() const;
+    //! conjugation
+    Complex conj() const;
+
+    _Tp re, im; //< the real and the imaginary parts
+};
+
+typedef Complex<float> Complexf;
+typedef Complex<double> Complexd;
+
+template<typename _Tp> class DataType< Complex<_Tp> >
+{
+public:
+    typedef Complex<_Tp> value_type;
+    typedef value_type work_type;
+    typedef _Tp channel_type;
+
+    enum { generic_type = 0,
+           channels = 2,
+           fmt = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Complex<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Complex<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
+
+
+//////////////////////////////// Point_ ////////////////////////////////
+
+/** @brief Template class for 2D points specified by its coordinates `x` and `y`.
+
+An instance of the class is interchangeable with C structures, CvPoint and CvPoint2D32f . There is
+also a cast operator to convert point coordinates to the specified type.
The conversion from
+floating-point coordinates to integer coordinates is done by rounding. Commonly, the conversion
+uses this operation for each of the coordinates. Besides the class members listed in the
+declaration above, the following operations on points are implemented:
+@code
+    pt1 = pt2 + pt3;
+    pt1 = pt2 - pt3;
+    pt1 = pt2 * a;
+    pt1 = a * pt2;
+    pt1 = pt2 / a;
+    pt1 += pt2;
+    pt1 -= pt2;
+    pt1 *= a;
+    pt1 /= a;
+    double value = norm(pt); // L2 norm
+    pt1 == pt2;
+    pt1 != pt2;
+@endcode
+For your convenience, the following type aliases are defined:
+@code
+    typedef Point_<int> Point2i;
+    typedef Point2i Point;
+    typedef Point_<float> Point2f;
+    typedef Point_<double> Point2d;
+@endcode
+Example:
+@code
+    Point2f a(0.3f, 0.f), b(0.f, 0.4f);
+    Point pt = (a + b)*10.f;
+    cout << pt.x << ", " << pt.y << endl;
+@endcode
+*/
+template<typename _Tp> class Point_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Point_();
+    Point_(_Tp _x, _Tp _y);
+#if (defined(__GNUC__) && __GNUC__ < 5) && !defined(__clang__)  // GCC 4.x bug. Details: https://github.com/opencv/opencv/pull/20837
+    Point_(const Point_& pt);
+    Point_(Point_&& pt) CV_NOEXCEPT = default;
+#elif OPENCV_ABI_COMPATIBILITY < 500
+    Point_(const Point_& pt) = default;
+    Point_(Point_&& pt) CV_NOEXCEPT = default;
+#endif
+    Point_(const Size_<_Tp>& sz);
+    Point_(const Vec<_Tp, 2>& v);
+
+#if (defined(__GNUC__) && __GNUC__ < 5) && !defined(__clang__)  // GCC 4.x bug. Details: https://github.com/opencv/opencv/pull/20837
+    Point_& operator = (const Point_& pt);
+    Point_& operator = (Point_&& pt) CV_NOEXCEPT = default;
+#elif OPENCV_ABI_COMPATIBILITY < 500
+    Point_& operator = (const Point_& pt) = default;
+    Point_& operator = (Point_&& pt) CV_NOEXCEPT = default;
+#endif
+    //! conversion to another data type
+    template<typename _Tp2> operator Point_<_Tp2>() const;
+
+    //! conversion to the old-style C structures
+    operator Vec<_Tp, 2>() const;
+
+    //! dot product
+    _Tp dot(const Point_& pt) const;
+    //! dot product computed in double-precision arithmetics
+    double ddot(const Point_& pt) const;
+    //! cross-product
+    double cross(const Point_& pt) const;
+    //! checks whether the point is inside the specified rectangle
+    bool inside(const Rect_<_Tp>& r) const;
+    _Tp x; //!< x coordinate of the point
+    _Tp y; //!< y coordinate of the point
+};
+
+typedef Point_<int> Point2i;
+typedef Point_<int64> Point2l;
+typedef Point_<float> Point2f;
+typedef Point_<double> Point2d;
+typedef Point2i Point;
+
+template<typename _Tp> class DataType< Point_<_Tp> >
+{
+public:
+    typedef Point_<_Tp> value_type;
+    typedef Point_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp channel_type;
+
+    enum { generic_type = 0,
+           channels = 2,
+           fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Point_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Point_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
+
+
+//////////////////////////////// Point3_ ////////////////////////////////
+
+/** @brief Template class for 3D points specified by its coordinates `x`, `y` and `z`.
+
+An instance of the class is interchangeable with the C structure CvPoint2D32f . Similarly to
+Point_ , the coordinates of 3D points can be converted to another type. The vector arithmetic and
+comparison operations are also supported.
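+
+A minimal usage sketch:
+@code
+    Point3d p(1, 0, 0), q(0, 1, 0);
+    Point3d r = p.cross(q); // (0, 0, 1)
+    double d = p.dot(q);    // 0
+@endcode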
+
+The following Point3_\<\> aliases are available:
+@code
+    typedef Point3_<int> Point3i;
+    typedef Point3_<float> Point3f;
+    typedef Point3_<double> Point3d;
+@endcode
+@see cv::Point3i, cv::Point3f and cv::Point3d
+*/
+template<typename _Tp> class Point3_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Point3_();
+    Point3_(_Tp _x, _Tp _y, _Tp _z);
+#if OPENCV_ABI_COMPATIBILITY < 500
+    Point3_(const Point3_& pt) = default;
+    Point3_(Point3_&& pt) CV_NOEXCEPT = default;
+#endif
+    explicit Point3_(const Point_<_Tp>& pt);
+    Point3_(const Vec<_Tp, 3>& v);
+
+#if OPENCV_ABI_COMPATIBILITY < 500
+    Point3_& operator = (const Point3_& pt) = default;
+    Point3_& operator = (Point3_&& pt) CV_NOEXCEPT = default;
+#endif
+    //! conversion to another data type
+    template<typename _Tp2> operator Point3_<_Tp2>() const;
+    //! conversion to cv::Vec<>
+    operator Vec<_Tp, 3>() const;
+
+    //! dot product
+    _Tp dot(const Point3_& pt) const;
+    //! dot product computed in double-precision arithmetics
+    double ddot(const Point3_& pt) const;
+    //! cross product of the 2 3D points
+    Point3_ cross(const Point3_& pt) const;
+    _Tp x; //!< x coordinate of the 3D point
+    _Tp y; //!< y coordinate of the 3D point
+    _Tp z; //!< z coordinate of the 3D point
+};
+
+typedef Point3_<int> Point3i;
+typedef Point3_<float> Point3f;
+typedef Point3_<double> Point3d;
+
+template<typename _Tp> class DataType< Point3_<_Tp> >
+{
+public:
+    typedef Point3_<_Tp> value_type;
+    typedef Point3_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp channel_type;
+
+    enum { generic_type = 0,
+           channels = 3,
+           fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Point3_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Point3_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 3) }; };
+} // namespace
+
+//////////////////////////////// Size_ ////////////////////////////////
+
+/** @brief Template class for specifying the size of an image or rectangle.
+
+The class includes two members called width and height. The structure can be converted to and from
+the old OpenCV structures CvSize and CvSize2D32f . The same set of arithmetic and comparison
+operations as for Point_ is available.
+
+OpenCV defines the following Size_\<\> aliases:
+@code
+    typedef Size_<int> Size2i;
+    typedef Size2i Size;
+    typedef Size_<float> Size2f;
+@endcode
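+
+A minimal usage sketch:
+@code
+    Size sz(640, 480);
+    int a = sz.area();           // 307200
+    double r = sz.aspectRatio(); // 640.0 / 480.0
+@endcode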
+*/
+template<typename _Tp> class Size_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Size_();
+    Size_(_Tp _width, _Tp _height);
+#if OPENCV_ABI_COMPATIBILITY < 500
+    Size_(const Size_& sz) = default;
+    Size_(Size_&& sz) CV_NOEXCEPT = default;
+#endif
+    Size_(const Point_<_Tp>& pt);
+
+#if OPENCV_ABI_COMPATIBILITY < 500
+    Size_& operator = (const Size_& sz) = default;
+    Size_& operator = (Size_&& sz) CV_NOEXCEPT = default;
+#endif
+    //! the area (width*height)
+    _Tp area() const;
+    //! aspect ratio (width/height)
+    double aspectRatio() const;
+    //! true if empty
+    bool empty() const;
+
+    //! conversion to another data type
+    template<typename _Tp2> operator Size_<_Tp2>() const;
+
+    _Tp width;  //!< the width
+    _Tp height; //!< the height
+};
+
+typedef Size_<int> Size2i;
+typedef Size_<int64> Size2l;
+typedef Size_<float> Size2f;
+typedef Size_<double> Size2d;
+typedef Size2i Size;
+
+template<typename _Tp> class DataType< Size_<_Tp> >
+{
+public:
+    typedef Size_<_Tp> value_type;
+    typedef Size_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp channel_type;
+
+    enum { generic_type = 0,
+           channels = 2,
+           fmt = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Size_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Size_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
+
+//////////////////////////////// Rect_ ////////////////////////////////
+
+/** @brief Template class for 2D rectangles
+
+described by the following parameters:
+- Coordinates of the top-left corner. This is a default interpretation of Rect_::x and Rect_::y
+  in OpenCV. Though, in your algorithms you may count x and y from the bottom-left corner.
+- Rectangle width and height.
+
+OpenCV typically assumes that the top and left boundary of the rectangle are inclusive, while the
+right and bottom boundaries are not. For example, the method Rect_::contains returns true if
+
+\f[x  \leq pt.x < x+width,
+      y  \leq pt.y < y+height\f]
+
+Virtually every loop over an image ROI in OpenCV (where ROI is specified by Rect_\<int\> ) is
+implemented as:
+@code
+    for(int y = roi.y; y < roi.y + roi.height; y++)
+        for(int x = roi.x; x < roi.x + roi.width; x++)
+        {
+            // ...
+        }
+@endcode
+In addition to the class members, the following operations on rectangles are implemented:
+-   \f$\texttt{rect} = \texttt{rect} \pm \texttt{point}\f$ (shifting a rectangle by a certain offset)
+-   \f$\texttt{rect} = \texttt{rect} \pm \texttt{size}\f$ (expanding or shrinking a rectangle by a
+    certain amount)
+-   rect += point, rect -= point, rect += size, rect -= size (augmenting operations)
+-   rect = rect1 & rect2 (rectangle intersection)
+-   rect = rect1 | rect2 (minimum area rectangle containing rect1 and rect2 )
+-   rect &= rect1, rect |= rect1 (and the corresponding augmenting operations)
+-   rect == rect1, rect != rect1 (rectangle comparison)
+
+This is an example how the partial ordering on rectangles can be established (rect1 \f$\subseteq\f$
+rect2):
+@code
+    template<typename _Tp> inline bool
+    operator <= (const Rect_<_Tp>& r1, const Rect_<_Tp>& r2)
+    {
+        return (r1 & r2) == r1;
+    }
+@endcode
+For your convenience, the Rect_\<\> alias is available: cv::Rect
+*/
+template<typename _Tp> class Rect_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Rect_();
+    Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height);
+#if OPENCV_ABI_COMPATIBILITY < 500
+    Rect_(const Rect_& r) = default;
+    Rect_(Rect_&& r) CV_NOEXCEPT = default;
+#endif
+    Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz);
+    Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2);
+
+#if OPENCV_ABI_COMPATIBILITY < 500
+    Rect_& operator = (const Rect_& r) = default;
+    Rect_& operator = (Rect_&& r) CV_NOEXCEPT = default;
+#endif
+    //! the top-left corner
+    Point_<_Tp> tl() const;
+    //! the bottom-right corner
+    Point_<_Tp> br() const;
+
+    //! size (width, height) of the rectangle
+    Size_<_Tp> size() const;
+    //! area (width*height) of the rectangle
+    _Tp area() const;
+    //! true if empty
+    bool empty() const;
+
+    //! conversion to another data type
+    template<typename _Tp2> operator Rect_<_Tp2>() const;
+
+    //! checks whether the rectangle contains the point
+    bool contains(const Point_<_Tp>& pt) const;
+
+    _Tp x; //!< x coordinate of the top-left corner
+    _Tp y; //!< y coordinate of the top-left corner
+    _Tp width; //!< width of the rectangle
+    _Tp height; //!< height of the rectangle
+};
+
+typedef Rect_<int> Rect2i;
+typedef Rect_<float> Rect2f;
+typedef Rect_<double> Rect2d;
+typedef Rect2i Rect;
+
+template<typename _Tp> class DataType< Rect_<_Tp> >
+{
+public:
+    typedef Rect_<_Tp> value_type;
+    typedef Rect_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp channel_type;
+
+    enum { generic_type = 0,
+           channels = 4,
+           fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Rect_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Rect_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; };
+} // namespace
+
+///////////////////////////// RotatedRect /////////////////////////////
+
+/** @brief The class represents rotated (i.e. not up-right) rectangles on a plane.
+
+Each rectangle is specified by the center point (mass center), length of each side (represented by
+#Size2f structure) and the rotation angle in degrees.
+
+The sample below demonstrates how to use RotatedRect:
+@snippet snippets/core_various.cpp RotatedRect_demo
+![image](pics/rotatedrect.png)
+
+@sa CamShift, fitEllipse, minAreaRect, CvBox2D
+*/
+class CV_EXPORTS RotatedRect
+{
+public:
+    //! default constructor
+    RotatedRect();
+    /** full constructor
+    @param center The rectangle mass center.
+    @param size Width and height of the rectangle.
+    @param angle The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc.,
+    the rectangle becomes an up-right rectangle.
+    */
+    RotatedRect(const Point2f& center, const Size2f& size, float angle);
+    /**
+    Any 3 end points of the RotatedRect. They must be given in order (either clockwise or
+    anticlockwise).
+     */
+    RotatedRect(const Point2f& point1, const Point2f& point2, const Point2f& point3);
+
+    /** returns 4 vertices of the rectangle
+    @param pts The points array for storing rectangle vertices. The order is bottomLeft, topLeft, topRight, bottomRight.
+    */
+    void points(Point2f pts[]) const;
+    //! returns the minimal up-right integer rectangle containing the rotated rectangle
+    Rect boundingRect() const;
+    //! returns the minimal (exact) floating point rectangle containing the rotated rectangle, not intended for use with images
+    Rect_<float> boundingRect2f() const;
+    //! returns the rectangle mass center
+    Point2f center;
+    //! returns width and height of the rectangle
+    Size2f size;
+    //! returns the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+    float angle;
+};
+
+template<> class DataType< RotatedRect >
+{
+public:
+    typedef RotatedRect value_type;
+    typedef value_type work_type;
+    typedef float channel_type;
+
+    enum { generic_type = 0,
+           channels = (int)sizeof(value_type)/sizeof(channel_type), // 5
+           fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<>
+struct Depth< RotatedRect > { enum { value = Depth<float>::value }; };
+template<>
+struct Type< RotatedRect > { enum { value = CV_MAKETYPE(Depth<float>::value, (int)sizeof(RotatedRect)/sizeof(float)) }; };
+} // namespace
+
+
+//////////////////////////////// Range /////////////////////////////////
+
+/** @brief Template class specifying a continuous subsequence (slice) of a sequence.
+
+The class is used to specify a row or a column span in a matrix ( Mat ) and for many other purposes.
+Range(a,b) is basically the same as a:b in Matlab or a..b in Python. As in Python, start is an
+inclusive left boundary of the range and end is an exclusive right boundary of the range. Such a
+half-opened interval is usually denoted as \f$[start,end)\f$ .
+
+The static method Range::all() returns a special variable that means "the whole sequence" or "the
+whole range", just like " : " in Matlab or " ... " in Python. All the methods and functions in
+OpenCV that take Range support this special Range::all() value. But, of course, in case of your own
+custom processing, you will probably have to check and handle it explicitly:
+@code
+    void my_function(..., const Range& r, ....)
+    {
+        if(r == Range::all()) {
+            // process all the data
+        }
+        else {
+            // process [r.start, r.end)
+        }
+    }
+@endcode
+*/
+class CV_EXPORTS Range
+{
+public:
+    Range();
+    Range(int _start, int _end);
+    int size() const;
+    bool empty() const;
+    static Range all();
+
+    int start, end;
+};
+
+template<> class DataType<Range>
+{
+public:
+    typedef Range value_type;
+    typedef value_type work_type;
+    typedef int channel_type;
+
+    enum { generic_type = 0,
+           channels = 2,
+           fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<>
+struct Depth< Range > { enum { value = Depth<int>::value }; };
+template<>
+struct Type< Range > { enum { value = CV_MAKETYPE(Depth<int>::value, 2) }; };
+} // namespace
+
+
+//////////////////////////////// Scalar_ ///////////////////////////////
+
+/** @brief Template class for a 4-element vector derived from Vec.
+
+Being derived from Vec\<_Tp, 4\> , Scalar\_ and Scalar can be used just as typical 4-element
+vectors. In addition, they can be converted to/from CvScalar . The type Scalar is widely used in
+OpenCV to pass pixel values.
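+
+A minimal usage sketch:
+@code
+    Scalar s(1, 2, 3, 4);
+    Scalar t = s.mul(Scalar::all(2)); // (2, 4, 6, 8)
+    bool real = Scalar(5).isReal();   // true: v1 == v2 == v3 == 0
+@endcode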
+*/
+template<typename _Tp> class Scalar_ : public Vec<_Tp, 4>
+{
+public:
+    //! default constructor
+    Scalar_();
+    Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0);
+    Scalar_(_Tp v0);
+
+    Scalar_(const Scalar_& s);
+    Scalar_(Scalar_&& s) CV_NOEXCEPT;
+
+    Scalar_& operator=(const Scalar_& s);
+    Scalar_& operator=(Scalar_&& s) CV_NOEXCEPT;
+
+    template<typename _Tp2, int cn>
+    Scalar_(const Vec<_Tp2, cn>& v);
+
+    //! returns a scalar with all elements set to v0
+    static Scalar_<_Tp> all(_Tp v0);
+
+    //! conversion to another data type
+    template<typename T2> operator Scalar_<T2>() const;
+
+    //! per-element product
+    Scalar_<_Tp> mul(const Scalar_<_Tp>& a, double scale=1 ) const;
+
+    //! returns (v0, -v1, -v2, -v3)
+    Scalar_<_Tp> conj() const;
+
+    //! returns true iff v1 == v2 == v3 == 0
+    bool isReal() const;
+};
+
+typedef Scalar_<double> Scalar;
+
+template<typename _Tp> class DataType< Scalar_<_Tp> >
+{
+public:
+    typedef Scalar_<_Tp> value_type;
+    typedef Scalar_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp channel_type;
+
+    enum { generic_type = 0,
+           channels = 4,
+           fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Scalar_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Scalar_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; };
+} // namespace
+
+
+/////////////////////////////// KeyPoint ////////////////////////////////
+
+/** @brief Data structure for salient point detectors.
+
+The class instance stores a keypoint, i.e. a point feature found by one of many available keypoint
+detectors, such as Harris corner detector, #FAST, %StarDetector, %SURF, %SIFT etc.
+
+The keypoint is characterized by the 2D position, scale (proportional to the diameter of the
+neighborhood that needs to be taken into account), orientation and some other parameters. The
+keypoint neighborhood is then analyzed by another algorithm that builds a descriptor (usually
+represented as a feature vector). The keypoints representing the same object in different images
+can then be matched using %KDTree or another method.
+*/
+class CV_EXPORTS_W_SIMPLE KeyPoint
+{
+public:
+    //! the default constructor
+    CV_WRAP KeyPoint();
+    /**
+    @param pt x & y coordinates of the keypoint
+    @param size keypoint diameter
+    @param angle keypoint orientation
+    @param response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param octave pyramid octave in which the keypoint has been detected
+    @param class_id object id
+     */
+    KeyPoint(Point2f pt, float size, float angle=-1, float response=0, int octave=0, int class_id=-1);
+    /**
+    @param x x-coordinate of the keypoint
+    @param y y-coordinate of the keypoint
+    @param size keypoint diameter
+    @param angle keypoint orientation
+    @param response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param octave pyramid octave in which the keypoint has been detected
+    @param class_id object id
+     */
+    CV_WRAP KeyPoint(float x, float y, float size, float angle=-1, float response=0, int octave=0, int class_id=-1);
+
+    size_t hash() const;
+
+    /**
+    This method converts vector of keypoints to vector of points or the reverse, where each keypoint is
+    assigned the same size and the same orientation.
+
+    @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+    @param points2f Array of (x,y) coordinates of each keypoint
+    @param keypointIndexes Array of indexes of keypoints to be converted to points.
(Acts like a mask to
+    convert only specified keypoints)
+    */
+    CV_WRAP static void convert(const std::vector<KeyPoint>& keypoints,
+                                CV_OUT std::vector<Point2f>& points2f,
+                                const std::vector<int>& keypointIndexes=std::vector<int>());
+    /** @overload
+    @param points2f Array of (x,y) coordinates of each keypoint
+    @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+    @param size keypoint diameter
+    @param response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param octave pyramid octave in which the keypoint has been detected
+    @param class_id object id
+    */
+    CV_WRAP static void convert(const std::vector<Point2f>& points2f,
+                                CV_OUT std::vector<KeyPoint>& keypoints,
+                                float size=1, float response=1, int octave=0, int class_id=-1);
+
+    /**
+    This method computes overlap for pair of keypoints. Overlap is the ratio between area of keypoint
+    regions' intersection and area of keypoint regions' union (considering keypoint region as circle).
+    If they don't overlap, we get zero. If they coincide at same location with same size, we get 1.
+    @param kp1 First keypoint
+    @param kp2 Second keypoint
+    */
+    CV_WRAP static float overlap(const KeyPoint& kp1, const KeyPoint& kp2);
+
+    CV_PROP_RW Point2f pt; //!< coordinates of the keypoints
+    CV_PROP_RW float size; //!< diameter of the meaningful keypoint neighborhood
+    CV_PROP_RW float angle; //!< computed orientation of the keypoint (-1 if not applicable);
+                            //!< it's in [0,360) degrees and measured relative to
+                            //!< image coordinate system, ie in clockwise.
+    CV_PROP_RW float response; //!< the response by which the most strong keypoints have been selected. Can be used for the further sorting or subsampling
+    CV_PROP_RW int octave; //!< octave (pyramid layer) from which the keypoint has been extracted
+    CV_PROP_RW int class_id; //!< object class (if the keypoints need to be clustered by an object they belong to)
+};
+
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+template<> class DataType<KeyPoint>
+{
+public:
+    typedef KeyPoint value_type;
+    typedef float work_type;
+    typedef float channel_type;
+
+    enum { generic_type = 0,
+           depth = DataType<channel_type>::depth,
+           channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 7
+           fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+#endif
+
+
+//////////////////////////////// DMatch /////////////////////////////////
+
+/** @brief Class for matching keypoint descriptors
+
+query descriptor index, train descriptor index, train image index, and distance between
+descriptors.
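+
+A minimal usage sketch (matches are typically ranked by distance, smaller is better):
+@code
+    std::vector<DMatch> matches = ...; // output of a descriptor matcher
+    std::sort(matches.begin(), matches.end()); // uses DMatch::operator<
+@endcode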
+*/
+class CV_EXPORTS_W_SIMPLE DMatch
+{
+public:
+    CV_WRAP DMatch();
+    CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance);
+    CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance);
+
+    CV_PROP_RW int queryIdx; //!< query descriptor index
+    CV_PROP_RW int trainIdx; //!< train descriptor index
+    CV_PROP_RW int imgIdx;   //!< train image index
+
+    CV_PROP_RW float distance;
+
+    // less is better
+    bool operator<(const DMatch &m) const;
+};
+
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+template<> class DataType<DMatch>
+{
+public:
+    typedef DMatch value_type;
+    typedef int work_type;
+    typedef int channel_type;
+
+    enum { generic_type = 0,
+           depth = DataType<channel_type>::depth,
+           channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 4
+           fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+#endif
+
+
+///////////////////////////// TermCriteria //////////////////////////////
+
+/** @brief The class defining termination criteria for iterative algorithms.
+
+You can initialize it by default constructor and then override any parameters, or the structure may
+be fully initialized using the advanced variant of the constructor.
+*/
+class CV_EXPORTS TermCriteria
+{
+public:
+    /**
+      Criteria type, can be one of: COUNT, EPS or COUNT + EPS
+    */
+    enum Type
+    {
+        COUNT=1, //!< the maximum number of iterations or elements to compute
+        MAX_ITER=COUNT, //!< ditto
+        EPS=2 //!< the desired accuracy or change in parameters at which the iterative algorithm stops
+    };
+
+    //! default constructor
+    TermCriteria();
+    /**
+    @param type The type of termination criteria, one of TermCriteria::Type
+    @param maxCount The maximum number of iterations or elements to compute.
+    @param epsilon The desired accuracy or change in parameters at which the iterative algorithm stops.
+    */
+    TermCriteria(int type, int maxCount, double epsilon);
+
+    inline bool isValid() const
+    {
+        const bool isCount = (type & COUNT) && maxCount > 0;
+        const bool isEps = (type & EPS) && !cvIsNaN(epsilon);
+        return isCount || isEps;
+    }
+
+    int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
+    int maxCount; //!< the maximum number of iterations/elements
+    double epsilon; //!< the desired accuracy
+};
+
+
+//! @} core_basic
+
+///////////////////////// raster image moments //////////////////////////
+
+//! @addtogroup imgproc_shape
+//! @{
+
+/** @brief struct returned by cv::moments
+
+The spatial moments \f$\texttt{Moments::m}_{ji}\f$ are computed as:
+
+\f[\texttt{m} _{ji}= \sum _{x,y}  \left ( \texttt{array} (x,y)  \cdot x^j  \cdot y^i \right )\f]
+
+The central moments \f$\texttt{Moments::mu}_{ji}\f$ are computed as:
+
+\f[\texttt{mu} _{ji}= \sum _{x,y}  \left ( \texttt{array} (x,y)  \cdot (x -  \bar{x} )^j  \cdot (y -  \bar{y} )^i \right )\f]
+
+where \f$(\bar{x}, \bar{y})\f$ is the mass center:
+
+\f[\bar{x} = \frac{\texttt{m}_{10}}{\texttt{m}_{00}} , \; \bar{y} = \frac{\texttt{m}_{01}}{\texttt{m}_{00}}\f]
+
+The normalized central moments \f$\texttt{Moments::nu}_{ij}\f$ are computed as:
+
+\f[\texttt{nu} _{ji}= \frac{\texttt{mu}_{ji}}{\texttt{m}_{00}^{(i+j)/2+1}} .\f]
+
+@note
+\f$\texttt{mu}_{00}=\texttt{m}_{00}\f$, \f$\texttt{nu}_{00}=1\f$
+\f$\texttt{nu}_{10}=\texttt{nu}_{01}=\texttt{mu}_{10}=\texttt{mu}_{01}=0\f$ , hence the values are not
+stored.
+
+The moments of a contour are defined in the same way but computed using the Green's formula (see
+http://en.wikipedia.org/wiki/Green_theorem).
So, due to a limited raster resolution, the moments
+computed for a contour are slightly different from the moments computed for the same rasterized
+contour.
+
+@note
+Since the contour moments are computed using Green formula, you may get seemingly odd results for
+contours with self-intersections, e.g. a zero area (m00) for butterfly-shaped contours.
+ */
+class CV_EXPORTS_W_MAP Moments
+{
+public:
+    //! the default constructor
+    Moments();
+    //! the full constructor
+    Moments(double m00, double m10, double m01, double m20, double m11,
+            double m02, double m30, double m21, double m12, double m03 );
+    ////! the conversion from CvMoments
+    //Moments( const CvMoments& moments );
+    ////! the conversion to CvMoments
+    //operator CvMoments() const;
+
+    //! @name spatial moments
+    //! @{
+    CV_PROP_RW double m00, m10, m01, m20, m11, m02, m30, m21, m12, m03;
+    //! @}
+
+    //! @name central moments
+    //! @{
+    CV_PROP_RW double mu20, mu11, mu02, mu30, mu21, mu12, mu03;
+    //! @}
+
+    //! @name central normalized moments
+    //! @{
+    CV_PROP_RW double nu20, nu11, nu02, nu30, nu21, nu12, nu03;
+    //! @}
+};
+
+template<> class DataType<Moments>
+{
+public:
+    typedef Moments value_type;
+    typedef double work_type;
+    typedef double channel_type;
+
+    enum { generic_type = 0,
+           channels = (int)(sizeof(value_type)/sizeof(channel_type)), // 24
+           fmt = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth = DataType<channel_type>::depth
+           ,type = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<>
+struct Depth< Moments > { enum { value = Depth<double>::value }; };
+template<>
+struct Type< Moments > { enum { value = CV_MAKETYPE(Depth<double>::value, (int)(sizeof(Moments)/sizeof(double))) }; };
+} // namespace
+
+//! @} imgproc_shape
+
+//! @cond IGNORED
+
+/////////////////////////////////////////////////////////////////////////
+///////////////////////////// Implementation ////////////////////////////
+/////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////// Complex ////////////////////////////////
+
+template<typename _Tp> inline
+Complex<_Tp>::Complex()
+    : re(0), im(0) {}
+
+template<typename _Tp> inline
+Complex<_Tp>::Complex( _Tp _re, _Tp _im )
+    : re(_re), im(_im) {}
+
+template<typename _Tp> template<typename T2> inline
+Complex<_Tp>::operator Complex<T2>() const
+{
+    return Complex<T2>(saturate_cast<T2>(re), saturate_cast<T2>(im));
+}
+
+template<typename _Tp> inline
+Complex<_Tp> Complex<_Tp>::conj() const
+{
+    return Complex<_Tp>(re, -im);
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return a.re == b.re && a.im == b.im;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return a.re != b.re || a.im != b.im;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re + b.re, a.im + b.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator += (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a.re += b.re; a.im += b.im;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re - b.re, a.im - b.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator -= (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a.re -= b.re; a.im -= b.im;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a)
+{
+    return Complex<_Tp>(-a.re, -a.im);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (const Complex<_Tp>& a, _Tp b)
+{
+    return Complex<_Tp>( a.re*b, a.im*b );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( a.re*b, a.im*b );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (const Complex<_Tp>& a, _Tp b)
+{
+    return Complex<_Tp>( a.re + b, a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a, _Tp b)
+{ return Complex<_Tp>( a.re - b, a.im ); }
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( a.re + b, a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( b - a.re, -a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator += (Complex<_Tp>& a, _Tp b)
+{
+    a.re += b; return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator -= (Complex<_Tp>& a, _Tp b)
+{
+    a.re -= b; return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator *= (Complex<_Tp>& a, _Tp b)
+{
+    a.re *= b; a.im *= b; return a;
+}
+
+template<typename _Tp> static inline
+double abs(const Complex<_Tp>& a)
+{
+    return std::sqrt( (double)a.re*a.re + (double)a.im*a.im);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    double t = 1./((double)b.re*b.re + (double)b.im*b.im);
+    return Complex<_Tp>( (_Tp)((a.re*b.re + a.im*b.im)*t),
+                         (_Tp)((-a.re*b.im + a.im*b.re)*t) );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator /= (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a = a / b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (const Complex<_Tp>& a, _Tp b)
+{
+    _Tp t = (_Tp)1/b;
+    return Complex<_Tp>( a.re*t, a.im*t );
+}
+ _Tp t = (_Tp)1/b; + return Complex<_Tp>( a.re*t, a.im*t ); +} + +template static inline +Complex<_Tp> operator / (_Tp b, const Complex<_Tp>& a) +{ + return Complex<_Tp>(b)/a; +} + +template static inline +Complex<_Tp> operator /= (const Complex<_Tp>& a, _Tp b) +{ + _Tp t = (_Tp)1/b; + a.re *= t; a.im *= t; return a; +} + + + +//////////////////////////////// 2D Point /////////////////////////////// + +template inline +Point_<_Tp>::Point_() + : x(0), y(0) {} + +template inline +Point_<_Tp>::Point_(_Tp _x, _Tp _y) + : x(_x), y(_y) {} + +#if (defined(__GNUC__) && __GNUC__ < 5) && !defined(__clang__) // GCC 4.x bug. Details: https://github.com/opencv/opencv/pull/20837 +template inline +Point_<_Tp>::Point_(const Point_& pt) + : x(pt.x), y(pt.y) {} +#endif + +template inline +Point_<_Tp>::Point_(const Size_<_Tp>& sz) + : x(sz.width), y(sz.height) {} + +template inline +Point_<_Tp>::Point_(const Vec<_Tp,2>& v) + : x(v[0]), y(v[1]) {} + +#if (defined(__GNUC__) && __GNUC__ < 5) && !defined(__clang__) // GCC 4.x bug. Details: https://github.com/opencv/opencv/pull/20837 +template inline +Point_<_Tp>& Point_<_Tp>::operator = (const Point_& pt) +{ + x = pt.x; y = pt.y; + return *this; +} +#endif + +template template inline +Point_<_Tp>::operator Point_<_Tp2>() const +{ + return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); +} + +template inline +Point_<_Tp>::operator Vec<_Tp, 2>() const +{ + return Vec<_Tp, 2>(x, y); +} + +template inline +_Tp Point_<_Tp>::dot(const Point_& pt) const +{ + return saturate_cast<_Tp>(x*pt.x + y*pt.y); +} + +template inline +double Point_<_Tp>::ddot(const Point_& pt) const +{ + return (double)x*(double)(pt.x) + (double)y*(double)(pt.y); +} + +template inline +double Point_<_Tp>::cross(const Point_& pt) const +{ + return (double)x*pt.y - (double)y*pt.x; +} + +template inline bool +Point_<_Tp>::inside( const Rect_<_Tp>& r ) const +{ + return r.contains(*this); +} + + +template static inline +Point_<_Tp>& operator += (Point_<_Tp>& a, const Point_<_Tp>& b) +{ + a.x += b.x; + a.y += b.y; + return a; +} + +template static inline +Point_<_Tp>& operator -= (Point_<_Tp>& a, const Point_<_Tp>& b) +{ + a.x -= b.x; + a.y -= b.y; + return a; +} + +template static inline +Point_<_Tp>& operator *= (Point_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + return a; +} + +template static inline +Point_<_Tp>& operator *= (Point_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + return a; +} + +template static inline +Point_<_Tp>& operator *= (Point_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + return a; +} + +template static inline +Point_<_Tp>& operator /= (Point_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + return a; +} + +template static inline +Point_<_Tp>& operator /= (Point_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + return a; +} + +template static inline +Point_<_Tp>& operator /= (Point_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + return a; +} + +template static inline +double norm(const Point_<_Tp>& pt) +{ + return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y); +} + +template static inline +bool operator == (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return a.x == b.x && a.y == b.y; +} + +template static inline +bool operator != (const Point_<_Tp>& a, const 
Point_<_Tp>& b) +{ + return a.x != b.x || a.y != b.y; +} + +template static inline +Point_<_Tp> operator + (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y) ); +} + +template static inline +Point_<_Tp> operator - (const Point_<_Tp>& a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y) ); +} + +template static inline +Point_<_Tp> operator - (const Point_<_Tp>& a) +{ + return Point_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y) ); +} + +template static inline +Point_<_Tp> operator * (const Point_<_Tp>& a, int b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) ); +} + +template static inline +Point_<_Tp> operator * (int a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) ); +} + +template static inline +Point_<_Tp> operator * (const Point_<_Tp>& a, float b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) ); +} + +template static inline +Point_<_Tp> operator * (float a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) ); +} + +template static inline +Point_<_Tp> operator * (const Point_<_Tp>& a, double b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) ); +} + +template static inline +Point_<_Tp> operator * (double a, const Point_<_Tp>& b) +{ + return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) ); +} + +template static inline +Point_<_Tp> operator * (const Matx<_Tp, 2, 2>& a, const Point_<_Tp>& b) +{ + Matx<_Tp, 2, 1> tmp = a * Vec<_Tp,2>(b.x, b.y); + return Point_<_Tp>(tmp.val[0], tmp.val[1]); +} + +template static inline +Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point_<_Tp>& b) +{ + Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, 1); + return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]); +} + +template static inline +Point_<_Tp> operator / (const Point_<_Tp>& a, int b) +{ + Point_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point_<_Tp> operator / (const Point_<_Tp>& a, float b) +{ + Point_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point_<_Tp> operator / (const Point_<_Tp>& a, double b) +{ + Point_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + + +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); +template static inline _AccTp normL2Sqr(const Point_& pt); + +template<> inline int normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline int64 normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline float normL2Sqr(const Point_& pt) { return pt.dot(pt); } +template<> inline double normL2Sqr(const Point_& pt) { return pt.dot(pt); } + +template<> inline double normL2Sqr(const Point_& pt) { return pt.ddot(pt); } +template<> inline double normL2Sqr(const Point_& pt) { return pt.ddot(pt); } + + + +//////////////////////////////// 3D Point /////////////////////////////// + +template inline +Point3_<_Tp>::Point3_() + : x(0), y(0), z(0) {} + +template inline +Point3_<_Tp>::Point3_(_Tp _x, _Tp _y, _Tp _z) + : x(_x), y(_y), z(_z) {} + +template inline +Point3_<_Tp>::Point3_(const Point_<_Tp>& pt) + : x(pt.x), y(pt.y), z(_Tp()) {} + +template inline +Point3_<_Tp>::Point3_(const Vec<_Tp, 3>& v) + : x(v[0]), y(v[1]), z(v[2]) {} + 
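A minimal usage sketch may help here (illustrative only, not part of the patched header; it assumes OpenCV is installed and on the include path): the Point_ arithmetic above is purely component-wise and routed through saturate_cast, while dot, cross and norm fall directly out of the definitions.

    #include <opencv2/core.hpp>
    #include <cstdio>

    int main()
    {
        cv::Point2d a(3.0, 4.0), b(1.0, -2.0);
        cv::Point2d c = 2.0 * a - b;            // component-wise: (5, 10)
        double d  = a.dot(b);                   // 3*1 + 4*(-2) = -5
        double cr = a.cross(b);                 // 3*(-2) - 4*1 = -10
        std::printf("c=(%g,%g) dot=%g cross=%g norm=%g\n",
                    c.x, c.y, d, cr, cv::norm(a));  // norm(a) = 5
        return 0;
    }
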
+template template inline +Point3_<_Tp>::operator Point3_<_Tp2>() const +{ + return Point3_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(z)); +} + +template inline +Point3_<_Tp>::operator Vec<_Tp, 3>() const +{ + return Vec<_Tp, 3>(x, y, z); +} + +template inline +_Tp Point3_<_Tp>::dot(const Point3_& pt) const +{ + return saturate_cast<_Tp>(x*pt.x + y*pt.y + z*pt.z); +} + +template inline +double Point3_<_Tp>::ddot(const Point3_& pt) const +{ + return (double)x*pt.x + (double)y*pt.y + (double)z*pt.z; +} + +template inline +Point3_<_Tp> Point3_<_Tp>::cross(const Point3_<_Tp>& pt) const +{ + return Point3_<_Tp>(y*pt.z - z*pt.y, z*pt.x - x*pt.z, x*pt.y - y*pt.x); +} + + +template static inline +Point3_<_Tp>& operator += (Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; + return a; +} + +template static inline +Point3_<_Tp>& operator -= (Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; + return a; +} + +template static inline +Point3_<_Tp>& operator *= (Point3_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + a.z = saturate_cast<_Tp>(a.z * b); + return a; +} + +template static inline +Point3_<_Tp>& operator *= (Point3_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + a.z = saturate_cast<_Tp>(a.z * b); + return a; +} + +template static inline +Point3_<_Tp>& operator *= (Point3_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x * b); + a.y = saturate_cast<_Tp>(a.y * b); + a.z = saturate_cast<_Tp>(a.z * b); + return a; +} + +template static inline +Point3_<_Tp>& operator /= (Point3_<_Tp>& a, int b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + a.z = saturate_cast<_Tp>(a.z / b); + return a; +} + +template static inline +Point3_<_Tp>& operator /= (Point3_<_Tp>& a, float b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + a.z = saturate_cast<_Tp>(a.z / b); + return a; +} + +template static inline +Point3_<_Tp>& operator /= (Point3_<_Tp>& a, double b) +{ + a.x = saturate_cast<_Tp>(a.x / b); + a.y = saturate_cast<_Tp>(a.y / b); + a.z = saturate_cast<_Tp>(a.z / b); + return a; +} + +template static inline +double norm(const Point3_<_Tp>& pt) +{ + return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y + (double)pt.z*pt.z); +} + +template static inline +bool operator == (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z; +} + +template static inline +bool operator != (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return a.x != b.x || a.y != b.y || a.z != b.z; +} + +template static inline +Point3_<_Tp> operator + (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y), saturate_cast<_Tp>(a.z + b.z)); +} + +template static inline +Point3_<_Tp> operator - (const Point3_<_Tp>& a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y), saturate_cast<_Tp>(a.z - b.z)); +} + +template static inline +Point3_<_Tp> operator - (const Point3_<_Tp>& a) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y), saturate_cast<_Tp>(-a.z) ); +} + +template static inline +Point3_<_Tp> operator * (const Point3_<_Tp>& a, int b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b), saturate_cast<_Tp>(a.z*b) ); +} + +template static inline 
+Point3_<_Tp> operator * (int a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) ); +} + +template static inline +Point3_<_Tp> operator * (const Point3_<_Tp>& a, float b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) ); +} + +template static inline +Point3_<_Tp> operator * (float a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) ); +} + +template static inline +Point3_<_Tp> operator * (const Point3_<_Tp>& a, double b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) ); +} + +template static inline +Point3_<_Tp> operator * (double a, const Point3_<_Tp>& b) +{ + return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) ); +} + +template static inline +Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point3_<_Tp>& b) +{ + Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, b.z); + return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]); +} + +template static inline +Matx<_Tp, 4, 1> operator * (const Matx<_Tp, 4, 4>& a, const Point3_<_Tp>& b) +{ + return a * Matx<_Tp, 4, 1>(b.x, b.y, b.z, 1); +} + +template static inline +Point3_<_Tp> operator / (const Point3_<_Tp>& a, int b) +{ + Point3_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point3_<_Tp> operator / (const Point3_<_Tp>& a, float b) +{ + Point3_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Point3_<_Tp> operator / (const Point3_<_Tp>& a, double b) +{ + Point3_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + + + +////////////////////////////////// Size ///////////////////////////////// + +template inline +Size_<_Tp>::Size_() + : width(0), height(0) {} + +template inline +Size_<_Tp>::Size_(_Tp _width, _Tp _height) + : width(_width), height(_height) {} + +template inline +Size_<_Tp>::Size_(const Point_<_Tp>& pt) + : width(pt.x), height(pt.y) {} + +template template inline +Size_<_Tp>::operator Size_<_Tp2>() const +{ + return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); +} + +template inline +_Tp Size_<_Tp>::area() const +{ + const _Tp result = width * height; + CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer + || width == 0 || result / width == height); // make sure the result fits in the return value + return result; +} + +template inline +double Size_<_Tp>::aspectRatio() const +{ + return width / static_cast(height); +} + +template inline +bool Size_<_Tp>::empty() const +{ + return width <= 0 || height <= 0; +} + + +template static inline +Size_<_Tp>& operator *= (Size_<_Tp>& a, _Tp b) +{ + a.width *= b; + a.height *= b; + return a; +} + +template static inline +Size_<_Tp> operator * (const Size_<_Tp>& a, _Tp b) +{ + Size_<_Tp> tmp(a); + tmp *= b; + return tmp; +} + +template static inline +Size_<_Tp>& operator /= (Size_<_Tp>& a, _Tp b) +{ + a.width /= b; + a.height /= b; + return a; +} + +template static inline +Size_<_Tp> operator / (const Size_<_Tp>& a, _Tp b) +{ + Size_<_Tp> tmp(a); + tmp /= b; + return tmp; +} + +template static inline +Size_<_Tp>& operator += (Size_<_Tp>& a, const Size_<_Tp>& b) +{ + a.width += b.width; + a.height += b.height; + return a; +} + +template static inline +Size_<_Tp> operator + (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + Size_<_Tp> tmp(a); + tmp += b; + return tmp; +} + +template 
static inline +Size_<_Tp>& operator -= (Size_<_Tp>& a, const Size_<_Tp>& b) +{ + a.width -= b.width; + a.height -= b.height; + return a; +} + +template static inline +Size_<_Tp> operator - (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + Size_<_Tp> tmp(a); + tmp -= b; + return tmp; +} + +template static inline +bool operator == (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + return a.width == b.width && a.height == b.height; +} + +template static inline +bool operator != (const Size_<_Tp>& a, const Size_<_Tp>& b) +{ + return !(a == b); +} + + + +////////////////////////////////// Rect ///////////////////////////////// + +template inline +Rect_<_Tp>::Rect_() + : x(0), y(0), width(0), height(0) {} + +template inline +Rect_<_Tp>::Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height) + : x(_x), y(_y), width(_width), height(_height) {} + +template inline +Rect_<_Tp>::Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz) + : x(org.x), y(org.y), width(sz.width), height(sz.height) {} + +template inline +Rect_<_Tp>::Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2) +{ + x = std::min(pt1.x, pt2.x); + y = std::min(pt1.y, pt2.y); + width = std::max(pt1.x, pt2.x) - x; + height = std::max(pt1.y, pt2.y) - y; +} + +template inline +Point_<_Tp> Rect_<_Tp>::tl() const +{ + return Point_<_Tp>(x,y); +} + +template inline +Point_<_Tp> Rect_<_Tp>::br() const +{ + return Point_<_Tp>(x + width, y + height); +} + +template inline +Size_<_Tp> Rect_<_Tp>::size() const +{ + return Size_<_Tp>(width, height); +} + +template inline +_Tp Rect_<_Tp>::area() const +{ + const _Tp result = width * height; + CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer + || width == 0 || result / width == height); // make sure the result fits in the return value + return result; +} + +template inline +bool Rect_<_Tp>::empty() const +{ + return width <= 0 || height <= 0; +} + +template template inline +Rect_<_Tp>::operator Rect_<_Tp2>() const +{ + return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); +} + +template inline +bool Rect_<_Tp>::contains(const Point_<_Tp>& pt) const +{ + return x <= pt.x && pt.x < x + width && y <= pt.y && pt.y < y + height; +} + + +template static inline +Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Point_<_Tp>& b ) +{ + a.x += b.x; + a.y += b.y; + return a; +} + +template static inline +Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Point_<_Tp>& b ) +{ + a.x -= b.x; + a.y -= b.y; + return a; +} + +template static inline +Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b ) +{ + a.width += b.width; + a.height += b.height; + return a; +} + +template static inline +Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b ) +{ + const _Tp width = a.width - b.width; + const _Tp height = a.height - b.height; + CV_DbgAssert(width >= 0 && height >= 0); + a.width = width; + a.height = height; + return a; +} + +template static inline +Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b ) +{ + if (a.empty() || b.empty()) { + a = Rect(); + return a; + } + const Rect_<_Tp>& Rx_min = (a.x < b.x) ? a : b; + const Rect_<_Tp>& Rx_max = (a.x < b.x) ? b : a; + const Rect_<_Tp>& Ry_min = (a.y < b.y) ? a : b; + const Rect_<_Tp>& Ry_max = (a.y < b.y) ? b : a; + // Looking at the formula below, we will compute Rx_min.width - (Rx_max.x - Rx_min.x) + // but we want to avoid overflows. Rx_min.width >= 0 and (Rx_max.x - Rx_min.x) >= 0 + // by definition so the difference does not overflow. 
The only thing that can overflow
+    // is (Rx_max.x - Rx_min.x). And it can only overflow if Rx_min.x < 0.
+    // Let us first deal with the following case.
+    if ((Rx_min.x < 0 && Rx_min.x + Rx_min.width < Rx_max.x) ||
+        (Ry_min.y < 0 && Ry_min.y + Ry_min.height < Ry_max.y)) {
+        a = Rect();
+        return a;
+    }
+    // We now know that either Rx_min.x >= 0, or
+    // Rx_min.x < 0 && Rx_min.x + Rx_min.width >= Rx_max.x and therefore
+    // Rx_min.width >= (Rx_max.x - Rx_min.x) which means (Rx_max.x - Rx_min.x)
+    // is inferior to a valid int and therefore does not overflow.
+    a.width = std::min(Rx_min.width - (Rx_max.x - Rx_min.x), Rx_max.width);
+    a.height = std::min(Ry_min.height - (Ry_max.y - Ry_min.y), Ry_max.height);
+    a.x = Rx_max.x;
+    a.y = Ry_max.y;
+    if (a.empty())
+        a = Rect();
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
+{
+    if (a.empty()) {
+        a = b;
+    }
+    else if (!b.empty()) {
+        _Tp x1 = std::min(a.x, b.x);
+        _Tp y1 = std::min(a.y, b.y);
+        a.width = std::max(a.x + a.width, b.x + b.width) - x1;
+        a.height = std::max(a.y + a.height, b.y + b.height) - y1;
+        a.x = x1;
+        a.y = y1;
+    }
+    return a;
+}
+
+template<typename _Tp> static inline
+bool operator == (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y && a.width == b.width && a.height == b.height;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y || a.width != b.width || a.height != b.height;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x + b.x, a.y + b.y, a.width, a.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x - b.x, a.y - b.y, a.width, a.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Size_<_Tp>& b)
+{
+    const _Tp width = a.width - b.width;
+    const _Tp height = a.height - b.height;
+    CV_DbgAssert(width >= 0 && height >= 0);
+    return Rect_<_Tp>( a.x, a.y, width, height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c &= b;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c |= b;
+}
+
+/**
+ * @brief measure dissimilarity between two sample sets
+ *
+ * computes the complement of the Jaccard Index as described in <https://en.wikipedia.org/wiki/Jaccard_index>.
+ * For rectangles this reduces to computing the intersection over the union.
+ */ +template static inline +double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) { + _Tp Aa = a.area(); + _Tp Ab = b.area(); + + if ((Aa + Ab) <= std::numeric_limits<_Tp>::epsilon()) { + // jaccard_index = 1 -> distance = 0 + return 0.0; + } + + double Aab = (a & b).area(); + // distance = 1 - jaccard_index + return 1.0 - Aab / (Aa + Ab - Aab); +} + +////////////////////////////// RotatedRect ////////////////////////////// + +inline +RotatedRect::RotatedRect() + : center(), size(), angle(0) {} + +inline +RotatedRect::RotatedRect(const Point2f& _center, const Size2f& _size, float _angle) + : center(_center), size(_size), angle(_angle) {} + +///////////////////////////////// Range ///////////////////////////////// + +inline +Range::Range() + : start(0), end(0) {} + +inline +Range::Range(int _start, int _end) + : start(_start), end(_end) {} + +inline +int Range::size() const +{ + return end - start; +} + +inline +bool Range::empty() const +{ + return start == end; +} + +inline +Range Range::all() +{ + return Range(INT_MIN, INT_MAX); +} + + +static inline +bool operator == (const Range& r1, const Range& r2) +{ + return r1.start == r2.start && r1.end == r2.end; +} + +static inline +bool operator != (const Range& r1, const Range& r2) +{ + return !(r1 == r2); +} + +static inline +bool operator !(const Range& r) +{ + return r.start == r.end; +} + +static inline +Range operator & (const Range& r1, const Range& r2) +{ + Range r(std::max(r1.start, r2.start), std::min(r1.end, r2.end)); + r.end = std::max(r.end, r.start); + return r; +} + +static inline +Range& operator &= (Range& r1, const Range& r2) +{ + r1 = r1 & r2; + return r1; +} + +static inline +Range operator + (const Range& r1, int delta) +{ + return Range(r1.start + delta, r1.end + delta); +} + +static inline +Range operator + (int delta, const Range& r1) +{ + return Range(r1.start + delta, r1.end + delta); +} + +static inline +Range operator - (const Range& r1, int delta) +{ + return r1 + (-delta); +} + + + +///////////////////////////////// Scalar //////////////////////////////// + +template inline +Scalar_<_Tp>::Scalar_() +{ + this->val[0] = this->val[1] = this->val[2] = this->val[3] = 0; +} + +template inline +Scalar_<_Tp>::Scalar_(_Tp v0, _Tp v1, _Tp v2, _Tp v3) +{ + this->val[0] = v0; + this->val[1] = v1; + this->val[2] = v2; + this->val[3] = v3; +} + +template inline +Scalar_<_Tp>::Scalar_(const Scalar_<_Tp>& s) : Vec<_Tp, 4>(s) { +} + +template inline +Scalar_<_Tp>::Scalar_(Scalar_<_Tp>&& s) CV_NOEXCEPT { + this->val[0] = std::move(s.val[0]); + this->val[1] = std::move(s.val[1]); + this->val[2] = std::move(s.val[2]); + this->val[3] = std::move(s.val[3]); +} + +template inline +Scalar_<_Tp>& Scalar_<_Tp>::operator=(const Scalar_<_Tp>& s) { + this->val[0] = s.val[0]; + this->val[1] = s.val[1]; + this->val[2] = s.val[2]; + this->val[3] = s.val[3]; + return *this; +} + +template inline +Scalar_<_Tp>& Scalar_<_Tp>::operator=(Scalar_<_Tp>&& s) CV_NOEXCEPT { + this->val[0] = std::move(s.val[0]); + this->val[1] = std::move(s.val[1]); + this->val[2] = std::move(s.val[2]); + this->val[3] = std::move(s.val[3]); + return *this; +} + +template template inline +Scalar_<_Tp>::Scalar_(const Vec<_Tp2, cn>& v) +{ + int i; + for( i = 0; i < (cn < 4 ? 
cn : 4); i++ ) + this->val[i] = cv::saturate_cast<_Tp>(v.val[i]); + for( ; i < 4; i++ ) + this->val[i] = 0; +} + +template inline +Scalar_<_Tp>::Scalar_(_Tp v0) +{ + this->val[0] = v0; + this->val[1] = this->val[2] = this->val[3] = 0; +} + +template inline +Scalar_<_Tp> Scalar_<_Tp>::all(_Tp v0) +{ + return Scalar_<_Tp>(v0, v0, v0, v0); +} + + +template inline +Scalar_<_Tp> Scalar_<_Tp>::mul(const Scalar_<_Tp>& a, double scale ) const +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(this->val[0] * a.val[0] * scale), + saturate_cast<_Tp>(this->val[1] * a.val[1] * scale), + saturate_cast<_Tp>(this->val[2] * a.val[2] * scale), + saturate_cast<_Tp>(this->val[3] * a.val[3] * scale)); +} + +template inline +Scalar_<_Tp> Scalar_<_Tp>::conj() const +{ + return Scalar_<_Tp>(saturate_cast<_Tp>( this->val[0]), + saturate_cast<_Tp>(-this->val[1]), + saturate_cast<_Tp>(-this->val[2]), + saturate_cast<_Tp>(-this->val[3])); +} + +template inline +bool Scalar_<_Tp>::isReal() const +{ + return this->val[1] == 0 && this->val[2] == 0 && this->val[3] == 0; +} + + +template template inline +Scalar_<_Tp>::operator Scalar_() const +{ + return Scalar_(saturate_cast(this->val[0]), + saturate_cast(this->val[1]), + saturate_cast(this->val[2]), + saturate_cast(this->val[3])); +} + + +template static inline +Scalar_<_Tp>& operator += (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a.val[0] += b.val[0]; + a.val[1] += b.val[1]; + a.val[2] += b.val[2]; + a.val[3] += b.val[3]; + return a; +} + +template static inline +Scalar_<_Tp>& operator -= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a.val[0] -= b.val[0]; + a.val[1] -= b.val[1]; + a.val[2] -= b.val[2]; + a.val[3] -= b.val[3]; + return a; +} + +template static inline +Scalar_<_Tp>& operator *= ( Scalar_<_Tp>& a, _Tp v ) +{ + a.val[0] *= v; + a.val[1] *= v; + a.val[2] *= v; + a.val[3] *= v; + return a; +} + +template static inline +bool operator == ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b ) +{ + return a.val[0] == b.val[0] && a.val[1] == b.val[1] && + a.val[2] == b.val[2] && a.val[3] == b.val[3]; +} + +template static inline +bool operator != ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b ) +{ + return a.val[0] != b.val[0] || a.val[1] != b.val[1] || + a.val[2] != b.val[2] || a.val[3] != b.val[3]; +} + +template static inline +Scalar_<_Tp> operator + (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return Scalar_<_Tp>(a.val[0] + b.val[0], + a.val[1] + b.val[1], + a.val[2] + b.val[2], + a.val[3] + b.val[3]); +} + +template static inline +Scalar_<_Tp> operator - (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(a.val[0] - b.val[0]), + saturate_cast<_Tp>(a.val[1] - b.val[1]), + saturate_cast<_Tp>(a.val[2] - b.val[2]), + saturate_cast<_Tp>(a.val[3] - b.val[3])); +} + +template static inline +Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, _Tp alpha) +{ + return Scalar_<_Tp>(a.val[0] * alpha, + a.val[1] * alpha, + a.val[2] * alpha, + a.val[3] * alpha); +} + +template static inline +Scalar_<_Tp> operator * (_Tp alpha, const Scalar_<_Tp>& a) +{ + return a*alpha; +} + +template static inline +Scalar_<_Tp> operator - (const Scalar_<_Tp>& a) +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(-a.val[0]), + saturate_cast<_Tp>(-a.val[1]), + saturate_cast<_Tp>(-a.val[2]), + saturate_cast<_Tp>(-a.val[3])); +} + + +template static inline +Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return Scalar_<_Tp>(saturate_cast<_Tp>(a[0]*b[0] - a[1]*b[1] - a[2]*b[2] - a[3]*b[3]), + saturate_cast<_Tp>(a[0]*b[1] + a[1]*b[0] + 
a[2]*b[3] - a[3]*b[2]), + saturate_cast<_Tp>(a[0]*b[2] - a[1]*b[3] + a[2]*b[0] + a[3]*b[1]), + saturate_cast<_Tp>(a[0]*b[3] + a[1]*b[2] - a[2]*b[1] + a[3]*b[0])); +} + +template static inline +Scalar_<_Tp>& operator *= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a = a * b; + return a; +} + +template static inline +Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, _Tp alpha) +{ + return Scalar_<_Tp>(a.val[0] / alpha, + a.val[1] / alpha, + a.val[2] / alpha, + a.val[3] / alpha); +} + +template static inline +Scalar_ operator / (const Scalar_& a, float alpha) +{ + float s = 1 / alpha; + return Scalar_(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s); +} + +template static inline +Scalar_ operator / (const Scalar_& a, double alpha) +{ + double s = 1 / alpha; + return Scalar_(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s); +} + +template static inline +Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, _Tp alpha) +{ + a = a / alpha; + return a; +} + +template static inline +Scalar_<_Tp> operator / (_Tp a, const Scalar_<_Tp>& b) +{ + _Tp s = a / (b[0]*b[0] + b[1]*b[1] + b[2]*b[2] + b[3]*b[3]); + return b.conj() * s; +} + +template static inline +Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + return a * ((_Tp)1 / b); +} + +template static inline +Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b) +{ + a = a / b; + return a; +} + +template static inline +Scalar operator * (const Matx<_Tp, 4, 4>& a, const Scalar& b) +{ + Matx c((Matx)a, b, Matx_MatMulOp()); + return reinterpret_cast(c); +} + +template<> inline +Scalar operator * (const Matx& a, const Scalar& b) +{ + Matx c(a, b, Matx_MatMulOp()); + return reinterpret_cast(c); +} + + + +//////////////////////////////// KeyPoint /////////////////////////////// + +inline +KeyPoint::KeyPoint() + : pt(0,0), size(0), angle(-1), response(0), octave(0), class_id(-1) {} + +inline +KeyPoint::KeyPoint(Point2f _pt, float _size, float _angle, float _response, int _octave, int _class_id) + : pt(_pt), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {} + +inline +KeyPoint::KeyPoint(float x, float y, float _size, float _angle, float _response, int _octave, int _class_id) + : pt(x, y), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {} + + + +///////////////////////////////// DMatch //////////////////////////////// + +inline +DMatch::DMatch() + : queryIdx(-1), trainIdx(-1), imgIdx(-1), distance(FLT_MAX) {} + +inline +DMatch::DMatch(int _queryIdx, int _trainIdx, float _distance) + : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(-1), distance(_distance) {} + +inline +DMatch::DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance) + : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(_imgIdx), distance(_distance) {} + +inline +bool DMatch::operator < (const DMatch &m) const +{ + return distance < m.distance; +} + + + +////////////////////////////// TermCriteria ///////////////////////////// + +inline +TermCriteria::TermCriteria() + : type(0), maxCount(0), epsilon(0) {} + +inline +TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon) + : type(_type), maxCount(_maxCount), epsilon(_epsilon) {} + +//! 
@endcond + +} // cv + +#endif //OPENCV_CORE_TYPES_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/types_c.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/types_c.h new file mode 100644 index 0000000..32f3c8c --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/types_c.h @@ -0,0 +1,2126 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#ifndef OPENCV_CORE_TYPES_H
+#define OPENCV_CORE_TYPES_H
+
+#ifdef CV__ENABLE_C_API_CTORS // invalid C API ctors (must be removed)
+#if defined(_WIN32) && !defined(CV__SKIP_MESSAGE_MALFORMED_C_API_CTORS)
+#error "C API ctors don't work on Win32: https://github.com/opencv/opencv/issues/15990"
+#endif
+#endif
+
+//#define CV__VALIDATE_UNUNITIALIZED_VARS 1 // C++11 & GCC only
+
+#ifdef __cplusplus
+
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#define CV_STRUCT_INITIALIZER {0,}
+#else
+#if defined(__GNUC__) && __GNUC__ == 4 // GCC 4.x warns on "= {}" initialization, fixed in GCC 5.0
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+#define CV_STRUCT_INITIALIZER {}
+#endif
+
+#else
+#define CV_STRUCT_INITIALIZER {0}
+#endif
+
+
+#ifdef HAVE_IPL
+# ifndef __IPL_H__
+# if defined _WIN32
+# include <ipl.h>
+# else
+# include <ipl/ipl.h>
+# endif
+# endif
+#elif defined __IPL_H__
+# define HAVE_IPL
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+#ifndef SKIP_INCLUDES
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#endif // SKIP_INCLUDES
+
+#if defined _WIN32
+# define CV_CDECL __cdecl
+# define CV_STDCALL __stdcall
+#else
+# define CV_CDECL
+# define CV_STDCALL
+#endif
+
+#ifndef CV_DEFAULT
+# ifdef __cplusplus
+# define CV_DEFAULT(val) = val
+# else
+# define CV_DEFAULT(val)
+# endif
+#endif
+
+#ifndef CV_EXTERN_C_FUNCPTR
+# ifdef __cplusplus
+# define CV_EXTERN_C_FUNCPTR(x) extern "C" { typedef x; }
+# else
+# define CV_EXTERN_C_FUNCPTR(x) typedef x
+# endif
+#endif
+
+#ifndef CVAPI
+# define CVAPI(rettype) CV_EXTERN_C CV_EXPORTS rettype CV_CDECL
+#endif
+
+#ifndef CV_IMPL
+# define CV_IMPL CV_EXTERN_C
+#endif
+
+#ifdef __cplusplus
+# include "opencv2/core.hpp"
+#endif
+
+/** @addtogroup core_c
+ @{
+*/
+
+/** @brief This is the "metatype" used *only* as a function parameter.
+
+It denotes that the function accepts arrays of multiple types, such as IplImage*, CvMat* or even
+CvSeq* sometimes. The particular array type is determined at runtime by analyzing the first 4
+bytes of the header. In C++ interface the role of CvArr is played by InputArray and OutputArray.
+ */
+typedef void CvArr;
+
+typedef int CVStatus;
+
+/** @see cv::Error::Code */
+enum {
+ CV_StsOk= 0, /**< everything is ok */
+ CV_StsBackTrace= -1, /**< pseudo error for back trace */
+ CV_StsError= -2, /**< unknown /unspecified error */
+ CV_StsInternal= -3, /**< internal error (bad state) */
+ CV_StsNoMem= -4, /**< insufficient memory */
+ CV_StsBadArg= -5, /**< function arg/param is bad */
+ CV_StsBadFunc= -6, /**< unsupported function */
+ CV_StsNoConv= -7, /**< iter.
didn't converge */ + CV_StsAutoTrace= -8, /**< tracing */ + CV_HeaderIsNull= -9, /**< image header is NULL */ + CV_BadImageSize= -10, /**< image size is invalid */ + CV_BadOffset= -11, /**< offset is invalid */ + CV_BadDataPtr= -12, /**/ + CV_BadStep= -13, /**< image step is wrong, this may happen for a non-continuous matrix */ + CV_BadModelOrChSeq= -14, /**/ + CV_BadNumChannels= -15, /**< bad number of channels, for example, some functions accept only single channel matrices */ + CV_BadNumChannel1U= -16, /**/ + CV_BadDepth= -17, /**< input image depth is not supported by the function */ + CV_BadAlphaChannel= -18, /**/ + CV_BadOrder= -19, /**< number of dimensions is out of range */ + CV_BadOrigin= -20, /**< incorrect input origin */ + CV_BadAlign= -21, /**< incorrect input align */ + CV_BadCallBack= -22, /**/ + CV_BadTileSize= -23, /**/ + CV_BadCOI= -24, /**< input COI is not supported */ + CV_BadROISize= -25, /**< incorrect input roi */ + CV_MaskIsTiled= -26, /**/ + CV_StsNullPtr= -27, /**< null pointer */ + CV_StsVecLengthErr= -28, /**< incorrect vector length */ + CV_StsFilterStructContentErr= -29, /**< incorrect filter structure content */ + CV_StsKernelStructContentErr= -30, /**< incorrect transform kernel content */ + CV_StsFilterOffsetErr= -31, /**< incorrect filter offset value */ + CV_StsBadSize= -201, /**< the input/output structure size is incorrect */ + CV_StsDivByZero= -202, /**< division by zero */ + CV_StsInplaceNotSupported= -203, /**< in-place operation is not supported */ + CV_StsObjectNotFound= -204, /**< request can't be completed */ + CV_StsUnmatchedFormats= -205, /**< formats of input/output arrays differ */ + CV_StsBadFlag= -206, /**< flag is wrong or not supported */ + CV_StsBadPoint= -207, /**< bad CvPoint */ + CV_StsBadMask= -208, /**< bad format of mask (neither 8uC1 nor 8sC1)*/ + CV_StsUnmatchedSizes= -209, /**< sizes of input/output structures do not match */ + CV_StsUnsupportedFormat= -210, /**< the data format/type is not supported by the function*/ + CV_StsOutOfRange= -211, /**< some of parameters are out of range */ + CV_StsParseError= -212, /**< invalid syntax/structure of the parsed file */ + CV_StsNotImplemented= -213, /**< the requested function/feature is not implemented */ + CV_StsBadMemBlock= -214, /**< an allocated block has been corrupted */ + CV_StsAssert= -215, /**< assertion failed */ + CV_GpuNotSupported= -216, /**< no CUDA support */ + CV_GpuApiCallError= -217, /**< GPU API call error */ + CV_OpenGlNotSupported= -218, /**< no OpenGL support */ + CV_OpenGlApiCallError= -219, /**< OpenGL API call error */ + CV_OpenCLApiCallError= -220, /**< OpenCL API call error */ + CV_OpenCLDoubleNotSupported= -221, + CV_OpenCLInitError= -222, /**< OpenCL initialization error */ + CV_OpenCLNoAMDBlasFft= -223 +}; + +/****************************************************************************************\ +* Common macros and inline functions * +\****************************************************************************************/ + +#define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t)) + +/** min & max without jumps */ +#define CV_IMIN(a, b) ((a) ^ (((a)^(b)) & (((a) < (b)) - 1))) + +#define CV_IMAX(a, b) ((a) ^ (((a)^(b)) & (((a) > (b)) - 1))) + +/** absolute value without jumps */ +#ifndef __cplusplus +# define CV_IABS(a) (((a) ^ ((a) < 0 ? -1 : 0)) - ((a) < 0 ? 
-1 : 0)) +#else +# define CV_IABS(a) abs(a) +#endif +#define CV_CMP(a,b) (((a) > (b)) - ((a) < (b))) +#define CV_SIGN(a) CV_CMP((a),0) + +#define cvInvSqrt(value) ((float)(1./sqrt(value))) +#define cvSqrt(value) ((float)sqrt(value)) + + +/*************** Random number generation *******************/ + +typedef uint64 CvRNG; + +#define CV_RNG_COEFF 4164903690U + +/** @brief Initializes a random number generator state. + +The function initializes a random number generator and returns the state. The pointer to the state +can be then passed to the cvRandInt, cvRandReal and cvRandArr functions. In the current +implementation a multiply-with-carry generator is used. +@param seed 64-bit value used to initiate a random sequence +@sa the C++ class RNG replaced CvRNG. + */ +CV_INLINE CvRNG cvRNG( int64 seed CV_DEFAULT(-1)) +{ + CvRNG rng = seed ? (uint64)seed : (uint64)(int64)-1; + return rng; +} + +/** @brief Returns a 32-bit unsigned integer and updates RNG. + +The function returns a uniformly-distributed random 32-bit unsigned integer and updates the RNG +state. It is similar to the rand() function from the C runtime library, except that OpenCV functions +always generates a 32-bit random number, regardless of the platform. +@param rng CvRNG state initialized by cvRNG. + */ +CV_INLINE unsigned cvRandInt( CvRNG* rng ) +{ + uint64 temp = *rng; + temp = (uint64)(unsigned)temp*CV_RNG_COEFF + (temp >> 32); + *rng = temp; + return (unsigned)temp; +} + +/** @brief Returns a floating-point random number and updates RNG. + +The function returns a uniformly-distributed random floating-point number between 0 and 1 (1 is not +included). +@param rng RNG state initialized by cvRNG + */ +CV_INLINE double cvRandReal( CvRNG* rng ) +{ + return cvRandInt(rng)*2.3283064365386962890625e-10 /* 2^-32 */; +} + +/****************************************************************************************\ +* Image type (IplImage) * +\****************************************************************************************/ + +#ifndef HAVE_IPL + +/* + * The following definitions (until #endif) + * is an extract from IPL headers. + * Copyright (c) 1995 Intel Corporation. + */ +#define IPL_DEPTH_SIGN 0x80000000 + +#define IPL_DEPTH_1U 1 +#define IPL_DEPTH_8U 8 +#define IPL_DEPTH_16U 16 +#define IPL_DEPTH_32F 32 + +#define IPL_DEPTH_8S (IPL_DEPTH_SIGN| 8) +#define IPL_DEPTH_16S (IPL_DEPTH_SIGN|16) +#define IPL_DEPTH_32S (IPL_DEPTH_SIGN|32) + +#define IPL_DATA_ORDER_PIXEL 0 +#define IPL_DATA_ORDER_PLANE 1 + +#define IPL_ORIGIN_TL 0 +#define IPL_ORIGIN_BL 1 + +#define IPL_ALIGN_4BYTES 4 +#define IPL_ALIGN_8BYTES 8 +#define IPL_ALIGN_16BYTES 16 +#define IPL_ALIGN_32BYTES 32 + +#define IPL_ALIGN_DWORD IPL_ALIGN_4BYTES +#define IPL_ALIGN_QWORD IPL_ALIGN_8BYTES + +#define IPL_BORDER_CONSTANT 0 +#define IPL_BORDER_REPLICATE 1 +#define IPL_BORDER_REFLECT 2 +#define IPL_BORDER_WRAP 3 + +#ifdef __cplusplus +typedef struct _IplImage IplImage; +CV_EXPORTS _IplImage cvIplImage(const cv::Mat& m); +#endif + +/** The IplImage is taken from the Intel Image Processing Library, in which the format is native. OpenCV +only supports a subset of possible IplImage formats, as outlined in the parameter list above. + +In addition to the above restrictions, OpenCV handles ROIs differently. OpenCV functions require +that the image size or ROI size of all source and destination images match exactly. 
On the other +hand, the Intel Image Processing Library processes the area of intersection between the source and +destination images (or ROIs), allowing them to vary independently. +*/ +typedef struct +_IplImage +{ + int nSize; /**< sizeof(IplImage) */ + int ID; /**< version (=0)*/ + int nChannels; /**< Most of OpenCV functions support 1,2,3 or 4 channels */ + int alphaChannel; /**< Ignored by OpenCV */ + int depth; /**< Pixel depth in bits: IPL_DEPTH_8U, IPL_DEPTH_8S, IPL_DEPTH_16S, + IPL_DEPTH_32S, IPL_DEPTH_32F and IPL_DEPTH_64F are supported. */ + char colorModel[4]; /**< Ignored by OpenCV */ + char channelSeq[4]; /**< ditto */ + int dataOrder; /**< 0 - interleaved color channels, 1 - separate color channels. + cvCreateImage can only create interleaved images */ + int origin; /**< 0 - top-left origin, + 1 - bottom-left origin (Windows bitmaps style). */ + int align; /**< Alignment of image rows (4 or 8). + OpenCV ignores it and uses widthStep instead. */ + int width; /**< Image width in pixels. */ + int height; /**< Image height in pixels. */ + struct _IplROI *roi; /**< Image ROI. If NULL, the whole image is selected. */ + struct _IplImage *maskROI; /**< Must be NULL. */ + void *imageId; /**< " " */ + struct _IplTileInfo *tileInfo; /**< " " */ + int imageSize; /**< Image data size in bytes + (==image->height*image->widthStep + in case of interleaved data)*/ + char *imageData; /**< Pointer to aligned image data. */ + int widthStep; /**< Size of aligned image row in bytes. */ + int BorderMode[4]; /**< Ignored by OpenCV. */ + int BorderConst[4]; /**< Ditto. */ + char *imageDataOrigin; /**< Pointer to very origin of image data + (not necessarily aligned) - + needed for correct deallocation */ + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + _IplImage() + { + memset(this, 0, sizeof(*this)); // valid for POD structure + nSize = sizeof(IplImage); + } + _IplImage(const cv::Mat& m) { *this = cvIplImage(m); } +#endif +} +IplImage; + +CV_INLINE IplImage cvIplImage() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + IplImage self = CV_STRUCT_INITIALIZER; self.nSize = sizeof(IplImage); return self; +#else + return _IplImage(); +#endif +} + +typedef struct _IplTileInfo IplTileInfo; + +typedef struct _IplROI +{ + int coi; /**< 0 - no COI (all channels are selected), 1 - 0th channel is selected ...*/ + int xOffset; + int yOffset; + int width; + int height; +} +IplROI; + +typedef struct _IplConvKernel +{ + int nCols; + int nRows; + int anchorX; + int anchorY; + int *values; + int nShiftR; +} +IplConvKernel; + +typedef struct _IplConvKernelFP +{ + int nCols; + int nRows; + int anchorX; + int anchorY; + float *values; +} +IplConvKernelFP; + +#define IPL_IMAGE_HEADER 1 +#define IPL_IMAGE_DATA 2 +#define IPL_IMAGE_ROI 4 + +#endif/*HAVE_IPL*/ + +/** extra border mode */ +#define IPL_BORDER_REFLECT_101 4 +#define IPL_BORDER_TRANSPARENT 5 + +#define IPL_IMAGE_MAGIC_VAL ((int)sizeof(IplImage)) +#define CV_TYPE_NAME_IMAGE "opencv-image" + +#define CV_IS_IMAGE_HDR(img) \ + ((img) != NULL && ((const IplImage*)(img))->nSize == sizeof(IplImage)) + +#define CV_IS_IMAGE(img) \ + (CV_IS_IMAGE_HDR(img) && ((IplImage*)img)->imageData != NULL) + +/** for storing double-precision + floating point data in IplImage's */ +#define IPL_DEPTH_64F 64 + +/** get reference to pixel at (col,row), + for multi-channel images (col) should be multiplied by number of channels */ +#define CV_IMAGE_ELEM( image, elemtype, row, col ) \ + (((elemtype*)((image)->imageData + (image)->widthStep*(row)))[(col)]) 
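Since the macro above is still occasionally driven with data coming from the C++ side, a short sketch of typical usage may be useful (illustrative only, not part of the patched header; it relies on the cvIplImage() wrapper declared earlier in this file):

    #include <opencv2/core.hpp>
    #include <opencv2/core/types_c.h>

    int main()
    {
        cv::Mat gray(4, 4, CV_8UC1, cv::Scalar(0));
        IplImage img = cvIplImage(gray);         // header only; pixel data is shared with 'gray'
        CV_IMAGE_ELEM(&img, uchar, 2, 3) = 255;  // write the pixel at row 2, column 3
        return CV_IMAGE_ELEM(&img, uchar, 2, 3) == gray.at<uchar>(2, 3) ? 0 : 1;  // exits 0 on success
    }
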
+ +/****************************************************************************************\ +* Matrix type (CvMat) * +\****************************************************************************************/ + +#define CV_AUTO_STEP 0x7fffffff +#define CV_WHOLE_ARR cvSlice( 0, 0x3fffffff ) + +#define CV_MAGIC_MASK 0xFFFF0000 +#define CV_MAT_MAGIC_VAL 0x42420000 +#define CV_TYPE_NAME_MAT "opencv-matrix" + +#ifdef __cplusplus +typedef struct CvMat CvMat; +CV_INLINE CvMat cvMat(const cv::Mat& m); +#endif + +/** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column +index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro: + + uchar pixval = CV_MAT_ELEM(grayimg, uchar, i, j) + CV_MAT_ELEM(cameraMatrix, float, 0, 2) = image.width*0.5f; + +To access multiple-channel matrices, you can use +CV_MAT_ELEM(matrix, type, i, j\*nchannels + channel_idx). + +@deprecated CvMat is now obsolete; consider using Mat instead. + */ +typedef struct CvMat +{ + int type; + int step; + + /* for internal use only */ + int* refcount; + int hdr_refcount; + + union + { + uchar* ptr; + short* s; + int* i; + float* fl; + double* db; + } data; + +#ifdef __cplusplus + union + { + int rows; + int height; + }; + + union + { + int cols; + int width; + }; +#else + int rows; + int cols; +#endif + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvMat() {} + CvMat(const cv::Mat& m) { *this = cvMat(m); } +#endif +} +CvMat; + + +#define CV_IS_MAT_HDR(mat) \ + ((mat) != NULL && \ + (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \ + ((const CvMat*)(mat))->cols > 0 && ((const CvMat*)(mat))->rows > 0) + +#define CV_IS_MAT_HDR_Z(mat) \ + ((mat) != NULL && \ + (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \ + ((const CvMat*)(mat))->cols >= 0 && ((const CvMat*)(mat))->rows >= 0) + +#define CV_IS_MAT(mat) \ + (CV_IS_MAT_HDR(mat) && ((const CvMat*)(mat))->data.ptr != NULL) + +#define CV_IS_MASK_ARR(mat) \ + (((mat)->type & (CV_MAT_TYPE_MASK & ~CV_8SC1)) == 0) + +#define CV_ARE_TYPES_EQ(mat1, mat2) \ + ((((mat1)->type ^ (mat2)->type) & CV_MAT_TYPE_MASK) == 0) + +#define CV_ARE_CNS_EQ(mat1, mat2) \ + ((((mat1)->type ^ (mat2)->type) & CV_MAT_CN_MASK) == 0) + +#define CV_ARE_DEPTHS_EQ(mat1, mat2) \ + ((((mat1)->type ^ (mat2)->type) & CV_MAT_DEPTH_MASK) == 0) + +#define CV_ARE_SIZES_EQ(mat1, mat2) \ + ((mat1)->rows == (mat2)->rows && (mat1)->cols == (mat2)->cols) + +#define CV_IS_MAT_CONST(mat) \ + (((mat)->rows|(mat)->cols) == 1) + +#define IPL2CV_DEPTH(depth) \ + ((((CV_8U)+(CV_16U<<4)+(CV_32F<<8)+(CV_64F<<16)+(CV_8S<<20)+ \ + (CV_16S<<24)+(CV_32S<<28)) >> ((((depth) & 0xF0) >> 2) + \ + (((depth) & IPL_DEPTH_SIGN) ? 20 : 0))) & 15) + +/** Inline constructor. No data is allocated internally!!! + * (Use together with cvCreateData, or use cvCreateMat instead to + * get a matrix with allocated data): + */ +CV_INLINE CvMat cvMat( int rows, int cols, int type, void* data CV_DEFAULT(NULL)) +{ + CvMat m; + + assert( (unsigned)CV_MAT_DEPTH(type) <= CV_64F ); + type = CV_MAT_TYPE(type); + m.type = CV_MAT_MAGIC_VAL | CV_MAT_CONT_FLAG | type; + m.cols = cols; + m.rows = rows; + m.step = m.cols*CV_ELEM_SIZE(type); + m.data.ptr = (uchar*)data; + m.refcount = NULL; + m.hdr_refcount = 0; + + return m; +} + +#ifdef __cplusplus + +CV_INLINE CvMat cvMat(const cv::Mat& m) +{ + CvMat self; + CV_DbgAssert(m.dims <= 2); + self = cvMat(m.rows, m.dims == 1 ? 
1 : m.cols, m.type(), m.data); + self.step = (int)m.step[0]; + self.type = (self.type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG); + return self; +} +CV_INLINE CvMat cvMat() +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMat self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMat(); +#endif +} +CV_INLINE CvMat cvMat(const CvMat& m) +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMat self = CV_STRUCT_INITIALIZER; memcpy(&self, &m, sizeof(self)); return self; +#else + return CvMat(m); +#endif +} + +#endif // __cplusplus + + +#define CV_MAT_ELEM_PTR_FAST( mat, row, col, pix_size ) \ + (assert( (unsigned)(row) < (unsigned)(mat).rows && \ + (unsigned)(col) < (unsigned)(mat).cols ), \ + (mat).data.ptr + (size_t)(mat).step*(row) + (pix_size)*(col)) + +#define CV_MAT_ELEM_PTR( mat, row, col ) \ + CV_MAT_ELEM_PTR_FAST( mat, row, col, CV_ELEM_SIZE((mat).type) ) + +#define CV_MAT_ELEM( mat, elemtype, row, col ) \ + (*(elemtype*)CV_MAT_ELEM_PTR_FAST( mat, row, col, sizeof(elemtype))) + +/** @brief Returns the particular element of single-channel floating-point matrix. + +The function is a fast replacement for cvGetReal2D in the case of single-channel floating-point +matrices. It is faster because it is inline, it does fewer checks for array type and array element +type, and it checks for the row and column ranges only in debug mode. +@param mat Input matrix +@param row The zero-based index of row +@param col The zero-based index of column + */ +CV_INLINE double cvmGet( const CvMat* mat, int row, int col ) +{ + int type; + + type = CV_MAT_TYPE(mat->type); + assert( (unsigned)row < (unsigned)mat->rows && + (unsigned)col < (unsigned)mat->cols ); + + if( type == CV_32FC1 ) + return ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col]; + else + { + assert( type == CV_64FC1 ); + return ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col]; + } +} + +/** @brief Sets a specific element of a single-channel floating-point matrix. + +The function is a fast replacement for cvSetReal2D in the case of single-channel floating-point +matrices. It is faster because it is inline, it does fewer checks for array type and array element +type, and it checks for the row and column ranges only in debug mode. +@param mat The matrix +@param row The zero-based index of row +@param col The zero-based index of column +@param value The new value of the matrix element + */ +CV_INLINE void cvmSet( CvMat* mat, int row, int col, double value ) +{ + int type; + type = CV_MAT_TYPE(mat->type); + assert( (unsigned)row < (unsigned)mat->rows && + (unsigned)col < (unsigned)mat->cols ); + + if( type == CV_32FC1 ) + ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = (float)value; + else + { + assert( type == CV_64FC1 ); + ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = value; + } +} + + +CV_INLINE int cvIplDepth( int type ) +{ + int depth = CV_MAT_DEPTH(type); + return CV_ELEM_SIZE1(depth)*8 | (depth == CV_8S || depth == CV_16S || + depth == CV_32S ? 
IPL_DEPTH_SIGN : 0); +} + + +/****************************************************************************************\ +* Multi-dimensional dense array (CvMatND) * +\****************************************************************************************/ + +#define CV_MATND_MAGIC_VAL 0x42430000 +#define CV_TYPE_NAME_MATND "opencv-nd-matrix" + +#define CV_MAX_DIM 32 + +#ifdef __cplusplus +typedef struct CvMatND CvMatND; +CV_EXPORTS CvMatND cvMatND(const cv::Mat& m); +#endif + +/** + @deprecated consider using cv::Mat instead + */ +typedef struct +CvMatND +{ + int type; + int dims; + + int* refcount; + int hdr_refcount; + + union + { + uchar* ptr; + float* fl; + double* db; + int* i; + short* s; + } data; + + struct + { + int size; + int step; + } + dim[CV_MAX_DIM]; + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvMatND() {} + CvMatND(const cv::Mat& m) { *this = cvMatND(m); } +#endif +} +CvMatND; + + +CV_INLINE CvMatND cvMatND() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvMatND self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMatND(); +#endif +} + +#define CV_IS_MATND_HDR(mat) \ + ((mat) != NULL && (((const CvMatND*)(mat))->type & CV_MAGIC_MASK) == CV_MATND_MAGIC_VAL) + +#define CV_IS_MATND(mat) \ + (CV_IS_MATND_HDR(mat) && ((const CvMatND*)(mat))->data.ptr != NULL) + + +/****************************************************************************************\ +* Multi-dimensional sparse array (CvSparseMat) * +\****************************************************************************************/ + +#define CV_SPARSE_MAT_MAGIC_VAL 0x42440000 +#define CV_TYPE_NAME_SPARSE_MAT "opencv-sparse-matrix" + +struct CvSet; + +typedef struct CvSparseMat +{ + int type; + int dims; + int* refcount; + int hdr_refcount; + + struct CvSet* heap; + void** hashtable; + int hashsize; + int valoffset; + int idxoffset; + int size[CV_MAX_DIM]; + +#ifdef __cplusplus + CV_EXPORTS void copyToSparseMat(cv::SparseMat& m) const; +#endif +} +CvSparseMat; + +#ifdef __cplusplus +CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m); +#endif + +#define CV_IS_SPARSE_MAT_HDR(mat) \ + ((mat) != NULL && \ + (((const CvSparseMat*)(mat))->type & CV_MAGIC_MASK) == CV_SPARSE_MAT_MAGIC_VAL) + +#define CV_IS_SPARSE_MAT(mat) \ + CV_IS_SPARSE_MAT_HDR(mat) + +/**************** iteration through a sparse array *****************/ + +typedef struct CvSparseNode +{ + unsigned hashval; + struct CvSparseNode* next; +} +CvSparseNode; + +typedef struct CvSparseMatIterator +{ + CvSparseMat* mat; + CvSparseNode* node; + int curidx; +} +CvSparseMatIterator; + +#define CV_NODE_VAL(mat,node) ((void*)((uchar*)(node) + (mat)->valoffset)) +#define CV_NODE_IDX(mat,node) ((int*)((uchar*)(node) + (mat)->idxoffset)) + +/****************************************************************************************\ +* Histogram * +\****************************************************************************************/ + +typedef int CvHistType; + +#define CV_HIST_MAGIC_VAL 0x42450000 +#define CV_HIST_UNIFORM_FLAG (1 << 10) + +/** indicates whether bin ranges are set already or not */ +#define CV_HIST_RANGES_FLAG (1 << 11) + +#define CV_HIST_ARRAY 0 +#define CV_HIST_SPARSE 1 +#define CV_HIST_TREE CV_HIST_SPARSE + +/** should be used as a parameter only, + it turns to CV_HIST_UNIFORM_FLAG of hist->type */ +#define CV_HIST_UNIFORM 1 + +typedef struct CvHistogram +{ + int type; + CvArr* bins; + float thresh[CV_MAX_DIM][2]; /**< For uniform histograms. 
*/ + float** thresh2; /**< For non-uniform histograms. */ + CvMatND mat; /**< Embedded matrix header for array histograms. */ +} +CvHistogram; + +#define CV_IS_HIST( hist ) \ + ((hist) != NULL && \ + (((CvHistogram*)(hist))->type & CV_MAGIC_MASK) == CV_HIST_MAGIC_VAL && \ + (hist)->bins != NULL) + +#define CV_IS_UNIFORM_HIST( hist ) \ + (((hist)->type & CV_HIST_UNIFORM_FLAG) != 0) + +#define CV_IS_SPARSE_HIST( hist ) \ + CV_IS_SPARSE_MAT((hist)->bins) + +#define CV_HIST_HAS_RANGES( hist ) \ + (((hist)->type & CV_HIST_RANGES_FLAG) != 0) + +/****************************************************************************************\ +* Other supplementary data type definitions * +\****************************************************************************************/ + +/*************************************** CvRect *****************************************/ +/** @sa Rect_ */ +typedef struct CvRect +{ + int x; + int y; + int width; + int height; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvRect() __attribute__(( warning("Non-initialized variable") )) {}; + template CvRect(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 4); + x = y = width = height = 0; + if (list.size() == 4) + { + x = list.begin()[0]; y = list.begin()[1]; width = list.begin()[2]; height = list.begin()[3]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvRect(int _x = 0, int _y = 0, int w = 0, int h = 0): x(_x), y(_y), width(w), height(h) {} + template + CvRect(const cv::Rect_<_Tp>& r): x(cv::saturate_cast(r.x)), y(cv::saturate_cast(r.y)), width(cv::saturate_cast(r.width)), height(cv::saturate_cast(r.height)) {} +#endif +#ifdef __cplusplus + template + operator cv::Rect_<_Tp>() const { return cv::Rect_<_Tp>((_Tp)x, (_Tp)y, (_Tp)width, (_Tp)height); } +#endif +} +CvRect; + +/** constructs CvRect structure. 
*/ +CV_INLINE CvRect cvRect( int x, int y, int width, int height ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvRect r = {x, y, width, height}; +#else + CvRect r(x, y, width, height); +#endif + return r; +} +#ifdef __cplusplus +CV_INLINE CvRect cvRect(const cv::Rect& rc) { return cvRect(rc.x, rc.y, rc.width, rc.height); } +#endif + +CV_INLINE IplROI cvRectToROI( CvRect rect, int coi ) +{ + IplROI roi; + roi.xOffset = rect.x; + roi.yOffset = rect.y; + roi.width = rect.width; + roi.height = rect.height; + roi.coi = coi; + + return roi; +} + + +CV_INLINE CvRect cvROIToRect( IplROI roi ) +{ + return cvRect( roi.xOffset, roi.yOffset, roi.width, roi.height ); +} + +/*********************************** CvTermCriteria *************************************/ + +#define CV_TERMCRIT_ITER 1 +#define CV_TERMCRIT_NUMBER CV_TERMCRIT_ITER +#define CV_TERMCRIT_EPS 2 + +/** @sa TermCriteria + */ +typedef struct CvTermCriteria +{ + int type; /**< may be combination of + CV_TERMCRIT_ITER + CV_TERMCRIT_EPS */ + int max_iter; + double epsilon; +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvTermCriteria(int _type = 0, int _iter = 0, double _eps = 0) : type(_type), max_iter(_iter), epsilon(_eps) {} + CvTermCriteria(const cv::TermCriteria& t) : type(t.type), max_iter(t.maxCount), epsilon(t.epsilon) {} +#endif +#ifdef __cplusplus + operator cv::TermCriteria() const { return cv::TermCriteria(type, max_iter, epsilon); } +#endif +} +CvTermCriteria; + +CV_INLINE CvTermCriteria cvTermCriteria( int type, int max_iter, double epsilon ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvTermCriteria t = { type, max_iter, (float)epsilon}; +#else + CvTermCriteria t(type, max_iter, epsilon); +#endif + return t; +} +#ifdef __cplusplus +CV_INLINE CvTermCriteria cvTermCriteria(const cv::TermCriteria& t) { return cvTermCriteria(t.type, t.maxCount, t.epsilon); } +#endif + + +/******************************* CvPoint and variants ***********************************/ + +typedef struct CvPoint +{ + int x; + int y; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvPoint(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvPoint(int _x = 0, int _y = 0): x(_x), y(_y) {} + template<typename _Tp> + CvPoint(const cv::Point_<_Tp>& pt): x((int)pt.x), y((int)pt.y) {} +#endif +#ifdef __cplusplus + template<typename _Tp> + operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); } +#endif +} +CvPoint; + +/** constructs CvPoint structure.
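A minimal usage sketch (assuming C++ compilation; the templated conversion operator above makes the legacy struct interchangeable with cv::Point):
@code
    CvPoint p = cvPoint(10, 20); // legacy C struct
    cv::Point q = p;             // implicit conversion to the C++ type
@endcode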
*/ +CV_INLINE CvPoint cvPoint( int x, int y ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint p = {x, y}; +#else + CvPoint p(x, y); +#endif + return p; +} +#ifdef __cplusplus +CV_INLINE CvPoint cvPoint(const cv::Point& pt) { return cvPoint(pt.x, pt.y); } +#endif + +typedef struct CvPoint2D32f +{ + float x; + float y; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint2D32f() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvPoint2D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvPoint2D32f(float _x = 0, float _y = 0): x(_x), y(_y) {} + template<typename _Tp> + CvPoint2D32f(const cv::Point_<_Tp>& pt): x((float)pt.x), y((float)pt.y) {} +#endif +#ifdef __cplusplus + template<typename _Tp> + operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); } +#endif +} +CvPoint2D32f; + +/** constructs CvPoint2D32f structure. */ +CV_INLINE CvPoint2D32f cvPoint2D32f( double x, double y ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint2D32f p = { (float)x, (float)y }; +#else + CvPoint2D32f p((float)x, (float)y); +#endif + return p; +} + +#ifdef __cplusplus +template<typename _Tp> +CvPoint2D32f cvPoint2D32f(const cv::Point_<_Tp>& pt) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint2D32f p = { (float)pt.x, (float)pt.y }; +#else + CvPoint2D32f p((float)pt.x, (float)pt.y); +#endif + return p; +} +#endif + +/** converts CvPoint to CvPoint2D32f. */ +CV_INLINE CvPoint2D32f cvPointTo32f( CvPoint point ) +{ + return cvPoint2D32f( (float)point.x, (float)point.y ); +} + +/** converts CvPoint2D32f to CvPoint. */ +CV_INLINE CvPoint cvPointFrom32f( CvPoint2D32f point ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint ipt = { cvRound(point.x), cvRound(point.y) }; +#else + CvPoint ipt(cvRound(point.x), cvRound(point.y)); +#endif + return ipt; +} + + +typedef struct CvPoint3D32f +{ + float x; + float y; + float z; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint3D32f() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvPoint3D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 3); + x = y = z = 0; + if (list.size() == 3) + { + x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvPoint3D32f(float _x = 0, float _y = 0, float _z = 0): x(_x), y(_y), z(_z) {} + template<typename _Tp> + CvPoint3D32f(const cv::Point3_<_Tp>& pt): x((float)pt.x), y((float)pt.y), z((float)pt.z) {} +#endif +#ifdef __cplusplus + template<typename _Tp> + operator cv::Point3_<_Tp>() const { return cv::Point3_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y), cv::saturate_cast<_Tp>(z)); } +#endif +} +CvPoint3D32f; + +/** constructs CvPoint3D32f structure.
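A minimal usage sketch (assuming C++ compilation; the double arguments are narrowed to float by the constructor):
@code
    CvPoint3D32f p = cvPoint3D32f(0.5, 1.5, 2.5); // stored as float x, y, z
    cv::Point3f q = p;                            // implicit conversion
@endcode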
*/ +CV_INLINE CvPoint3D32f cvPoint3D32f( double x, double y, double z ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint3D32f p = { (float)x, (float)y, (float)z }; +#else + CvPoint3D32f p((float)x, (float)y, (float)z); +#endif + return p; +} + +#ifdef __cplusplus +template<typename _Tp> +CvPoint3D32f cvPoint3D32f(const cv::Point3_<_Tp>& pt) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvPoint3D32f p = { (float)pt.x, (float)pt.y, (float)pt.z }; +#else + CvPoint3D32f p((float)pt.x, (float)pt.y, (float)pt.z); +#endif + return p; +} +#endif + + +typedef struct CvPoint2D64f +{ + double x; + double y; +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint2D64f() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvPoint2D64f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + x = y = 0; + if (list.size() == 2) + { + x = list.begin()[0]; y = list.begin()[1]; + } + }; +#endif +} +CvPoint2D64f; + +/** constructs CvPoint2D64f structure.*/ +CV_INLINE CvPoint2D64f cvPoint2D64f( double x, double y ) +{ + CvPoint2D64f p = { x, y }; + return p; +} + + +typedef struct CvPoint3D64f +{ + double x; + double y; + double z; +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvPoint3D64f() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvPoint3D64f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 3); + x = y = z = 0; + if (list.size() == 3) + { + x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2]; + } + }; +#endif +} +CvPoint3D64f; + +/** constructs CvPoint3D64f structure. */ +CV_INLINE CvPoint3D64f cvPoint3D64f( double x, double y, double z ) +{ + CvPoint3D64f p = { x, y, z }; + return p; +} + + +/******************************** CvSize's & CvBox **************************************/ + +typedef struct CvSize +{ + int width; + int height; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSize() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvSize(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + width = 0; height = 0; + if (list.size() == 2) + { + width = list.begin()[0]; height = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvSize(int w = 0, int h = 0): width(w), height(h) {} + template<typename _Tp> + CvSize(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<int>(sz.width)), height(cv::saturate_cast<int>(sz.height)) {} +#endif +#ifdef __cplusplus + template<typename _Tp> + operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); } +#endif +} +CvSize; + +/** constructs CvSize structure.
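A minimal usage sketch (assuming C++ compilation):
@code
    CvSize s = cvSize(1920, 1080); // legacy C struct
    cv::Size sz = s;               // implicit conversion to the C++ type
@endcode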
*/ +CV_INLINE CvSize cvSize( int width, int height ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize s = { width, height }; +#else + CvSize s(width, height); +#endif + return s; +} + +#ifdef __cplusplus +CV_INLINE CvSize cvSize(const cv::Size& sz) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize s = { sz.width, sz.height }; +#else + CvSize s(sz.width, sz.height); +#endif + return s; +} +#endif + +typedef struct CvSize2D32f +{ + float width; + float height; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSize2D32f() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvSize2D32f(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + width = 0; height = 0; + if (list.size() == 2) + { + width = list.begin()[0]; height = list.begin()[1]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvSize2D32f(float w = 0, float h = 0): width(w), height(h) {} + template<typename _Tp> + CvSize2D32f(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<float>(sz.width)), height(cv::saturate_cast<float>(sz.height)) {} +#endif +#ifdef __cplusplus + template<typename _Tp> + operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); } +#endif +} +CvSize2D32f; + +/** constructs CvSize2D32f structure. */ +CV_INLINE CvSize2D32f cvSize2D32f( double width, double height ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize2D32f s = { (float)width, (float)height }; +#else + CvSize2D32f s((float)width, (float)height); +#endif + return s; +} +#ifdef __cplusplus +template<typename _Tp> +CvSize2D32f cvSize2D32f(const cv::Size_<_Tp>& sz) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvSize2D32f s = { (float)sz.width, (float)sz.height }; +#else + CvSize2D32f s((float)sz.width, (float)sz.height); +#endif + return s; +} +#endif + +/** @sa RotatedRect + */ +typedef struct CvBox2D +{ + CvPoint2D32f center; /**< Center of the box. */ + CvSize2D32f size; /**< Box width and length. */ + float angle; /**< Angle between the horizontal axis */ + /**< and the first side (i.e.
length) in degrees */ + +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) : center(c), size(s), angle(a) {} + CvBox2D(const cv::RotatedRect& rr) : center(rr.center), size(rr.size), angle(rr.angle) {} +#endif +#ifdef __cplusplus + operator cv::RotatedRect() const { return cv::RotatedRect(center, size, angle); } +#endif +} +CvBox2D; + + +#ifdef __cplusplus +CV_INLINE CvBox2D cvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) +{ + CvBox2D self; + self.center = c; + self.size = s; + self.angle = a; + return self; +} +CV_INLINE CvBox2D cvBox2D(const cv::RotatedRect& rr) +{ + CvBox2D self; + self.center = cvPoint2D32f(rr.center); + self.size = cvSize2D32f(rr.size); + self.angle = rr.angle; + return self; +} +#endif + + +/** Line iterator state: */ +typedef struct CvLineIterator +{ + /** Pointer to the current point: */ + uchar* ptr; + + /* Bresenham algorithm state: */ + int err; + int plus_delta; + int minus_delta; + int plus_step; + int minus_step; +} +CvLineIterator; + + + +/************************************* CvSlice ******************************************/ +#define CV_WHOLE_SEQ_END_INDEX 0x3fffffff +#define CV_WHOLE_SEQ cvSlice(0, CV_WHOLE_SEQ_END_INDEX) + +typedef struct CvSlice +{ + int start_index, end_index; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvSlice() __attribute__(( warning("Non-initialized variable") )) {} + template<typename _Tp> CvSlice(const std::initializer_list<_Tp> list) + { + CV_Assert(list.size() == 0 || list.size() == 2); + start_index = end_index = 0; + if (list.size() == 2) + { + start_index = list.begin()[0]; end_index = list.begin()[1]; + } + }; +#endif +#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__) + CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {} + CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); } + operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); } +#endif +} +CvSlice; + +CV_INLINE CvSlice cvSlice( int start, int end ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__)) + CvSlice slice = { start, end }; +#else + CvSlice slice(start, end); +#endif + return slice; +} + +#if defined(__cplusplus) +CV_INLINE CvSlice cvSlice(const cv::Range& r) +{ + CvSlice slice = (r.start != INT_MIN && r.end != INT_MAX) ?
cvSlice(r.start, r.end) : cvSlice(0, CV_WHOLE_SEQ_END_INDEX); + return slice; +} +#endif + + +/************************************* CvScalar *****************************************/ +/** @sa Scalar_ + */ +typedef struct CvScalar +{ + double val[4]; + +#ifdef CV__VALIDATE_UNUNITIALIZED_VARS + CvScalar() __attribute__(( warning("Non-initialized variable") )) {} + CvScalar(const std::initializer_list<double> list) + { + CV_Assert(list.size() == 0 || list.size() == 4); + val[0] = val[1] = val[2] = val[3] = 0; + if (list.size() == 4) + { + val[0] = list.begin()[0]; val[1] = list.begin()[1]; val[2] = list.begin()[2]; val[3] = list.begin()[3]; + } + }; +#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) + CvScalar() {} + CvScalar(double d0, double d1 = 0, double d2 = 0, double d3 = 0) { val[0] = d0; val[1] = d1; val[2] = d2; val[3] = d3; } + template<typename _Tp> + CvScalar(const cv::Scalar_<_Tp>& s) { val[0] = s.val[0]; val[1] = s.val[1]; val[2] = s.val[2]; val[3] = s.val[3]; } + template<typename _Tp, int cn> + CvScalar(const cv::Vec<_Tp, cn>& v) + { + int i; + for( i = 0; i < (cn < 4 ? cn : 4); i++ ) val[i] = v.val[i]; + for( ; i < 4; i++ ) val[i] = 0; + } +#endif +#ifdef __cplusplus + template<typename _Tp> + operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); } +#endif +} +CvScalar; + +CV_INLINE CvScalar cvScalar( double val0, double val1 CV_DEFAULT(0), + double val2 CV_DEFAULT(0), double val3 CV_DEFAULT(0)) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = val0; scalar.val[1] = val1; + scalar.val[2] = val2; scalar.val[3] = val3; + return scalar; +} + +#ifdef __cplusplus +CV_INLINE CvScalar cvScalar() +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = scalar.val[1] = scalar.val[2] = scalar.val[3] = 0; + return scalar; +} +CV_INLINE CvScalar cvScalar(const cv::Scalar& s) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = s.val[0]; + scalar.val[1] = s.val[1]; + scalar.val[2] = s.val[2]; + scalar.val[3] = s.val[3]; + return scalar; +} +#endif + +CV_INLINE CvScalar cvRealScalar( double val0 ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = val0; + scalar.val[1] = scalar.val[2] = scalar.val[3] = 0; + return scalar; +} + +CV_INLINE CvScalar cvScalarAll( double val0123 ) +{ +#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) + CvScalar scalar = CV_STRUCT_INITIALIZER; +#else + CvScalar scalar; +#endif + scalar.val[0] = val0123; + scalar.val[1] = val0123; + scalar.val[2] = val0123; + scalar.val[3] = val0123; + return scalar; +} + +/****************************************************************************************\ +* Dynamic Data structures * +\****************************************************************************************/ + +/******************************** Memory storage ****************************************/ + +typedef struct CvMemBlock +{ + struct CvMemBlock* prev; + struct CvMemBlock* next; +} +CvMemBlock; + +#define CV_STORAGE_MAGIC_VAL 0x42890000 + +typedef struct CvMemStorage +{ + int signature; + CvMemBlock* bottom; /**< First
allocated block. */ + CvMemBlock* top; /**< Current memory block - top of the stack. */ + struct CvMemStorage* parent; /**< We get new blocks from parent as needed. */ + int block_size; /**< Block size. */ + int free_space; /**< Remaining free space in current block. */ +} +CvMemStorage; + +#define CV_IS_STORAGE(storage) \ + ((storage) != NULL && \ + (((CvMemStorage*)(storage))->signature & CV_MAGIC_MASK) == CV_STORAGE_MAGIC_VAL) + + +typedef struct CvMemStoragePos +{ + CvMemBlock* top; + int free_space; +} +CvMemStoragePos; + + +/*********************************** Sequence *******************************************/ + +typedef struct CvSeqBlock +{ + struct CvSeqBlock* prev; /**< Previous sequence block. */ + struct CvSeqBlock* next; /**< Next sequence block. */ + int start_index; /**< Index of the first element in the block + */ + /**< sequence->first->start_index. */ + int count; /**< Number of elements in the block. */ + schar* data; /**< Pointer to the first element of the block. */ +} +CvSeqBlock; + + +#define CV_TREE_NODE_FIELDS(node_type) \ + int flags; /**< Miscellaneous flags. */ \ + int header_size; /**< Size of sequence header. */ \ + struct node_type* h_prev; /**< Previous sequence. */ \ + struct node_type* h_next; /**< Next sequence. */ \ + struct node_type* v_prev; /**< 2nd previous sequence. */ \ + struct node_type* v_next /**< 2nd next sequence. */ + +/** + Read/Write sequence. + Elements can be dynamically inserted to or deleted from the sequence. +*/ +#define CV_SEQUENCE_FIELDS() \ + CV_TREE_NODE_FIELDS(CvSeq); \ + int total; /**< Total number of elements. */ \ + int elem_size; /**< Size of sequence element in bytes. */ \ + schar* block_max; /**< Maximal bound of the last block. */ \ + schar* ptr; /**< Current write pointer. */ \ + int delta_elems; /**< Grow seq this many at a time. */ \ + CvMemStorage* storage; /**< Where the seq is stored. */ \ + CvSeqBlock* free_blocks; /**< Free blocks list. */ \ + CvSeqBlock* first; /**< Pointer to the first sequence block. */ + +typedef struct CvSeq +{ + CV_SEQUENCE_FIELDS() +} +CvSeq; + +#define CV_TYPE_NAME_SEQ "opencv-sequence" +#define CV_TYPE_NAME_SEQ_TREE "opencv-sequence-tree" + +/*************************************** Set ********************************************/ +/** @brief Set + Order is not preserved. There can be gaps between sequence elements. + After the element has been inserted it stays in the same place all the time. + The MSB(most-significant or sign bit) of the first field (flags) is 0 iff the element exists. +*/ +#define CV_SET_ELEM_FIELDS(elem_type) \ + int flags; \ + struct elem_type* next_free; + +typedef struct CvSetElem +{ + CV_SET_ELEM_FIELDS(CvSetElem) +} +CvSetElem; + +#define CV_SET_FIELDS() \ + CV_SEQUENCE_FIELDS() \ + CvSetElem* free_elems; \ + int active_count; + +typedef struct CvSet +{ + CV_SET_FIELDS() +} +CvSet; + + +#define CV_SET_ELEM_IDX_MASK ((1 << 26) - 1) +#define CV_SET_ELEM_FREE_FLAG (1 << (sizeof(int)*8-1)) + +/** Checks whether the element pointed by ptr belongs to a set or not */ +#define CV_IS_SET_ELEM( ptr ) (((CvSetElem*)(ptr))->flags >= 0) + +/************************************* Graph ********************************************/ + +/** @name Graph + +We represent a graph as a set of vertices. Vertices contain their adjacency lists (more exactly, +pointers to first incoming or outcoming edge (or 0 if isolated vertex)). Edges are stored in +another set. There is a singly-linked list of incoming/outcoming edges for each vertex. 
+ +Each edge consists of: + +- Two pointers to the starting and ending vertices (vtx[0] and vtx[1] respectively). + + A graph may be oriented or not. In the latter case, edges between vertex i and vertex j are not +distinguished during search operations. + +- Two pointers to next edges for the starting and ending vertices, where next[0] points to the +next edge in the vtx[0] adjacency list and next[1] points to the next edge in the vtx[1] +adjacency list. + +@see CvGraphEdge, CvGraphVtx, CvGraphVtx2D, CvGraph +@{ +*/ +#define CV_GRAPH_EDGE_FIELDS() \ + int flags; \ + float weight; \ + struct CvGraphEdge* next[2]; \ + struct CvGraphVtx* vtx[2]; + + +#define CV_GRAPH_VERTEX_FIELDS() \ + int flags; \ + struct CvGraphEdge* first; + + +typedef struct CvGraphEdge +{ + CV_GRAPH_EDGE_FIELDS() +} +CvGraphEdge; + +typedef struct CvGraphVtx +{ + CV_GRAPH_VERTEX_FIELDS() +} +CvGraphVtx; + +typedef struct CvGraphVtx2D +{ + CV_GRAPH_VERTEX_FIELDS() + CvPoint2D32f* ptr; +} +CvGraphVtx2D; + +/** + Graph is "derived" from the set (this is a set of vertices) + and includes another set (edges) +*/ +#define CV_GRAPH_FIELDS() \ + CV_SET_FIELDS() \ + CvSet* edges; + +typedef struct CvGraph +{ + CV_GRAPH_FIELDS() +} +CvGraph; + +#define CV_TYPE_NAME_GRAPH "opencv-graph" + +/** @} */ + +/*********************************** Chain/Contour *************************************/ + +typedef struct CvChain +{ + CV_SEQUENCE_FIELDS() + CvPoint origin; +} +CvChain; + +#define CV_CONTOUR_FIELDS() \ + CV_SEQUENCE_FIELDS() \ + CvRect rect; \ + int color; \ + int reserved[3]; + +typedef struct CvContour +{ + CV_CONTOUR_FIELDS() +} +CvContour; + +typedef CvContour CvPoint2DSeq; + +/****************************************************************************************\ +* Sequence types * +\****************************************************************************************/ + +#define CV_SEQ_MAGIC_VAL 0x42990000 + +#define CV_IS_SEQ(seq) \ + ((seq) != NULL && (((CvSeq*)(seq))->flags & CV_MAGIC_MASK) == CV_SEQ_MAGIC_VAL) + +#define CV_SET_MAGIC_VAL 0x42980000 +#define CV_IS_SET(set) \ + ((set) != NULL && (((CvSeq*)(set))->flags & CV_MAGIC_MASK) == CV_SET_MAGIC_VAL) + +#define CV_SEQ_ELTYPE_BITS 12 +#define CV_SEQ_ELTYPE_MASK ((1 << CV_SEQ_ELTYPE_BITS) - 1) + +#define CV_SEQ_ELTYPE_POINT CV_32SC2 /**< (x,y) */ +#define CV_SEQ_ELTYPE_CODE CV_8UC1 /**< freeman code: 0..7 */ +#define CV_SEQ_ELTYPE_GENERIC 0 +#define CV_SEQ_ELTYPE_PTR CV_MAKE_TYPE(CV_8U, 8 /*sizeof(void*)*/) +#define CV_SEQ_ELTYPE_PPOINT CV_SEQ_ELTYPE_PTR /**< &(x,y) */ +#define CV_SEQ_ELTYPE_INDEX CV_32SC1 /**< #(x,y) */ +#define CV_SEQ_ELTYPE_GRAPH_EDGE 0 /**< &next_o, &next_d, &vtx_o, &vtx_d */ +#define CV_SEQ_ELTYPE_GRAPH_VERTEX 0 /**< first_edge, &(x,y) */ +#define CV_SEQ_ELTYPE_TRIAN_ATR 0 /**< vertex of the binary tree */ +#define CV_SEQ_ELTYPE_CONNECTED_COMP 0 /**< connected component */ +#define CV_SEQ_ELTYPE_POINT3D CV_32FC3 /**< (x,y,z) */ + +#define CV_SEQ_KIND_BITS 2 +#define CV_SEQ_KIND_MASK (((1 << CV_SEQ_KIND_BITS) - 1)<<CV_SEQ_ELTYPE_BITS) + +/** types of sequences */ +#define CV_SEQ_KIND_GENERIC (0 << CV_SEQ_ELTYPE_BITS) +#define CV_SEQ_KIND_CURVE (1 << CV_SEQ_ELTYPE_BITS) +#define CV_SEQ_KIND_BIN_TREE (2 << CV_SEQ_ELTYPE_BITS) + +/** types of sparse sequences (sets) */ +#define CV_SEQ_KIND_GRAPH (1 << CV_SEQ_ELTYPE_BITS) +#define CV_SEQ_KIND_SUBDIV2D (2 << CV_SEQ_ELTYPE_BITS) + +#define CV_SEQ_FLAG_SHIFT (CV_SEQ_KIND_BITS + CV_SEQ_ELTYPE_BITS) + +/** flags for curves */ +#define CV_SEQ_FLAG_CLOSED (1 << CV_SEQ_FLAG_SHIFT) +#define CV_SEQ_FLAG_SIMPLE (0 << CV_SEQ_FLAG_SHIFT) +#define CV_SEQ_FLAG_CONVEX (0 << CV_SEQ_FLAG_SHIFT) +#define CV_SEQ_FLAG_HOLE (2 << CV_SEQ_FLAG_SHIFT) + +/** flags for graphs */ +#define CV_GRAPH_FLAG_ORIENTED (1 << CV_SEQ_FLAG_SHIFT) + +#define CV_GRAPH CV_SEQ_KIND_GRAPH +#define CV_ORIENTED_GRAPH (CV_SEQ_KIND_GRAPH|CV_GRAPH_FLAG_ORIENTED) + +/** point sets */ +#define CV_SEQ_POINT_SET (CV_SEQ_KIND_GENERIC| CV_SEQ_ELTYPE_POINT) +#define CV_SEQ_POINT3D_SET (CV_SEQ_KIND_GENERIC| CV_SEQ_ELTYPE_POINT3D) +#define CV_SEQ_POLYLINE (CV_SEQ_KIND_CURVE | CV_SEQ_ELTYPE_POINT) +#define CV_SEQ_POLYGON (CV_SEQ_FLAG_CLOSED | CV_SEQ_POLYLINE ) +#define CV_SEQ_CONTOUR CV_SEQ_POLYGON +#define CV_SEQ_SIMPLE_POLYGON (CV_SEQ_FLAG_SIMPLE | CV_SEQ_POLYGON ) + +/** chain-coded curves */ +#define CV_SEQ_CHAIN (CV_SEQ_KIND_CURVE | CV_SEQ_ELTYPE_CODE) +#define CV_SEQ_CHAIN_CONTOUR (CV_SEQ_FLAG_CLOSED | CV_SEQ_CHAIN) + +/** binary tree for the contour */ +#define CV_SEQ_POLYGON_TREE (CV_SEQ_KIND_BIN_TREE | CV_SEQ_ELTYPE_TRIAN_ATR) + +/** sequence of the connected components */ +#define CV_SEQ_CONNECTED_COMP (CV_SEQ_KIND_GENERIC | CV_SEQ_ELTYPE_CONNECTED_COMP) + +/** sequence of the integer numbers */ +#define CV_SEQ_INDEX (CV_SEQ_KIND_GENERIC | CV_SEQ_ELTYPE_INDEX) + +#define CV_SEQ_ELTYPE( seq ) ((seq)->flags & CV_SEQ_ELTYPE_MASK) +#define CV_SEQ_KIND( seq ) ((seq)->flags & CV_SEQ_KIND_MASK ) + +/** flag checking */ +#define CV_IS_SEQ_INDEX( seq ) ((CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_INDEX) && \ + (CV_SEQ_KIND(seq) == CV_SEQ_KIND_GENERIC)) + +#define CV_IS_SEQ_CURVE( seq ) (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE) +#define CV_IS_SEQ_CLOSED( seq ) (((seq)->flags & CV_SEQ_FLAG_CLOSED) != 0) +#define CV_IS_SEQ_CONVEX( seq ) 0 +#define CV_IS_SEQ_HOLE( seq ) (((seq)->flags & CV_SEQ_FLAG_HOLE) != 0) +#define CV_IS_SEQ_SIMPLE( seq ) 1 +
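/* A quick sketch (not part of the upstream header) of how the packed flags
   compose in practice; cvCreateMemStorage, cvCreateSeq and cvReleaseMemStorage
   come from the legacy C API in core_c.h:

       CvMemStorage* storage = cvCreateMemStorage(0);
       CvSeq* seq = cvCreateSeq( CV_SEQ_POLYGON, sizeof(CvSeq),
                                 sizeof(CvPoint), storage );
       assert( CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_POINT ); // element type bits
       assert( CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE );     // kind bits
       assert( CV_IS_SEQ_CLOSED(seq) );                     // CV_SEQ_FLAG_CLOSED bit
       cvReleaseMemStorage( &storage );
*/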
+/** type checking macros */ +#define CV_IS_SEQ_POINT_SET( seq ) \ + ((CV_SEQ_ELTYPE(seq) == CV_32SC2 || CV_SEQ_ELTYPE(seq) == CV_32FC2)) + +#define CV_IS_SEQ_POINT_SUBSET( seq ) \ + (CV_IS_SEQ_INDEX( seq ) || CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_PPOINT) + +#define CV_IS_SEQ_POLYLINE( seq ) \ + (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && CV_IS_SEQ_POINT_SET(seq)) + +#define CV_IS_SEQ_POLYGON( seq ) \ + (CV_IS_SEQ_POLYLINE(seq) && CV_IS_SEQ_CLOSED(seq)) + +#define CV_IS_SEQ_CHAIN( seq ) \ + (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && (seq)->elem_size == 1) + +#define CV_IS_SEQ_CONTOUR( seq ) \ + (CV_IS_SEQ_CLOSED(seq) && (CV_IS_SEQ_POLYLINE(seq) || CV_IS_SEQ_CHAIN(seq))) + +#define CV_IS_SEQ_CHAIN_CONTOUR( seq ) \ + (CV_IS_SEQ_CHAIN( seq ) && CV_IS_SEQ_CLOSED( seq )) + +#define CV_IS_SEQ_POLYGON_TREE( seq ) \ + (CV_SEQ_ELTYPE (seq) == CV_SEQ_ELTYPE_TRIAN_ATR && \ + CV_SEQ_KIND( seq ) == CV_SEQ_KIND_BIN_TREE ) + +#define CV_IS_GRAPH( seq ) \ + (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_GRAPH) + +#define CV_IS_GRAPH_ORIENTED( seq ) \ + (((seq)->flags & CV_GRAPH_FLAG_ORIENTED) != 0) + +#define CV_IS_SUBDIV2D( seq ) \ + (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_SUBDIV2D) + +/****************************************************************************************/ +/* Sequence writer & reader */ +/****************************************************************************************/ + +#define CV_SEQ_WRITER_FIELDS() \ + int header_size; \ + CvSeq* seq; /**< the sequence written */ \ + CvSeqBlock* block; /**< current block */ \ + schar* ptr; /**< pointer to free space */ \ + schar* block_min; /**< pointer to the beginning of block*/\ + schar* block_max; /**< pointer to the end of block */ + +typedef struct CvSeqWriter +{ + CV_SEQ_WRITER_FIELDS() +} +CvSeqWriter; + + +#define CV_SEQ_READER_FIELDS() \ + int header_size; \ + CvSeq* seq; /**< sequence being read */ \ + CvSeqBlock* block; /**< current block */ \ + schar* ptr; /**< pointer to the element to be read next */ \ + schar* block_min; /**< pointer to the beginning of block */\ + schar* block_max; /**< pointer to the end of block */ \ + int delta_index;/**< = seq->first->start_index */ \ + schar* prev_elem; /**< pointer to previous element */ + +typedef struct CvSeqReader +{ + CV_SEQ_READER_FIELDS() +} +CvSeqReader; + +/****************************************************************************************/ +/* Operations on sequences */ +/****************************************************************************************/ + +#define CV_SEQ_ELEM( seq, elem_type, index ) \ +/** assert gives some guarantee that parameter is valid */ \ +( assert(sizeof((seq)->first[0]) == sizeof(CvSeqBlock) && \ + (seq)->elem_size == sizeof(elem_type)), \ + (elem_type*)((seq)->first && (unsigned)index < \ + (unsigned)((seq)->first->count) ?
\ + (seq)->first->data + (index) * sizeof(elem_type) : \ + cvGetSeqElem( (CvSeq*)(seq), (index) ))) +#define CV_GET_SEQ_ELEM( elem_type, seq, index ) CV_SEQ_ELEM( (seq), elem_type, (index) ) + +/** Add element to sequence: */ +#define CV_WRITE_SEQ_ELEM_VAR( elem_ptr, writer ) \ +{ \ + if( (writer).ptr >= (writer).block_max ) \ + { \ + cvCreateSeqBlock( &writer); \ + } \ + memcpy((writer).ptr, elem_ptr, (writer).seq->elem_size);\ + (writer).ptr += (writer).seq->elem_size; \ +} + +#define CV_WRITE_SEQ_ELEM( elem, writer ) \ +{ \ + assert( (writer).seq->elem_size == sizeof(elem)); \ + if( (writer).ptr >= (writer).block_max ) \ + { \ + cvCreateSeqBlock( &writer); \ + } \ + assert( (writer).ptr <= (writer).block_max - sizeof(elem));\ + memcpy((writer).ptr, &(elem), sizeof(elem)); \ + (writer).ptr += sizeof(elem); \ +} + + +/** Move reader position forward: */ +#define CV_NEXT_SEQ_ELEM( elem_size, reader ) \ +{ \ + if( ((reader).ptr += (elem_size)) >= (reader).block_max ) \ + { \ + cvChangeSeqBlock( &(reader), 1 ); \ + } \ +} + + +/** Move reader position backward: */ +#define CV_PREV_SEQ_ELEM( elem_size, reader ) \ +{ \ + if( ((reader).ptr -= (elem_size)) < (reader).block_min ) \ + { \ + cvChangeSeqBlock( &(reader), -1 ); \ + } \ +} + +/** Read element and move read position forward: */ +#define CV_READ_SEQ_ELEM( elem, reader ) \ +{ \ + assert( (reader).seq->elem_size == sizeof(elem)); \ + memcpy( &(elem), (reader).ptr, sizeof((elem))); \ + CV_NEXT_SEQ_ELEM( sizeof(elem), reader ) \ +} + +/** Read element and move read position backward: */ +#define CV_REV_READ_SEQ_ELEM( elem, reader ) \ +{ \ + assert( (reader).seq->elem_size == sizeof(elem)); \ + memcpy(&(elem), (reader).ptr, sizeof((elem))); \ + CV_PREV_SEQ_ELEM( sizeof(elem), reader ) \ +} + + +#define CV_READ_CHAIN_POINT( _pt, reader ) \ +{ \ + (_pt) = (reader).pt; \ + if( (reader).ptr ) \ + { \ + CV_READ_SEQ_ELEM( (reader).code, (reader)); \ + assert( ((reader).code & ~7) == 0 ); \ + (reader).pt.x += (reader).deltas[(int)(reader).code][0]; \ + (reader).pt.y += (reader).deltas[(int)(reader).code][1]; \ + } \ +} + +#define CV_CURRENT_POINT( reader ) (*((CvPoint*)((reader).ptr))) +#define CV_PREV_POINT( reader ) (*((CvPoint*)((reader).prev_elem))) + +#define CV_READ_EDGE( pt1, pt2, reader ) \ +{ \ + assert( sizeof(pt1) == sizeof(CvPoint) && \ + sizeof(pt2) == sizeof(CvPoint) && \ + reader.seq->elem_size == sizeof(CvPoint)); \ + (pt1) = CV_PREV_POINT( reader ); \ + (pt2) = CV_CURRENT_POINT( reader ); \ + (reader).prev_elem = (reader).ptr; \ + CV_NEXT_SEQ_ELEM( sizeof(CvPoint), (reader)); \ +} + +/************ Graph macros ************/ + +/** Return next graph edge for given vertex: */ +#define CV_NEXT_GRAPH_EDGE( edge, vertex ) \ + (assert((edge)->vtx[0] == (vertex) || (edge)->vtx[1] == (vertex)), \ + (edge)->next[(edge)->vtx[1] == (vertex)]) + + + +/****************************************************************************************\ +* Data structures for persistence (a.k.a serialization) functionality * +\****************************************************************************************/ + +#if 0 + +/** "black box" file storage */ +typedef struct CvFileStorage CvFileStorage; + +/** Storage flags: */ +#define CV_STORAGE_READ 0 +#define CV_STORAGE_WRITE 1 +#define CV_STORAGE_WRITE_TEXT CV_STORAGE_WRITE +#define CV_STORAGE_WRITE_BINARY CV_STORAGE_WRITE +#define CV_STORAGE_APPEND 2 +#define CV_STORAGE_MEMORY 4 +#define CV_STORAGE_FORMAT_MASK (7<<3) +#define CV_STORAGE_FORMAT_AUTO 0 +#define CV_STORAGE_FORMAT_XML 8 +#define 
CV_STORAGE_FORMAT_YAML 16 +#define CV_STORAGE_FORMAT_JSON 24 +#define CV_STORAGE_BASE64 64 +#define CV_STORAGE_WRITE_BASE64 (CV_STORAGE_BASE64 | CV_STORAGE_WRITE) + +/** @brief List of attributes: + +In the current implementation, attributes are used to pass extra parameters when writing user +objects (see cvWrite). XML attributes inside tags are not supported, aside from the object type +specification (type_id attribute). +@see cvAttrList, cvAttrValue + */ +typedef struct CvAttrList +{ + const char** attr; /**< NULL-terminated array of (attribute_name,attribute_value) pairs. */ + struct CvAttrList* next; /**< Pointer to next chunk of the attributes list. */ +} +CvAttrList; + +/** initializes CvAttrList structure */ +CV_INLINE CvAttrList cvAttrList( const char** attr CV_DEFAULT(NULL), + CvAttrList* next CV_DEFAULT(NULL) ) +{ + CvAttrList l; + l.attr = attr; + l.next = next; + + return l; +} + +struct CvTypeInfo; + +#define CV_NODE_NONE 0 +#define CV_NODE_INT 1 +#define CV_NODE_INTEGER CV_NODE_INT +#define CV_NODE_REAL 2 +#define CV_NODE_FLOAT CV_NODE_REAL +#define CV_NODE_STR 3 +#define CV_NODE_STRING CV_NODE_STR +#define CV_NODE_REF 4 /**< not used */ +#define CV_NODE_SEQ 5 +#define CV_NODE_MAP 6 +#define CV_NODE_TYPE_MASK 7 + +#define CV_NODE_TYPE(flags) ((flags) & CV_NODE_TYPE_MASK) + +/** file node flags */ +#define CV_NODE_FLOW 8 /**< Used only for writing structures in YAML format. */ +#define CV_NODE_USER 16 +#define CV_NODE_EMPTY 32 +#define CV_NODE_NAMED 64 + +#define CV_NODE_IS_INT(flags) (CV_NODE_TYPE(flags) == CV_NODE_INT) +#define CV_NODE_IS_REAL(flags) (CV_NODE_TYPE(flags) == CV_NODE_REAL) +#define CV_NODE_IS_STRING(flags) (CV_NODE_TYPE(flags) == CV_NODE_STRING) +#define CV_NODE_IS_SEQ(flags) (CV_NODE_TYPE(flags) == CV_NODE_SEQ) +#define CV_NODE_IS_MAP(flags) (CV_NODE_TYPE(flags) == CV_NODE_MAP) +#define CV_NODE_IS_COLLECTION(flags) (CV_NODE_TYPE(flags) >= CV_NODE_SEQ) +#define CV_NODE_IS_FLOW(flags) (((flags) & CV_NODE_FLOW) != 0) +#define CV_NODE_IS_EMPTY(flags) (((flags) & CV_NODE_EMPTY) != 0) +#define CV_NODE_IS_USER(flags) (((flags) & CV_NODE_USER) != 0) +#define CV_NODE_HAS_NAME(flags) (((flags) & CV_NODE_NAMED) != 0) + +#define CV_NODE_SEQ_SIMPLE 256 +#define CV_NODE_SEQ_IS_SIMPLE(seq) (((seq)->flags & CV_NODE_SEQ_SIMPLE) != 0) + +typedef struct CvString +{ + int len; + char* ptr; +} +CvString; + +/** All the keys (names) of elements in the read file storage + are stored in the hash to speed up the lookup operations: */ +typedef struct CvStringHashNode +{ + unsigned hashval; + CvString str; + struct CvStringHashNode* next; +} +CvStringHashNode; + +typedef struct CvGenericHash CvFileNodeHash; + +/** Basic element of the file storage - scalar or collection: */ +typedef struct CvFileNode +{ + int tag; + struct CvTypeInfo* info; /**< type information + (only for user-defined object, for others it is 0) */ + union + { + double f; /**< scalar floating-point number */ + int i; /**< scalar integer number */ + CvString str; /**< text string */ + CvSeq* seq; /**< sequence (ordered collection of file nodes) */ + CvFileNodeHash* map; /**< map (collection of named file nodes) */ + } data; +} +CvFileNode; + +#ifdef __cplusplus +extern "C" { +#endif +typedef int (CV_CDECL *CvIsInstanceFunc)( const void* struct_ptr ); +typedef void (CV_CDECL *CvReleaseFunc)( void** struct_dblptr ); +typedef void* (CV_CDECL *CvReadFunc)( CvFileStorage* storage, CvFileNode* node ); +typedef void (CV_CDECL *CvWriteFunc)( CvFileStorage* storage, const char* name, + const void* struct_ptr, CvAttrList attributes ); +typedef void* (CV_CDECL *CvCloneFunc)( const void* struct_ptr ); +#ifdef __cplusplus +} +#endif + +/** @brief Type information + +The structure contains information about one of the standard or user-defined types. Instances of the +type may or may not contain a pointer to the corresponding CvTypeInfo structure. In any case, there +is a way to find the type info structure for a given object using the cvTypeOf function.
+Alternatively, type info can be found by type name using cvFindType, which is used when an object +is read from file storage. The user can register a new type with cvRegisterType that adds the type +information structure into the beginning of the type list. Thus, it is possible to create +specialized types from generic standard types and override the basic methods. + */ +typedef struct CvTypeInfo +{ + int flags; /**< not used */ + int header_size; /**< sizeof(CvTypeInfo) */ + struct CvTypeInfo* prev; /**< previous registered type in the list */ + struct CvTypeInfo* next; /**< next registered type in the list */ + const char* type_name; /**< type name, written to file storage */ + CvIsInstanceFunc is_instance; /**< checks if the passed object belongs to the type */ + CvReleaseFunc release; /**< releases object (memory etc.) */ + CvReadFunc read; /**< reads object from file storage */ + CvWriteFunc write; /**< writes object to file storage */ + CvCloneFunc clone; /**< creates a copy of the object */ +} +CvTypeInfo; +#endif + +/** @} */ + +#endif /*OPENCV_CORE_TYPES_H*/ + +/* End of file. */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utility.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utility.hpp new file mode 100644 index 0000000..77b0ee9 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utility.hpp @@ -0,0 +1,1229 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CORE_UTILITY_H +#define OPENCV_CORE_UTILITY_H + +#ifndef __cplusplus +# error utility.hpp header must be compiled as C++ +#endif + +#if defined(check) +# warning Detected Apple 'check' macro definition, it can cause build conflicts. Please, include this header before any Apple headers. +#endif + +#include "opencv2/core.hpp" +#include <ostream> + +#include <functional> + +#if !defined(_M_CEE) +#include <mutex> // std::mutex, std::lock_guard +#endif + +namespace cv +{ + +//! @addtogroup core_utils +//! @{ + +/** @brief Automatically Allocated Buffer Class + + The class is used for temporary buffers in functions and methods. + If a temporary buffer is usually small (a few K's of memory), + but its size depends on the parameters, it makes sense to create a small + fixed-size array on stack and use it if it's large enough. If the required buffer size + is larger than the fixed size, another buffer of sufficient size is allocated dynamically + and released after the processing. Therefore, in typical cases, when the buffer size is small, + there is no overhead associated with malloc()/free(). + At the same time, there is no limit on the size of processed data. + + This is what AutoBuffer does. The template takes 2 parameters - type of the buffer elements and + the number of stack-allocated elements. Here is how the class is used: + + \code + void my_func(const cv::Mat& m) + { + cv::AutoBuffer<float> buf(1000); // create automatic buffer containing 1000 floats + + buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used, + // otherwise the buffer of "m.rows" floats will be allocated + // dynamically and deallocated in cv::AutoBuffer destructor + ... + } + \endcode +*/ +#ifdef OPENCV_ENABLE_MEMORY_SANITIZER +template<typename _Tp, size_t fixed_size = 0> class AutoBuffer +#else +template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer +#endif +{ +public: + typedef _Tp value_type; + + //! the default constructor + AutoBuffer(); + //! constructor taking the real buffer size + explicit AutoBuffer(size_t _size); + + //! the copy constructor + AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf); + //! the assignment operator + AutoBuffer<_Tp, fixed_size>& operator = (const AutoBuffer<_Tp, fixed_size>& buf); + + //! destructor. calls deallocate() + ~AutoBuffer(); + + //! allocates the new buffer of size _size. if the _size is small enough, stack-allocated buffer is used + void allocate(size_t _size); + //! deallocates the buffer if it was dynamically allocated + void deallocate(); + //! resizes the buffer and preserves the content + void resize(size_t _size); + //! returns the current buffer size + size_t size() const; + //! returns pointer to the real buffer, stack-allocated or heap-allocated + inline _Tp* data() { return ptr; } + //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated + inline const _Tp* data() const { return ptr; } + +#if !defined(OPENCV_DISABLE_DEPRECATED_COMPATIBILITY) // use .data() calls instead + //!
returns pointer to the real buffer, stack-allocated or heap-allocated + operator _Tp* () { return ptr; } + //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated + operator const _Tp* () const { return ptr; } +#else + //! returns a reference to the element at specified location. No bounds checking is performed in Release builds. + inline _Tp& operator[] (size_t i) { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; } + //! returns a reference to the element at specified location. No bounds checking is performed in Release builds. + inline const _Tp& operator[] (size_t i) const { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; } +#endif + +protected: + //! pointer to the real buffer, can point to buf if the buffer is small enough + _Tp* ptr; + //! size of the real buffer + size_t sz; + //! pre-allocated buffer. At least 1 element to confirm C++ standard requirements + _Tp buf[(fixed_size > 0) ? fixed_size : 1]; +}; + +/** @brief Sets/resets the break-on-error mode. + +When the break-on-error mode is set, the default error handler issues a hardware exception, which +can make debugging more convenient. + +\return the previous state + */ +CV_EXPORTS bool setBreakOnError(bool flag); + +extern "C" typedef int (*ErrorCallback)( int status, const char* func_name, + const char* err_msg, const char* file_name, + int line, void* userdata ); + + +/** @brief Sets the new error handler and the optional user data. + + The function sets the new error handler, called from cv::error(). + + \param errCallback the new error handler. If NULL, the default error handler is used. + \param userdata the optional user data pointer, passed to the callback. + \param prevUserdata the optional output parameter where the previous user data pointer is stored + + \return the previous error handler +*/ +CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0); + +CV_EXPORTS String tempfile( const char* suffix = 0); +CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false); + +/** @brief OpenCV will try to set the number of threads for the next parallel region. + +If threads == 0, OpenCV will disable threading optimizations and run all its functions +sequentially. Passing threads \< 0 will reset threads number to system default. This function must +be called outside of parallel region. + +OpenCV will try to run its functions with specified threads number, but some behaviour differs from +framework: +- `TBB` - User-defined parallel constructions will run with the same threads number, if + another is not specified. If the user later creates their own scheduler, OpenCV will use it. +- `OpenMP` - No special defined behaviour. +- `Concurrency` - If threads == 1, OpenCV will disable threading optimizations and run its + functions sequentially. +- `GCD` - Supports only values \<= 0. +- `C=` - No special defined behaviour. +@param nthreads Number of threads used by OpenCV. +@sa getNumThreads, getThreadNum + */ +CV_EXPORTS_W void setNumThreads(int nthreads); + +/** @brief Returns the number of threads used by OpenCV for parallel regions. + +Always returns 1 if OpenCV is built without threading support. + +The exact meaning of return value depends on the threading framework used by OpenCV library: +- `TBB` - The number of threads, that OpenCV will try to use for parallel regions.
If there is + any tbb::thread_scheduler_init in user code conflicting with OpenCV, then function returns + default number of threads used by TBB library. +- `OpenMP` - An upper bound on the number of threads that could be used to form a new team. +- `Concurrency` - The number of threads, that OpenCV will try to use for parallel regions. +- `GCD` - Unsupported; returns the GCD thread pool limit (512) for compatibility. +- `C=` - The number of threads, that OpenCV will try to use for parallel regions, if setNumThreads + was called before with threads \> 0, otherwise returns the number of logical CPUs + available for the process. +@sa setNumThreads, getThreadNum + */ +CV_EXPORTS_W int getNumThreads(); + +/** @brief Returns the index of the currently executed thread within the current parallel region. Always +returns 0 if called outside of parallel region. + +@deprecated Current implementation doesn't correspond to this documentation. + +The exact meaning of the return value depends on the threading framework used by OpenCV library: +- `TBB` - Unsupported with current 4.1 TBB release. It may be supported in the future. +- `OpenMP` - The thread number, within the current team, of the calling thread. +- `Concurrency` - An ID for the virtual processor that the current context is executing on (0 + for master thread and unique number for others, but not necessarily 1,2,3,...). +- `GCD` - System calling thread's ID. Never returns 0 inside parallel region. +- `C=` - The index of the current parallel task. +@sa setNumThreads, getNumThreads + */ +CV_EXPORTS_W int getThreadNum(); + +/** @brief Returns full configuration time cmake output. + +Returned value is raw cmake output including version control system revision, compiler version, +compiler flags, enabled modules and third party libraries, etc. Output format depends on target +architecture. + */ +CV_EXPORTS_W const String& getBuildInformation(); + +/** @brief Returns library version string + +For example "3.4.1-dev". + +@sa getMajorVersion, getMinorVersion, getRevisionVersion +*/ +CV_EXPORTS_W String getVersionString(); + +/** @brief Returns major library version */ +CV_EXPORTS_W int getVersionMajor(); + +/** @brief Returns minor library version */ +CV_EXPORTS_W int getVersionMinor(); + +/** @brief Returns revision field of the library version */ +CV_EXPORTS_W int getVersionRevision(); + +/** @brief Returns the number of ticks. + +The function returns the number of ticks after a certain event (for example, when the machine was +turned on). It can be used to initialize RNG or to measure a function execution time by reading the +tick count before and after the function call. +@sa getTickFrequency, TickMeter + */ +CV_EXPORTS_W int64 getTickCount(); + +/** @brief Returns the number of ticks per second. + +The function returns the number of ticks per second. That is, the following code computes the +execution time in seconds: +@code + double t = (double)getTickCount(); + // do something ... + t = ((double)getTickCount() - t)/getTickFrequency(); +@endcode +@sa getTickCount, TickMeter + */ +CV_EXPORTS_W double getTickFrequency(); + +/** @brief A class to measure passing time. + +The class computes passing time by counting the number of ticks per second.
That is, the following code computes the +execution time in seconds: +@snippet snippets/core_various.cpp TickMeter_total + +It is also possible to compute the average time over multiple runs: +@snippet snippets/core_various.cpp TickMeter_average + +@sa getTickCount, getTickFrequency +*/ +class CV_EXPORTS_W TickMeter +{ +public: + //! the default constructor + CV_WRAP TickMeter() + { + reset(); + } + + //! starts counting ticks. + CV_WRAP void start() + { + startTime = cv::getTickCount(); + } + + //! stops counting ticks. + CV_WRAP void stop() + { + int64 time = cv::getTickCount(); + if (startTime == 0) + return; + ++counter; + sumTime += (time - startTime); + startTime = 0; + } + + //! returns counted ticks. + CV_WRAP int64 getTimeTicks() const + { + return sumTime; + } + + //! returns passed time in microseconds. + CV_WRAP double getTimeMicro() const + { + return getTimeMilli()*1e3; + } + + //! returns passed time in milliseconds. + CV_WRAP double getTimeMilli() const + { + return getTimeSec()*1e3; + } + + //! returns passed time in seconds. + CV_WRAP double getTimeSec() const + { + return (double)getTimeTicks() / getTickFrequency(); + } + + //! returns internal counter value. + CV_WRAP int64 getCounter() const + { + return counter; + } + + //! returns average FPS (frames per second) value. + CV_WRAP double getFPS() const + { + const double sec = getTimeSec(); + if (sec < DBL_EPSILON) + return 0.; + return counter / sec; + } + + //! returns average time in seconds + CV_WRAP double getAvgTimeSec() const + { + if (counter <= 0) + return 0.; + return getTimeSec() / counter; + } + + //! returns average time in milliseconds + CV_WRAP double getAvgTimeMilli() const + { + return getAvgTimeSec() * 1e3; + } + + //! resets internal values. + CV_WRAP void reset() + { + startTime = 0; + sumTime = 0; + counter = 0; + } + +private: + int64 counter; + int64 sumTime; + int64 startTime; +}; + +/** @brief output operator +@code +TickMeter tm; +tm.start(); +// do something ... +tm.stop(); +std::cout << tm; +@endcode +*/ + +static inline +std::ostream& operator << (std::ostream& out, const TickMeter& tm) +{ + return out << tm.getTimeSec() << "sec"; +} + +/** @brief Returns the number of CPU ticks. + +The function returns the current number of CPU ticks on some architectures (such as x86, x64, +PowerPC). On other platforms the function is equivalent to getTickCount. It can also be used for +very accurate time measurements, as well as for RNG initialization. Note that in case of multi-CPU +systems a thread, from which getCPUTickCount is called, can be suspended and resumed at another CPU +with its own counter. So, theoretically (and practically) the subsequent calls to the function do +not necessarily return monotonically increasing values. Also, since a modern CPU varies the CPU +frequency depending on the load, the number of CPU clocks spent in some code cannot be directly +converted to time units. Therefore, getTickCount is generally a preferable solution for measuring +execution time. + */ +CV_EXPORTS_W int64 getCPUTickCount(); + +/** @brief Returns true if the specified feature is supported by the host hardware. + +The function returns true if the host hardware supports the specified feature. When the user calls +setUseOptimized(false), the subsequent calls to checkHardwareSupport() will return false until +setUseOptimized(true) is called. This way the user can dynamically switch on and off the optimized code +in OpenCV.
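For example, a quick capability probe might look like this (a minimal sketch; CV_CPU_NEON is one of the cv::CpuFeatures constants from cvdef.h):
@code
    if (cv::checkHardwareSupport(CV_CPU_NEON))
    {
        // take the NEON-optimized code path
    }
@endcode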
+@param feature The feature of interest, one of cv::CpuFeatures + */ +CV_EXPORTS_W bool checkHardwareSupport(int feature); + +/** @brief Returns feature name by ID + +Returns empty string if feature is not defined +*/ +CV_EXPORTS_W String getHardwareFeatureName(int feature); + +/** @brief Returns list of CPU features enabled during compilation. + +Returned value is a string containing space separated list of CPU features with following markers: + +- no markers - baseline features +- prefix `*` - features enabled in dispatcher +- suffix `?` - features enabled but not available in HW + +Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?` +*/ +CV_EXPORTS_W std::string getCPUFeaturesLine(); + +/** @brief Returns the number of logical CPUs available for the process. + */ +CV_EXPORTS_W int getNumberOfCPUs(); + + +/** @brief Aligns a pointer to the specified number of bytes. + +The function returns the aligned pointer of the same type as the input pointer: +\f[\texttt{(_Tp*)(((size_t)ptr + n-1) & -n)}\f] +@param ptr Aligned pointer. +@param n Alignment size that must be a power of two. + */ +template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp)) +{ + CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2 + return (_Tp*)(((size_t)ptr + n-1) & -n); +} + +/** @brief Aligns a buffer size to the specified number of bytes. + +The function returns the minimum number that is greater than or equal to sz and is divisible by n : +\f[\texttt{(sz + n-1) & -n}\f] +@param sz Buffer size to align. +@param n Alignment size that must be a power of two. + */ +static inline size_t alignSize(size_t sz, int n) +{ + CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2 + return (sz + n-1) & -n; +} + +/** @brief Integer division with result round up. + +Use this function instead of `ceil((float)a / b)` expressions. + +@sa alignSize +*/ +static inline int divUp(int a, unsigned int b) +{ + CV_DbgAssert(a >= 0); + return (a + b - 1) / b; +} +/** @overload */ +static inline size_t divUp(size_t a, unsigned int b) +{ + return (a + b - 1) / b; +} + +/** @brief Round first value up to the nearest multiple of second value. + +Use this function instead of `ceil((float)a / b) * b` expressions. + +@sa divUp +*/ +static inline int roundUp(int a, unsigned int b) +{ + CV_DbgAssert(a >= 0); + return a + b - 1 - (a + b -1) % b; +} +/** @overload */ +static inline size_t roundUp(size_t a, unsigned int b) +{ + return a + b - 1 - (a + b - 1) % b; +} + +/** @brief Alignment check of passed values + +Usage: `isAligned<sizeof(int)>(...)` + +@note Alignment(N) must be a power of 2 (2**k, 2^k) +*/ +template<int N, typename T> static inline +bool isAligned(const T& data) +{ + CV_StaticAssert((N & (N - 1)) == 0, ""); // power of 2 + return (((size_t)data) & (N - 1)) == 0; +} +/** @overload */ +template<int N> static inline +bool isAligned(const void* p1) +{ + return isAligned<N>((size_t)p1); +} +/** @overload */ +template<int N> static inline +bool isAligned(const void* p1, const void* p2) +{ + return isAligned<N>(((size_t)p1)|((size_t)p2)); +} +/** @overload */ +template<int N> static inline +bool isAligned(const void* p1, const void* p2, const void* p3) +{ + return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3)); +} +/** @overload */ +template<int N> static inline +bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4) +{ + return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4)); +} + +/** @brief Enables or disables the optimized code.
+ +The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2, +and other instructions on the platforms that support it). It sets a global flag that is further +checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only +safe to call the function on the very top level in your application where you can be sure that no +other OpenCV function is currently executed. + +By default, the optimized code is enabled unless you disable it in CMake. The current status can be +retrieved using useOptimized. +@param onoff The boolean flag specifying whether the optimized code should be used (onoff=true) +or not (onoff=false). + */ +CV_EXPORTS_W void setUseOptimized(bool onoff); + +/** @brief Returns the status of optimized code usage. + +The function returns true if the optimized code is enabled. Otherwise, it returns false. + */ +CV_EXPORTS_W bool useOptimized(); + +static inline size_t getElemSize(int type) { return (size_t)CV_ELEM_SIZE(type); } + +/////////////////////////////// Parallel Primitives ////////////////////////////////// + +/** @brief Base class for parallel data processors + +@ingroup core_parallel +*/ +class CV_EXPORTS ParallelLoopBody +{ +public: + virtual ~ParallelLoopBody(); + virtual void operator() (const Range& range) const = 0; +}; + +/** @brief Parallel data processor + +@ingroup core_parallel +*/ +CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.); + +//! @ingroup core_parallel +class ParallelLoopBodyLambdaWrapper : public ParallelLoopBody +{ +private: + std::function<void(const Range&)> m_functor; +public: + inline + ParallelLoopBodyLambdaWrapper(std::function<void(const Range&)> functor) + : m_functor(functor) + { + // nothing + } + + virtual void operator() (const cv::Range& range) const CV_OVERRIDE + { + m_functor(range); + } +}; + +//! @ingroup core_parallel +static inline +void parallel_for_(const Range& range, std::function<void(const Range&)> functor, double nstripes=-1.) +{ + parallel_for_(range, ParallelLoopBodyLambdaWrapper(functor), nstripes); +} + + +/////////////////////////////// forEach method of cv::Mat //////////////////////////// +template<typename _Tp, typename Functor> inline +void Mat::forEach_impl(const Functor& operation) { + if (false) { + operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<const int*>(0)); + // If your compiler fails in this line. + // Please check that your functor signature is + // (_Tp&, const int*) <- multi-dimensional + // or (_Tp&, void*) <- in case you don't need current idx. + } + + CV_Assert(!empty()); + CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX); + const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]); + + class PixelOperationWrapper :public ParallelLoopBody + { + public: + PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation) + : mat(frame), op(_operation) {} + virtual ~PixelOperationWrapper(){} + // ! Overloaded virtual operator + // convert range call to row call.
+ virtual void operator()(const Range &range) const CV_OVERRIDE + { + const int DIMS = mat->dims; + const int COLS = mat->size[DIMS - 1]; + if (DIMS <= 2) { + for (int row = range.start; row < range.end; ++row) { + this->rowCall2(row, COLS); + } + } else { + std::vector idx(DIMS); /// idx is modified in this->rowCall + idx[DIMS - 2] = range.start - 1; + + for (int line_num = range.start; line_num < range.end; ++line_num) { + idx[DIMS - 2]++; + for (int i = DIMS - 2; i >= 0; --i) { + if (idx[i] >= mat->size[i]) { + idx[i - 1] += idx[i] / mat->size[i]; + idx[i] %= mat->size[i]; + continue; // carry-over; + } + else { + break; + } + } + this->rowCall(&idx[0], COLS, DIMS); + } + } + } + private: + Mat_<_Tp>* const mat; + const Functor op; + // ! Call operator for each elements in this row. + inline void rowCall(int* const idx, const int COLS, const int DIMS) const { + int &col = idx[DIMS - 1]; + col = 0; + _Tp* pixel = &(mat->template at<_Tp>(idx)); + + while (col < COLS) { + op(*pixel, const_cast(idx)); + pixel++; col++; + } + col = 0; + } + // ! Call operator for each elements in this row. 2d mat special version. + inline void rowCall2(const int row, const int COLS) const { + union Index{ + int body[2]; + operator const int*() const { + return reinterpret_cast(this); + } + int& operator[](const int i) { + return body[i]; + } + } idx = {{row, 0}}; + // Special union is needed to avoid + // "error: array subscript is above array bounds [-Werror=array-bounds]" + // when call the functor `op` such that access idx[3]. + + _Tp* pixel = &(mat->template at<_Tp>(idx)); + const _Tp* const pixel_end = pixel + COLS; + while(pixel < pixel_end) { + op(*pixel++, static_cast(idx)); + idx[1]++; + } + } + PixelOperationWrapper& operator=(const PixelOperationWrapper &) { + CV_Assert(false); + // We can not remove this implementation because Visual Studio warning C4822. + return *this; + } + }; + + parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast*>(this), operation)); +} + +/////////////////////////// Synchronization Primitives /////////////////////////////// + +#if !defined(_M_CEE) +#ifndef OPENCV_DISABLE_THREAD_SUPPORT +typedef std::recursive_mutex Mutex; +typedef std::lock_guard AutoLock; +#else // OPENCV_DISABLE_THREAD_SUPPORT +// Custom (failing) implementation of `std::recursive_mutex`. +struct Mutex { + void lock_(){ + CV_Error(cv::Error::StsNotImplemented, + "cv::Mutex is disabled by OPENCV_DISABLE_THREAD_SUPPORT=ON"); + } + void unlock(){ + CV_Error(cv::Error::StsNotImplemented, + "cv::Mutex is disabled by OPENCV_DISABLE_THREAD_SUPPORT=ON"); + } +}; +// Stub for cv::AutoLock when threads are disabled. +struct AutoLock { + AutoLock(Mutex &) { } +}; +#endif // OPENCV_DISABLE_THREAD_SUPPORT +#endif // !defined(_M_CEE) + + +/** @brief Designed for command line parsing + +The sample below demonstrates how to use CommandLineParser: +@code + CommandLineParser parser(argc, argv, keys); + parser.about("Application name v1.0.0"); + + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + int N = parser.get("N"); + double fps = parser.get("fps"); + String path = parser.get("path"); + + use_time_stamp = parser.has("timestamp"); + + String img1 = parser.get(0); + String img2 = parser.get(1); + + int repeat = parser.get(2); + + if (!parser.check()) + { + parser.printErrors(); + return 0; + } +@endcode + +### Keys syntax + +The keys parameter is a string containing several blocks, each one is enclosed in curly braces and +describes one argument. 
Each argument contains three parts separated by the `|` symbol: + +-# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol) +-# default value will be used if the argument was not provided (can be empty) +-# help message (can be empty) + +For example: + +@code{.cpp} + const String keys = + "{help h usage ? | | print this message }" + "{@image1 | | image1 for compare }" + "{@image2 || image2 for compare }" + "{@repeat |1 | number }" + "{path |. | path to file }" + "{fps | -1.0 | fps for output video }" + "{N count |100 | count of objects }" + "{ts timestamp | | use time stamp }" + ; +} +@endcode + +Note that there are no default values for `help` and `timestamp` so we can check their presence using the `has()` method. +Arguments with default values are considered to be always present. Use the `get()` method in these cases to check their +actual value instead. + +String keys like `get("@image1")` return the empty string `""` by default - even with an empty default value. +Use the special `` default value to enforce that the returned string must not be empty. (like in `get("@image2")`) + +### Usage + +For the described keys: + +@code{.sh} + # Good call (3 positional parameters: image1, image2 and repeat; N is 200, ts is true) + $ ./app -N=200 1.png 2.jpg 19 -ts + + # Bad call + $ ./app -fps=aaa + ERRORS: + Parameter 'fps': can not convert: [aaa] to [double] +@endcode + */ +class CV_EXPORTS CommandLineParser +{ +public: + + /** @brief Constructor + + Initializes command line parser object + + @param argc number of command line arguments (from main()) + @param argv array of command line arguments (from main()) + @param keys string describing acceptable command line parameters (see class description for syntax) + */ + CommandLineParser(int argc, const char* const argv[], const String& keys); + + /** @brief Copy constructor */ + CommandLineParser(const CommandLineParser& parser); + + /** @brief Assignment operator */ + CommandLineParser& operator = (const CommandLineParser& parser); + + /** @brief Destructor */ + ~CommandLineParser(); + + /** @brief Returns application path + + This method returns the path to the executable from the command line (`argv[0]`). + + For example, if the application has been started with such a command: + @code{.sh} + $ ./bin/my-executable + @endcode + this method will return `./bin`. + */ + String getPathToApplication() const; + + /** @brief Access arguments by name + + Returns argument converted to selected type. If the argument is not known or can not be + converted to selected type, the error flag is set (can be checked with @ref check). + + For example, define: + @code{.cpp} + String keys = "{N count||}"; + @endcode + + Call: + @code{.sh} + $ ./my-app -N=20 + # or + $ ./my-app --count=20 + @endcode + + Access: + @code{.cpp} + int N = parser.get("N"); + @endcode + + @param name name of the argument + @param space_delete remove spaces from the left and right of the string + @tparam T the argument will be converted to this type if possible + + @note You can access positional arguments by their `@`-prefixed name: + @code{.cpp} + parser.get("@image"); + @endcode + */ + template + T get(const String& name, bool space_delete = true) const + { + T val = T(); + getByName(name, space_delete, ParamType::type, (void*)&val); + return val; + } + + /** @brief Access positional arguments by index + + Returns argument converted to selected type. Indexes are counted from zero. 
+ + For example, define: + @code{.cpp} + String keys = "{@arg1||}{@arg2||}" + @endcode + + Call: + @code{.sh} + ./my-app abc qwe + @endcode + + Access arguments: + @code{.cpp} + String val_1 = parser.get(0); // returns "abc", arg1 + String val_2 = parser.get(1); // returns "qwe", arg2 + @endcode + + @param index index of the argument + @param space_delete remove spaces from the left and right of the string + @tparam T the argument will be converted to this type if possible + */ + template + T get(int index, bool space_delete = true) const + { + T val = T(); + getByIndex(index, space_delete, ParamType::type, (void*)&val); + return val; + } + + /** @brief Check if field was provided in the command line + + @param name argument name to check + */ + bool has(const String& name) const; + + /** @brief Check for parsing errors + + Returns false if error occurred while accessing the parameters (bad conversion, missing arguments, + etc.). Call @ref printErrors to print error messages list. + */ + bool check() const; + + /** @brief Set the about message + + The about message will be shown when @ref printMessage is called, right before arguments table. + */ + void about(const String& message); + + /** @brief Print help message + + This method will print standard help message containing the about message and arguments description. + + @sa about + */ + void printMessage() const; + + /** @brief Print list of errors occurred + + @sa check + */ + void printErrors() const; + +protected: + void getByName(const String& name, bool space_delete, Param type, void* dst) const; + void getByIndex(int index, bool space_delete, Param type, void* dst) const; + + struct Impl; + Impl* impl; +}; + +//! @} core_utils + +//! @cond IGNORED + +/////////////////////////////// AutoBuffer implementation //////////////////////////////////////// + +template inline +AutoBuffer<_Tp, fixed_size>::AutoBuffer() +{ + ptr = buf; + sz = fixed_size; +} + +template inline +AutoBuffer<_Tp, fixed_size>::AutoBuffer(size_t _size) +{ + ptr = buf; + sz = fixed_size; + allocate(_size); +} + +template inline +AutoBuffer<_Tp, fixed_size>::AutoBuffer(const AutoBuffer<_Tp, fixed_size>& abuf ) +{ + ptr = buf; + sz = fixed_size; + allocate(abuf.size()); + for( size_t i = 0; i < sz; i++ ) + ptr[i] = abuf.ptr[i]; +} + +template inline AutoBuffer<_Tp, fixed_size>& +AutoBuffer<_Tp, fixed_size>::operator = (const AutoBuffer<_Tp, fixed_size>& abuf) +{ + if( this != &abuf ) + { + deallocate(); + allocate(abuf.size()); + for( size_t i = 0; i < sz; i++ ) + ptr[i] = abuf.ptr[i]; + } + return *this; +} + +template inline +AutoBuffer<_Tp, fixed_size>::~AutoBuffer() +{ deallocate(); } + +template inline void +AutoBuffer<_Tp, fixed_size>::allocate(size_t _size) +{ + if(_size <= sz) + { + sz = _size; + return; + } + deallocate(); + sz = _size; + if(_size > fixed_size) + { + ptr = new _Tp[_size]; + } +} + +template inline void +AutoBuffer<_Tp, fixed_size>::deallocate() +{ + if( ptr != buf ) + { + delete[] ptr; + ptr = buf; + sz = fixed_size; + } +} + +template inline void +AutoBuffer<_Tp, fixed_size>::resize(size_t _size) +{ + if(_size <= sz) + { + sz = _size; + return; + } + size_t i, prevsize = sz, minsize = MIN(prevsize, _size); + _Tp* prevptr = ptr; + + ptr = _size > fixed_size ? 
new _Tp[_size] : buf; + sz = _size; + + if( ptr != prevptr ) + for( i = 0; i < minsize; i++ ) + ptr[i] = prevptr[i]; + for( i = prevsize; i < _size; i++ ) + ptr[i] = _Tp(); + + if( prevptr != buf ) + delete[] prevptr; +} + +template inline size_t +AutoBuffer<_Tp, fixed_size>::size() const +{ return sz; } + +//! @endcond + + +// Basic Node class for tree building +template +class CV_EXPORTS Node +{ +public: + Node() + { + m_pParent = 0; + } + Node(OBJECT& payload) : m_payload(payload) + { + m_pParent = 0; + } + ~Node() + { + removeChilds(); + if (m_pParent) + { + int idx = m_pParent->findChild(this); + if (idx >= 0) + m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx); + } + } + + Node* findChild(OBJECT& payload) const + { + for(size_t i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i]->m_payload == payload) + return this->m_childs[i]; + } + return NULL; + } + + int findChild(Node *pNode) const + { + for (size_t i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i] == pNode) + return (int)i; + } + return -1; + } + + void addChild(Node *pNode) + { + if(!pNode) + return; + + CV_Assert(pNode->m_pParent == 0); + pNode->m_pParent = this; + this->m_childs.push_back(pNode); + } + + void removeChilds() + { + for(size_t i = 0; i < m_childs.size(); i++) + { + m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming + delete m_childs[i]; + } + m_childs.clear(); + } + + int getDepth() + { + int count = 0; + Node *pParent = m_pParent; + while(pParent) count++, pParent = pParent->m_pParent; + return count; + } + +public: + OBJECT m_payload; + Node* m_pParent; + std::vector*> m_childs; +}; + + +namespace samples { + +//! @addtogroup core_utils_samples +// This section describes utility functions for OpenCV samples. +// +// @note Implementation of these utilities is not thread-safe. +// +//! @{ + +/** @brief Try to find requested data file + +Search directories: + +1. Directories passed via `addSamplesDataSearchPath()` +2. OPENCV_SAMPLES_DATA_PATH_HINT environment variable +3. OPENCV_SAMPLES_DATA_PATH environment variable + If parameter value is not empty and nothing is found then stop searching. +4. Detects build/install path based on: + a. current working directory (CWD) + b. and/or binary module location (opencv_core/opencv_world, doesn't work with static linkage) +5. Scan `/{,data,samples/data}` directories if build directory is detected or the current directory is in source tree. +6. Scan `/share/OpenCV` directory if install directory is detected. + +@see cv::utils::findDataFile + +@param relative_path Relative path to data file +@param required Specify "file not found" handling. + If true, function prints information message and raises cv::Exception. + If false, function returns empty result +@param silentMode Disables messages +@return Returns path (absolute or relative to the current directory) or empty string if file is not found +*/ +CV_EXPORTS_W cv::String findFile(const cv::String& relative_path, bool required = true, bool silentMode = false); + +CV_EXPORTS_W cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode = false); + +inline cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode) +{ + cv::String res = findFile(relative_path, false, silentMode); + if (res.empty()) + return relative_path; + return res; +} + +/** @brief Override search data path by adding new search location + +Use this only to override default behavior +Passed paths are used in LIFO order. 
+ +@param path Path to used samples data +*/ +CV_EXPORTS_W void addSamplesDataSearchPath(const cv::String& path); + +/** @brief Append samples search data sub directory + +General usage is to add OpenCV modules name (`/modules//samples/data` -> `/samples/data` + `modules//samples/data`). +Passed subdirectories are used in LIFO order. + +@param subdir samples data sub directory +*/ +CV_EXPORTS_W void addSamplesDataSearchSubDirectory(const cv::String& subdir); + +//! @} +} // namespace samples + +namespace utils { + +CV_EXPORTS int getThreadID(); + +} // namespace + +} //namespace cv + +#ifdef CV_COLLECT_IMPL_DATA +#include "opencv2/core/utils/instrumentation.hpp" +#else +/// Collect implementation data on OpenCV function call. Requires ENABLE_IMPL_COLLECTION build option. +#define CV_IMPL_ADD(impl) +#endif + +#endif //OPENCV_CORE_UTILITY_H diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/allocator_stats.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/allocator_stats.hpp new file mode 100644 index 0000000..79e9338 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/allocator_stats.hpp @@ -0,0 +1,29 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_ALLOCATOR_STATS_HPP +#define OPENCV_CORE_ALLOCATOR_STATS_HPP + +#include "../cvdef.h" + +namespace cv { namespace utils { + +class AllocatorStatisticsInterface +{ +protected: + AllocatorStatisticsInterface() {} + virtual ~AllocatorStatisticsInterface() {} +public: + virtual uint64_t getCurrentUsage() const = 0; + virtual uint64_t getTotalUsage() const = 0; + virtual uint64_t getNumberOfAllocations() const = 0; + virtual uint64_t getPeakUsage() const = 0; + + /** set peak usage = current usage */ + virtual void resetPeakUsage() = 0; +}; + +}} // namespace + +#endif // OPENCV_CORE_ALLOCATOR_STATS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/allocator_stats.impl.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/allocator_stats.impl.hpp new file mode 100644 index 0000000..eb5ecde --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/allocator_stats.impl.hpp @@ -0,0 +1,158 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
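+
+// Illustrative sketch (not part of the upstream header): the concrete
+// AllocatorStatistics class defined below can be exercised directly;
+// 'current' shrinks on onFree() while the other counters only grow.
+//
+// @code{.cpp}
+//     cv::utils::AllocatorStatistics stats;
+//     stats.onAllocate(1024);               // current=1024, total=1024, peak=1024
+//     stats.onAllocate(512);                // current=1536, peak=1536
+//     stats.onFree(1024);                   // current=512, peak stays 1536
+//     uint64_t peak = stats.getPeakUsage(); // 1536
+//     stats.resetPeakUsage();               // peak = current = 512
+// @endcode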
+ +#ifndef OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP +#define OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP + +#include "./allocator_stats.hpp" + +//#define OPENCV_DISABLE_ALLOCATOR_STATS + +#ifdef CV_CXX11 + +#include + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#if defined(__GNUC__) && (\ + (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4) || \ + (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)) \ + ) +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int +#endif +#endif + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long +#endif + +#else // CV_CXX11 + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int // CV_XADD supports int only +#endif + +#endif // CV_CXX11 + +namespace cv { namespace utils { + +#ifdef CV__ALLOCATOR_STATS_LOG +namespace { +#endif + +class AllocatorStatistics : public AllocatorStatisticsInterface +{ +#ifdef OPENCV_DISABLE_ALLOCATOR_STATS + +public: + AllocatorStatistics() {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return 0; } + uint64_t getTotalUsage() const CV_OVERRIDE { return 0; } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return 0; } + uint64_t getPeakUsage() const CV_OVERRIDE { return 0; } + + /** set peak usage = current usage */ + void resetPeakUsage() CV_OVERRIDE {}; + + void onAllocate(size_t /*sz*/) {} + void onFree(size_t /*sz*/) {} + +#elif defined(CV_CXX11) + +protected: + typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t; + std::atomic curr, total, total_allocs, peak; +public: + AllocatorStatistics() {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr.load(); } + uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total.load(); } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs.load(); } + uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak.load(); } + + /** set peak usage = current usage */ + void resetPeakUsage() CV_OVERRIDE { peak.store(curr.load()); } + + // Controller interface + void onAllocate(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load())); +#endif + + counter_t new_curr = curr.fetch_add((counter_t)sz) + (counter_t)sz; + + // peak = std::max((uint64_t)peak, new_curr); + auto prev_peak = peak.load(); + while (prev_peak < new_curr) + { + if (peak.compare_exchange_weak(prev_peak, new_curr)) + break; + } + // end of peak = max(...) 
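+        // Note: the CAS loop above reloads 'prev_peak' on each failed
+        // compare_exchange_weak, so it terminates once 'peak' reflects the
+        // new maximum (possibly raised by a concurrent thread).
+        // 'total' and 'total_allocs' below only grow; onFree() decrements
+        // 'curr' alone, which is why peak and current usage can diverge.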
+ + total += (counter_t)sz; + total_allocs++; + } + void onFree(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load())); +#endif + curr -= (counter_t)sz; + } + +#else // non C++11 + +protected: + typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t; + volatile counter_t curr, total, total_allocs, peak; // overflow is possible, CV_XADD operates with 'int' only +public: + AllocatorStatistics() + : curr(0), total(0), total_allocs(0), peak(0) + {} + ~AllocatorStatistics() CV_OVERRIDE {} + + uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; } + uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; } + uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; } + uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; } + + void resetPeakUsage() CV_OVERRIDE { peak = curr; } + + // Controller interface + void onAllocate(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr)); +#endif + + counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz; + + peak = std::max((counter_t)peak, new_curr); // non-thread safe + + //CV_XADD(&total, (uint64_t)sz); // overflow with int, non-reliable... + total += sz; + + CV_XADD(&total_allocs, (counter_t)1); + } + void onFree(size_t sz) + { +#ifdef CV__ALLOCATOR_STATS_LOG + CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr)); +#endif + CV_XADD(&curr, (counter_t)-sz); + } +#endif +}; + +#ifdef CV__ALLOCATOR_STATS_LOG +} // namespace +#endif + +}} // namespace + +#endif // OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/filesystem.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/filesystem.hpp new file mode 100644 index 0000000..a98d220 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/filesystem.hpp @@ -0,0 +1,82 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_FILESYSTEM_HPP +#define OPENCV_UTILS_FILESYSTEM_HPP + +namespace cv { namespace utils { namespace fs { + + +CV_EXPORTS bool exists(const cv::String& path); +CV_EXPORTS bool isDirectory(const cv::String& path); + +CV_EXPORTS void remove_all(const cv::String& path); + + +CV_EXPORTS cv::String getcwd(); + +/** @brief Converts path p to a canonical absolute path + * Symlinks are processed if there is support for them on running platform. + * + * @param path input path. Target file/directory should exist. + */ +CV_EXPORTS cv::String canonical(const cv::String& path); + +/** Join path components */ +CV_EXPORTS cv::String join(const cv::String& base, const cv::String& path); + +/** Get parent directory */ +CV_EXPORTS cv::String getParent(const cv::String &path); +CV_EXPORTS std::wstring getParent(const std::wstring& path); + +/** + * Generate a list of all files that match the globbing pattern. + * + * Result entries are prefixed by base directory path. + * + * @param directory base directory + * @param pattern filter pattern (based on '*'/'?' symbols). 
Use empty string to disable filtering and return all results
+ * @param[out] result result of globbing.
+ * @param recursive scan nested directories too
+ * @param includeDirectories include directories into results list
+ */
+CV_EXPORTS void glob(const cv::String& directory, const cv::String& pattern,
+                     CV_OUT std::vector<cv::String>& result,
+                     bool recursive = false, bool includeDirectories = false);
+
+/**
+ * Generate a list of all files that match the globbing pattern.
+ *
+ * @param directory base directory
+ * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results
+ * @param[out] result globbing result with relative paths from base directory
+ * @param recursive scan nested directories too
+ * @param includeDirectories include directories into results list
+ */
+CV_EXPORTS void glob_relative(const cv::String& directory, const cv::String& pattern,
+                              CV_OUT std::vector<cv::String>& result,
+                              bool recursive = false, bool includeDirectories = false);
+
+
+CV_EXPORTS bool createDirectory(const cv::String& path);
+CV_EXPORTS bool createDirectories(const cv::String& path);
+
+#ifdef __OPENCV_BUILD
+// TODO
+//CV_EXPORTS cv::String getTempDirectory();
+
+/**
+ * @brief Returns directory to store OpenCV cache files
+ * Create sub-directory in common OpenCV cache directory if it doesn't exist.
+ * @param sub_directory_name name of sub-directory. NULL or "" value asks to return root cache directory.
+ * @param configuration_name optional name of configuration parameter name which overrides default behavior.
+ * @return Path to cache directory. Returns empty string if cache directories support is not available. Returns "disabled" if cache disabled by user.
+ */
+CV_EXPORTS cv::String getCacheDirectory(const char* sub_directory_name, const char* configuration_name = NULL);
+
+#endif
+
+}}} // namespace
+
+#endif // OPENCV_UTILS_FILESYSTEM_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/fp_control_utils.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/fp_control_utils.hpp
new file mode 100644
index 0000000..930bc5d
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/fp_control_utils.hpp
@@ -0,0 +1,69 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
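+
+// Illustrative usage sketch (not part of the upstream header): on supported
+// architectures the scope guard defined below hints the FPU to treat denormal
+// floats as zero inside a hot numeric loop; the previous FP state is restored
+// when the guard leaves scope.
+//
+// @code{.cpp}
+//     {
+//         cv::FPDenormalsIgnoreHintScope fp_scope; // hint only; no-op on other archs
+//         // ... numeric inner loop ...
+//     } // previous FP denormals mode restored
+// @endcode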
+ +#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP +#define OPENCV_CORE_FP_CONTROL_UTILS_HPP + +namespace cv { + +namespace details { + +struct FPDenormalsModeState +{ + uint32_t reserved[16]; // 64-bytes +}; // FPDenormalsModeState + +CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state); +CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state); +CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state); + +class FPDenormalsIgnoreHintScope +{ +public: + inline explicit FPDenormalsIgnoreHintScope(bool ignore = true) + { + details::setFPDenormalsIgnoreHint(ignore, saved_state); + } + + inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state) + { + details::saveFPDenormalsState(saved_state); + details::restoreFPDenormalsState(state); + } + + inline ~FPDenormalsIgnoreHintScope() + { + details::restoreFPDenormalsState(saved_state); + } + +protected: + FPDenormalsModeState saved_state; +}; // FPDenormalsIgnoreHintScope + +class FPDenormalsIgnoreHintScopeNOOP +{ +public: + inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); } + inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); } + inline ~FPDenormalsIgnoreHintScopeNOOP() { } +}; // FPDenormalsIgnoreHintScopeNOOP + +} // namespace details + + +// Should depend on target compilation architecture only +// Note: previously added archs should NOT be removed to preserve ABI compatibility +#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT) + // preserve configuration overloading through ports +#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_) +typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope; +#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1 +#else +#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0 +typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope; +#endif + +} // namespace cv + +#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/instrumentation.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/instrumentation.hpp new file mode 100644 index 0000000..3639867 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/instrumentation.hpp @@ -0,0 +1,125 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_INSTR_HPP +#define OPENCV_UTILS_INSTR_HPP + +#include +#include + +namespace cv { + +//! @addtogroup core_utils +//! 
@{ + +#ifdef CV_COLLECT_IMPL_DATA +CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays +CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays +// Get stored implementation flags and functions names arrays +// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function +CV_EXPORTS int getImpl(std::vector &impl, std::vector &funName); + +CV_EXPORTS bool useCollection(); // return implementation collection state +CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state + +#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation +#define CV_IMPL_OCL 0x02 // OpenCL implementation +#define CV_IMPL_IPP 0x04 // IPP implementation +#define CV_IMPL_MT 0x10 // multithreaded implementation + +#undef CV_IMPL_ADD +#define CV_IMPL_ADD(impl) \ + if(cv::useCollection()) \ + { \ + cv::addImpl(impl, CV_Func); \ + } +#endif + +// Instrumentation external interface +namespace instr +{ + +#if !defined OPENCV_ABI_CHECK + +enum TYPE +{ + TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function + TYPE_MARKER, // Information marker + TYPE_WRAPPER, // Wrapper function for implementation + TYPE_FUN, // Simple function call +}; + +enum IMPL +{ + IMPL_PLAIN = 0, + IMPL_IPP, + IMPL_OPENCL, +}; + +struct NodeDataTls +{ + NodeDataTls() + { + m_ticksTotal = 0; + } + uint64 m_ticksTotal; +}; + +class CV_EXPORTS NodeData +{ +public: + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); + NodeData(NodeData &ref); + ~NodeData(); + NodeData& operator=(const NodeData&); + + cv::String m_funName; + cv::instr::TYPE m_instrType; + cv::instr::IMPL m_implType; + const char* m_fileName; + int m_lineNum; + void* m_retAddress; + bool m_alwaysExpand; + bool m_funError; + + volatile int m_counter; + volatile uint64 m_ticksTotal; + TLSDataAccumulator m_tls; + int m_threads; + + // No synchronization + double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; } + double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; } +}; +bool operator==(const NodeData& lhs, const NodeData& rhs); + +typedef Node InstrNode; + +CV_EXPORTS InstrNode* getTrace(); + +#endif // !defined OPENCV_ABI_CHECK + + +CV_EXPORTS bool useInstrumentation(); +CV_EXPORTS void setUseInstrumentation(bool flag); +CV_EXPORTS void resetTrace(); + +enum FLAGS +{ + FLAGS_NONE = 0, + FLAGS_MAPPING = 0x01, + FLAGS_EXPAND_SAME_NAMES = 0x02, +}; + +CV_EXPORTS void setFlags(FLAGS modeFlags); +static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } +CV_EXPORTS FLAGS getFlags(); + +} // namespace instr + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logger.defines.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logger.defines.hpp new file mode 100644 index 0000000..7d73f02 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logger.defines.hpp @@ -0,0 +1,42 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGER_DEFINES_HPP
+#define OPENCV_LOGGER_DEFINES_HPP
+
+//! @addtogroup core_logging
+//! @{
+
+// Supported logging levels and their semantic
+#define CV_LOG_LEVEL_SILENT 0 //!< for using in setLogLevel() call
+#define CV_LOG_LEVEL_FATAL 1 //!< Fatal (critical) error (unrecoverable internal error)
+#define CV_LOG_LEVEL_ERROR 2 //!< Error message
+#define CV_LOG_LEVEL_WARN 3 //!< Warning message
+#define CV_LOG_LEVEL_INFO 4 //!< Info message
+#define CV_LOG_LEVEL_DEBUG 5 //!< Debug message. Disabled in the "Release" build.
+#define CV_LOG_LEVEL_VERBOSE 6 //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+
+namespace cv {
+namespace utils {
+namespace logging {
+
+//! Supported logging levels and their semantic
+enum LogLevel {
+    LOG_LEVEL_SILENT = 0, //!< for using in setLogLevel() call
+    LOG_LEVEL_FATAL = 1, //!< Fatal (critical) error (unrecoverable internal error)
+    LOG_LEVEL_ERROR = 2, //!< Error message
+    LOG_LEVEL_WARNING = 3, //!< Warning message
+    LOG_LEVEL_INFO = 4, //!< Info message
+    LOG_LEVEL_DEBUG = 5, //!< Debug message. Disabled in the "Release" build.
+    LOG_LEVEL_VERBOSE = 6, //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+#ifndef CV_DOXYGEN
+    ENUM_LOG_LEVEL_FORCE_INT = INT_MAX
+#endif
+};
+
+}}} // namespace
+
+//! @}
+
+#endif // OPENCV_LOGGER_DEFINES_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logger.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logger.hpp
new file mode 100644
index 0000000..accb860
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logger.hpp
@@ -0,0 +1,218 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGER_HPP
+#define OPENCV_LOGGER_HPP
+
+#include <iostream>
+#include <sstream>
+#include <limits.h> // INT_MAX
+
+#include "logger.defines.hpp"
+#include "logtag.hpp"
+
+namespace cv {
+namespace utils {
+namespace logging {
+
+//! @addtogroup core_logging
+//!
@{ + +/** Set global logging level +@return previous logging level +*/ +CV_EXPORTS LogLevel setLogLevel(LogLevel logLevel); +/** Get global logging level */ +CV_EXPORTS LogLevel getLogLevel(); + +CV_EXPORTS void registerLogTag(cv::utils::logging::LogTag* plogtag); + +CV_EXPORTS void setLogTagLevel(const char* tag, cv::utils::logging::LogLevel level); + +CV_EXPORTS cv::utils::logging::LogLevel getLogTagLevel(const char* tag); + +namespace internal { + +/** Get global log tag */ +CV_EXPORTS cv::utils::logging::LogTag* getGlobalLogTag(); + +/** Write log message */ +CV_EXPORTS void writeLogMessage(LogLevel logLevel, const char* message); + +/** Write log message */ +CV_EXPORTS void writeLogMessageEx(LogLevel logLevel, const char* tag, const char* file, int line, const char* func, const char* message); + +} // namespace + +struct LogTagAuto + : public LogTag +{ + inline LogTagAuto(const char* _name, LogLevel _level) + : LogTag(_name, _level) + { + registerLogTag(this); + } +}; + +/** + * \def CV_LOG_STRIP_LEVEL + * + * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[DEBUG|INFO|WARN|ERROR|FATAL|SILENT] to compile out anything at that and before that logging level + */ +#ifndef CV_LOG_STRIP_LEVEL +# if defined NDEBUG +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG +# else +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE +# endif +#endif + +#define CV_LOGTAG_PTR_CAST(expr) static_cast(expr) + +// CV_LOGTAG_EXPAND_NAME is intended to be re-defined (undef and then define again) +// to allows logging users to use a shorter name argument when calling +// CV_LOG_WITH_TAG or its related macros such as CV_LOG_INFO. +// +// This macro is intended to modify the tag argument as a string (token), via +// preprocessor token pasting or metaprogramming techniques. A typical usage +// is to apply a prefix, such as +// ...... #define CV_LOGTAG_EXPAND_NAME(tag) cv_logtag_##tag +// +// It is permitted to re-define to a hard-coded expression, ignoring the tag. +// This would work identically like the CV_LOGTAG_FALLBACK macro. +// +// Important: When the logging macro is called with tag being NULL, a user-defined +// CV_LOGTAG_EXPAND_NAME may expand it into cv_logtag_0, cv_logtag_NULL, or +// cv_logtag_nullptr. Use with care. Also be mindful of C++ symbol redefinitions. +// +// If there is significant amount of logging code with tag being NULL, it is +// recommended to use (re-define) CV_LOGTAG_FALLBACK to inject locally a default +// tag at the beginning of a compilation unit, to minimize lines of code changes. +// +#define CV_LOGTAG_EXPAND_NAME(tag) tag + +// CV_LOGTAG_FALLBACK is intended to be re-defined (undef and then define again) +// by any other compilation units to provide a log tag when the logging statement +// does not specify one. The macro needs to expand into a C++ expression that can +// be static_cast into (cv::utils::logging::LogTag*). Null (nullptr) is permitted. +#define CV_LOGTAG_FALLBACK nullptr + +// CV_LOGTAG_GLOBAL is the tag used when a log tag is not specified in the logging +// statement nor the compilation unit. The macro needs to expand into a C++ +// expression that can be static_cast into (cv::utils::logging::LogTag*). Must be +// non-null. Do not re-define. +#define CV_LOGTAG_GLOBAL cv::utils::logging::internal::getGlobalLogTag() + +#define CV_LOG_WITH_TAG(tag, msgLevel, extra_check0, extra_check1, ...) 
\ + for(;;) { \ + extra_check0; \ + const auto cv_temp_msglevel = (cv::utils::logging::LogLevel)(msgLevel); \ + if (cv_temp_msglevel >= (CV_LOG_STRIP_LEVEL)) break; \ + auto cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_EXPAND_NAME(tag)); \ + if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_FALLBACK); \ + if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_GLOBAL); \ + if (cv_temp_logtagptr && (cv_temp_msglevel > cv_temp_logtagptr->level)) break; \ + extra_check1; \ + std::stringstream cv_temp_logstream; \ + cv_temp_logstream << __VA_ARGS__; \ + cv::utils::logging::internal::writeLogMessageEx( \ + cv_temp_msglevel, \ + (cv_temp_logtagptr ? cv_temp_logtagptr->name : nullptr), \ + __FILE__, \ + __LINE__, \ + CV_Func, \ + cv_temp_logstream.str().c_str()); \ + break; \ + } + +#define CV_LOG_FATAL(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , , __VA_ARGS__) +#define CV_LOG_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , , __VA_ARGS__) +#define CV_LOG_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , , __VA_ARGS__) +#define CV_LOG_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , , __VA_ARGS__) +#define CV_LOG_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , , __VA_ARGS__) +#define CV_LOG_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , , __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_INFO +#define CV_LOG_INFO(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_DEBUG +#define CV_LOG_DEBUG(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_VERBOSE +#define CV_LOG_VERBOSE(tag, v, ...) +#endif + +//! @cond IGNORED +#define CV__LOG_ONCE_CHECK_PRE \ + static bool _cv_log_once_ ## __LINE__ = false; \ + if (_cv_log_once_ ## __LINE__) break; + +#define CV__LOG_ONCE_CHECK_POST \ + _cv_log_once_ ## __LINE__ = true; + +#define CV__LOG_IF_CHECK(logging_cond) \ + if (!(logging_cond)) break; + +//! @endcond + + +// CV_LOG_ONCE_XXX macros + +#define CV_LOG_ONCE_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) +#define CV_LOG_ONCE_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_ONCE_INFO +#define CV_LOG_ONCE_INFO(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_ONCE_DEBUG +#define CV_LOG_ONCE_DEBUG(tag, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_ONCE_VERBOSE +#define CV_LOG_ONCE_VERBOSE(tag, v, ...) +#endif + + +// CV_LOG_IF_XXX macros + +#define CV_LOG_IF_FATAL(tag, logging_cond, ...) 
CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_ERROR(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_WARNING(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_INFO(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) +#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__) + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#undef CV_LOG_IF_INFO +#define CV_LOG_IF_INFO(tag, logging_cond, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#undef CV_LOG_IF_DEBUG +#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) +#endif + +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#undef CV_LOG_IF_VERBOSE +#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) +#endif + + +//! @} + +}}} // namespace + +#endif // OPENCV_LOGGER_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logtag.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logtag.hpp new file mode 100644 index 0000000..4089720 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/logtag.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_LOGTAG_HPP +#define OPENCV_CORE_LOGTAG_HPP + +#include "opencv2/core/cvstd.hpp" +#include "logger.defines.hpp" + +namespace cv { +namespace utils { +namespace logging { + +struct LogTag +{ + const char* name; + LogLevel level; + + inline LogTag(const char* _name, LogLevel _level) + : name(_name) + , level(_level) + {} +}; + +}}} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/tls.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/tls.hpp new file mode 100644 index 0000000..124caeb --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/tls.hpp @@ -0,0 +1,235 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_TLS_HPP +#define OPENCV_UTILS_TLS_HPP + +#ifndef OPENCV_CORE_UTILITY_H +#error "tls.hpp must be included after opencv2/core/utility.hpp or opencv2/core.hpp" +#endif + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +namespace details { class TlsStorage; } + +/** TLS container base implementation + * + * Don't use directly. 
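+ *
+ * A minimal usage sketch of the TLSData<T> wrapper declared below
+ * (illustrative; the vector payload type is an assumption):
+ * @code{.cpp}
+ * static cv::TLSData<std::vector<uchar>> tlsBuffer;
+ * std::vector<uchar>& buf = tlsBuffer.getRef(); // created lazily per thread
+ * @endcode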
+ * + * @sa TLSData, TLSDataAccumulator templates + */ +class CV_EXPORTS TLSDataContainer +{ +protected: + TLSDataContainer(); + virtual ~TLSDataContainer(); + + /// @deprecated use detachData() instead + void gatherData(std::vector &data) const; + /// get TLS data and detach all data from threads (similar to cleanup() call) + void detachData(std::vector& data); + + void* getData() const; + void release(); + +protected: + virtual void* createDataInstance() const = 0; + virtual void deleteDataInstance(void* pData) const = 0; + +private: + int key_; + + friend class cv::details::TlsStorage; // core/src/system.cpp + +public: + void cleanup(); //!< Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid. + +private: + // Disable copy/assign (noncopyable pattern) + TLSDataContainer(TLSDataContainer &) = delete; + TLSDataContainer& operator =(const TLSDataContainer &) = delete; +}; + + +/** @brief Simple TLS data class + * + * @sa TLSDataAccumulator + */ +template +class TLSData : protected TLSDataContainer +{ +public: + inline TLSData() {} + inline ~TLSData() { release(); } + + inline T* get() const { return (T*)getData(); } //!< Get data associated with key + inline T& getRef() const { T* ptr = (T*)getData(); CV_DbgAssert(ptr); return *ptr; } //!< Get data associated with key + + /// Release associated thread data + inline void cleanup() + { + TLSDataContainer::cleanup(); + } + +protected: + /// Wrapper to allocate data by template + virtual void* createDataInstance() const CV_OVERRIDE { return new T; } + /// Wrapper to release data by template + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE { delete (T*)pData; } +}; + + +/// TLS data accumulator with gathering methods +template +class TLSDataAccumulator : public TLSData +{ + mutable cv::Mutex mutex; + mutable std::vector dataFromTerminatedThreads; + std::vector detachedData; + bool cleanupMode; +public: + TLSDataAccumulator() : cleanupMode(false) {} + ~TLSDataAccumulator() + { + release(); + } + + /** @brief Get data from all threads + * @deprecated replaced by detachData() + * + * Lifetime of vector data is valid until next detachData()/cleanup()/release() calls + * + * @param[out] data result buffer (should be empty) + */ + void gather(std::vector &data) const + { + CV_Assert(cleanupMode == false); // state is not valid + CV_Assert(data.empty()); + { + std::vector &dataVoid = reinterpret_cast&>(data); + TLSDataContainer::gatherData(dataVoid); + } + { + AutoLock lock(mutex); + data.reserve(data.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + data.push_back((T*)*i); + } + } + } + + /** @brief Get and detach data from all threads + * + * Call cleanupDetachedData() when returned vector is not needed anymore. + * + * @return Vector with associated data. 
Content is preserved (including lifetime of attached data pointers) until next detachData()/cleanupDetachedData()/cleanup()/release() calls + */ + std::vector& detachData() + { + CV_Assert(cleanupMode == false); // state is not valid + std::vector dataVoid; + { + TLSDataContainer::detachData(dataVoid); + } + { + AutoLock lock(mutex); + detachedData.reserve(dataVoid.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + detachedData.push_back((T*)*i); + } + dataFromTerminatedThreads.clear(); + for (typename std::vector::const_iterator i = dataVoid.begin(); i != dataVoid.end(); ++i) + { + detachedData.push_back((T*)(void*)*i); + } + } + dataVoid.clear(); + return detachedData; + } + + /// Release associated thread data returned by detachData() call + void cleanupDetachedData() + { + AutoLock lock(mutex); + cleanupMode = true; + _cleanupDetachedData(); + cleanupMode = false; + } + + /// Release associated thread data + void cleanup() + { + cleanupMode = true; + TLSDataContainer::cleanup(); + + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + cleanupMode = false; + } + + /// Release associated thread data and free TLS key + void release() + { + cleanupMode = true; + TLSDataContainer::release(); + { + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + } + } + +protected: + // synchronized + void _cleanupDetachedData() + { + for (typename std::vector::iterator i = detachedData.begin(); i != detachedData.end(); ++i) + { + deleteDataInstance((T*)*i); + } + detachedData.clear(); + } + + // synchronized + void _cleanupTerminatedData() + { + for (typename std::vector::iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + deleteDataInstance((T*)*i); + } + dataFromTerminatedThreads.clear(); + } + +protected: + virtual void* createDataInstance() const CV_OVERRIDE + { + // Note: we can collect all allocated data here, but this would require raced mutex locks + return new T; + } + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE + { + if (cleanupMode) + { + delete (T*)pData; + } + else + { + AutoLock lock(mutex); + dataFromTerminatedThreads.push_back((T*)pData); + } + } +}; + + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/trace.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/trace.hpp new file mode 100644 index 0000000..ef5d35b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/utils/trace.hpp @@ -0,0 +1,252 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACE_HPP +#define OPENCV_TRACE_HPP + +#include + +namespace cv { +namespace utils { +namespace trace { + +//! @addtogroup core_logging +//! @{ + +//! Macro to trace function +#define CV_TRACE_FUNCTION() + +#define CV_TRACE_FUNCTION_SKIP_NESTED() + +//! Trace code scope. +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize". +#define CV_TRACE_REGION(name_as_static_string_literal) +//! 
mark completed of the current opened region and create new one +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1". +#define CV_TRACE_REGION_NEXT(name_as_static_string_literal) + +//! Macro to trace argument value +#define CV_TRACE_ARG(arg_id) + +//! Macro to trace argument value (expanded version) +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) + +//! @cond IGNORED +#define CV_TRACE_NS cv::utils::trace + +#if !defined(OPENCV_DISABLE_TRACE) && defined(__EMSCRIPTEN__) +#define OPENCV_DISABLE_TRACE 1 +#endif + +namespace details { + +#ifndef __OPENCV_TRACE +# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS +# define __OPENCV_TRACE 1 +# else +# define __OPENCV_TRACE 0 +# endif +#endif + +#ifndef CV_TRACE_FILENAME +# define CV_TRACE_FILENAME __FILE__ +#endif + +#ifndef CV__TRACE_FUNCTION +# if defined _MSC_VER +# define CV__TRACE_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__TRACE_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__TRACE_FUNCTION "" +# endif +#endif + +//! Thread-local instance (usually allocated on stack) +class CV_EXPORTS Region +{ +public: + struct LocationExtraData; + struct LocationStaticStorage + { + LocationExtraData** ppExtra; //< implementation specific data + const char* name; //< region name (function name or other custom name) + const char* filename; //< source code filename + int line; //< source code line + int flags; //< flags (implementation code path: Plain, IPP, OpenCL) + }; + + Region(const LocationStaticStorage& location); + inline ~Region() + { + if (implFlags != 0) + destroy(); + CV_DbgAssert(implFlags == 0); + CV_DbgAssert(pImpl == NULL); + } + + class Impl; + Impl* pImpl; // NULL if current region is not active + int implFlags; // see RegionFlag, 0 if region is ignored + + bool isActive() const { return pImpl != NULL; } + + void destroy(); +private: + Region(const Region&); // disabled + Region& operator= (const Region&); // disabled +}; + +//! Specify region flags +enum RegionLocationFlag { + REGION_FLAG_FUNCTION = (1 << 0), //< region is function (=1) / nested named region (=0) + REGION_FLAG_APP_CODE = (1 << 1), //< region is Application code (=1) / OpenCV library code (=0) + REGION_FLAG_SKIP_NESTED = (1 << 2), //< avoid processing of nested regions + + REGION_FLAG_IMPL_IPP = (1 << 16), //< region is part of IPP code path + REGION_FLAG_IMPL_OPENCL = (2 << 16), //< region is part of OpenCL code path + REGION_FLAG_IMPL_OPENVX = (3 << 16), //< region is part of OpenVX code path + + REGION_FLAG_IMPL_MASK = (15 << 16), + + REGION_FLAG_REGION_FORCE = (1 << 30), + REGION_FLAG_REGION_NEXT = (1 << 31), //< close previous region (see #CV_TRACE_REGION_NEXT macro) + + ENUM_REGION_FLAG_FORCE_INT = INT_MAX +}; + +struct CV_EXPORTS TraceArg { +public: + struct ExtraData; + ExtraData** ppExtra; + const char* name; + int flags; +}; +/** @brief Add meta information to current region (function) + * See CV_TRACE_ARG macro + * @param arg argument information structure (global static cache) + * @param value argument value (can by dynamic string literal in case of string, static allocation is not required) + */ +CV_EXPORTS void traceArg(const TraceArg& arg, const char* value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int64 value); +//! 
@overload +CV_EXPORTS void traceArg(const TraceArg& arg, double value); + +#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__) +#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__) + +#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \ + static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \ + static const CV_TRACE_NS::details::Region::LocationStaticStorage \ + CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags}; + +#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, ((flags) | CV_TRACE_NS::details::REGION_FLAG_FUNCTION)) + + +#define CV__TRACE_OPENCV_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \ + CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \ + CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region)); + +#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0) +#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT) + +#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__) +#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__) + +#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \ + static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \ + static const CV_TRACE_NS::details::TraceArg \ + CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags }; + +#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \ + CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \ + CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value); + +#define 
CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id)) + +} // namespace + +#ifndef OPENCV_DISABLE_TRACE +#undef CV_TRACE_FUNCTION +#undef CV_TRACE_FUNCTION_SKIP_NESTED +#if __OPENCV_TRACE +#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED +#else +#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_APP_FUNCTION_SKIP_NESTED +#endif + +#undef CV_TRACE_REGION +#define CV_TRACE_REGION CV__TRACE_REGION + +#undef CV_TRACE_REGION_NEXT +#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT + +#undef CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \ + if (__region_fn.isActive()) \ + { \ + CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \ + } + +#undef CV_TRACE_ARG +#define CV_TRACE_ARG CV__TRACE_ARG + +#endif // OPENCV_DISABLE_TRACE + +#ifdef OPENCV_TRACE_VERBOSE +#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION +#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION +#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT +#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG +#else +#define CV_TRACE_FUNCTION_VERBOSE(...) +#define CV_TRACE_REGION_VERBOSE(...) +#define CV_TRACE_REGION_NEXT_VERBOSE(...) +#define CV_TRACE_ARG_VALUE_VERBOSE(...) +#define CV_TRACE_ARG_VERBOSE(...) +#endif + +//! @endcond + +//! @} + +}}} // namespace + +#endif // OPENCV_TRACE_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/va_intel.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/va_intel.hpp new file mode 100644 index 0000000..b37ce75 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/va_intel.hpp @@ -0,0 +1,75 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2015, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_CORE_VA_INTEL_HPP +#define OPENCV_CORE_VA_INTEL_HPP + +#ifndef __cplusplus +# error va_intel.hpp header must be compiled as C++ +#endif + +#include "opencv2/core.hpp" +#include "ocl.hpp" + +#if defined(HAVE_VA) +# include "va/va.h" +#else // HAVE_VA +# if !defined(_VA_H_) + typedef void* VADisplay; + typedef unsigned int VASurfaceID; +# endif // !_VA_H_ +#endif // HAVE_VA + +namespace cv { namespace va_intel { + +/** @addtogroup core_va_intel +This section describes Intel VA-API/OpenCL (CL-VA) interoperability. + +To enable basic VA interoperability build OpenCV with libva library integration enabled: `-DWITH_VA=ON` (corresponding dev package should be installed). + +To enable advanced CL-VA interoperability support on Intel HW, enable option: `-DWITH_VA_INTEL=ON` (OpenCL integration should be enabled which is the default setting). Special runtime environment should be set up in order to use this feature: correct combination of [libva](https://github.com/intel/libva), [OpenCL runtime](https://github.com/intel/compute-runtime) and [media driver](https://github.com/intel/media-driver) should be installed. + +Check usage example for details: samples/va_intel/va_intel_interop.cpp +*/ +//! 
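In the spirit of that sample, here is a compressed, hedged sketch of the intended call sequence for the functions declared below (acquiring the VADisplay and creating the VASurfaceID are platform-specific and elided; the function name is illustrative). It assumes an OpenCV build configured with -DWITH_VA=ON, plus -DWITH_VA_INTEL=ON for the zero-copy path:

#include <opencv2/core.hpp>
#include <opencv2/core/va_intel.hpp>
#include <opencv2/imgproc.hpp>

void blurSurface(VADisplay display, VASurfaceID surface, cv::Size size)
{
    // One-time setup: bind an OpenCL context to the VA display.
    // With tryInterop=false (or when interop is unavailable) a slower,
    // copy-based path is used instead.
    cv::va_intel::ocl::initializeContextFromVA(display, /*tryInterop=*/true);

    cv::Mat frame;
    cv::va_intel::convertFromVASurface(display, surface, size, frame); // VA surface -> Mat
    cv::GaussianBlur(frame, frame, cv::Size(5, 5), 0);
    cv::va_intel::convertToVASurface(display, frame, surface, size);   // Mat -> VA surface
}

//! 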
@{ + +/////////////////// CL-VA Interoperability Functions /////////////////// + +namespace ocl { +using namespace cv::ocl; + +// TODO static functions in the Context class +/** @brief Creates OpenCL context from VA. +@param display - VADisplay for which CL interop should be established. +@param tryInterop - if true, try to set up for interoperability; if false, set up for slow-copy use. +@return Returns reference to OpenCL Context + */ +CV_EXPORTS Context& initializeContextFromVA(VADisplay display, bool tryInterop = true); + +} // namespace cv::va_intel::ocl + +/** @brief Converts InputArray to VASurfaceID object. +@param display - VADisplay object. +@param src - source InputArray. +@param surface - destination VASurfaceID object. +@param size - size of image represented by VASurfaceID object. + */ +CV_EXPORTS void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size); + +/** @brief Converts VASurfaceID object to OutputArray. +@param display - VADisplay object. +@param surface - source VASurfaceID object. +@param size - size of image represented by VASurfaceID object. +@param dst - destination OutputArray. + */ +CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst); + +//! @} + +}} // namespace cv::va_intel + +#endif /* OPENCV_CORE_VA_INTEL_HPP */ diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/version.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/version.hpp new file mode 100644 index 0000000..7b129b6 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/version.hpp @@ -0,0 +1,26 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_VERSION_HPP +#define OPENCV_VERSION_HPP + +#define CV_VERSION_MAJOR 4 +#define CV_VERSION_MINOR 6 +#define CV_VERSION_REVISION 0 +#define CV_VERSION_STATUS "" + +#define CVAUX_STR_EXP(__A) #__A +#define CVAUX_STR(__A) CVAUX_STR_EXP(__A) + +#define CVAUX_STRW_EXP(__A) L ## #__A +#define CVAUX_STRW(__A) CVAUX_STRW_EXP(__A) + +#define CV_VERSION CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS + +/* old style version constants */ +#define CV_MAJOR_VERSION CV_VERSION_MAJOR +#define CV_MINOR_VERSION CV_VERSION_MINOR +#define CV_SUBMINOR_VERSION CV_VERSION_REVISION + +#endif // OPENCV_VERSION_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/vsx_utils.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/vsx_utils.hpp new file mode 100644 index 0000000..79a1074 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/core/vsx_utils.hpp @@ -0,0 +1,1047 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_HAL_VSX_UTILS_HPP +#define OPENCV_HAL_VSX_UTILS_HPP + +#include "opencv2/core/cvdef.h" + +#ifndef SKIP_INCLUDES +# include <assert.h> +#endif + +//! @addtogroup core_utils_vsx +//! 
@{ +#if CV_VSX + +#define __VSX_S16__(c, v) (c){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v} +#define __VSX_S8__(c, v) (c){v, v, v, v, v, v, v, v} +#define __VSX_S4__(c, v) (c){v, v, v, v} +#define __VSX_S2__(c, v) (c){v, v} + +typedef __vector unsigned char vec_uchar16; +#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__} +#define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, (unsigned char)c)) +#define vec_uchar16_c(v) ((vec_uchar16)(v)) +#define vec_uchar16_z vec_uchar16_sp(0) + +typedef __vector signed char vec_char16; +#define vec_char16_set(...) (vec_char16){__VA_ARGS__} +#define vec_char16_sp(c) (__VSX_S16__(vec_char16, (signed char)c)) +#define vec_char16_c(v) ((vec_char16)(v)) +#define vec_char16_z vec_char16_sp(0) + +typedef __vector unsigned short vec_ushort8; +#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__} +#define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, (unsigned short)c)) +#define vec_ushort8_c(v) ((vec_ushort8)(v)) +#define vec_ushort8_z vec_ushort8_sp(0) + +typedef __vector signed short vec_short8; +#define vec_short8_set(...) (vec_short8){__VA_ARGS__} +#define vec_short8_sp(c) (__VSX_S8__(vec_short8, (signed short)c)) +#define vec_short8_c(v) ((vec_short8)(v)) +#define vec_short8_z vec_short8_sp(0) + +typedef __vector unsigned int vec_uint4; +#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__} +#define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, (unsigned int)c)) +#define vec_uint4_c(v) ((vec_uint4)(v)) +#define vec_uint4_z vec_uint4_sp(0) + +typedef __vector signed int vec_int4; +#define vec_int4_set(...) (vec_int4){__VA_ARGS__} +#define vec_int4_sp(c) (__VSX_S4__(vec_int4, (signed int)c)) +#define vec_int4_c(v) ((vec_int4)(v)) +#define vec_int4_z vec_int4_sp(0) + +typedef __vector float vec_float4; +#define vec_float4_set(...) (vec_float4){__VA_ARGS__} +#define vec_float4_sp(c) (__VSX_S4__(vec_float4, c)) +#define vec_float4_c(v) ((vec_float4)(v)) +#define vec_float4_z vec_float4_sp(0) + +typedef __vector unsigned long long vec_udword2; +#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__} +#define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, (unsigned long long)c)) +#define vec_udword2_c(v) ((vec_udword2)(v)) +#define vec_udword2_z vec_udword2_sp(0) + +typedef __vector signed long long vec_dword2; +#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__} +#define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, (signed long long)c)) +#define vec_dword2_c(v) ((vec_dword2)(v)) +#define vec_dword2_z vec_dword2_sp(0) + +typedef __vector double vec_double2; +#define vec_double2_set(...) (vec_double2){__VA_ARGS__} +#define vec_double2_c(v) ((vec_double2)(v)) +#define vec_double2_sp(c) (__VSX_S2__(vec_double2, c)) +#define vec_double2_z vec_double2_sp(0) + +#define vec_bchar16 __vector __bool char +#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__} +#define vec_bchar16_c(v) ((vec_bchar16)(v)) + +#define vec_bshort8 __vector __bool short +#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__} +#define vec_bshort8_c(v) ((vec_bshort8)(v)) + +#define vec_bint4 __vector __bool int +#define vec_bint4_set(...) (vec_bint4){__VA_ARGS__} +#define vec_bint4_c(v) ((vec_bint4)(v)) + +#define vec_bdword2 __vector __bool long long +#define vec_bdword2_set(...) 
(vec_bdword2){__VA_ARGS__} +#define vec_bdword2_c(v) ((vec_bdword2)(v)) + +#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline)) + +#define VSX_REDIRECT_1RG(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fn2(a); } + +#define VSX_REDIRECT_2RG(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); } + +/* + * GCC VSX compatibility +**/ +#if defined(__GNUG__) && !defined(__clang__) + +// inline asm helper +#define VSX_IMPL_1RG(rt, rg, opc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; } + +#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ rt rs; __asm__ __volatile__(#opc" %0,%1" : "=v" (rs) : "v" (a)); return rs; } + +#define VSX_IMPL_2VRG_F(rt, rg, fopc, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \ +{ rt rs; __asm__ __volatile__(fopc : "=v" (rs) : "v" (a), "v" (b)); return rs; } + +#define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm) + +#if __GNUG__ < 8 + + // Support for int4 -> dword2 expanding multiply was added in GCC 8. + #ifdef vec_mule + #undef vec_mule + #endif + #ifdef vec_mulo + #undef vec_mulo + #endif + + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mulo, __builtin_vec_mulo) + + // dword2 support arrived in ISA 2.07 and GCC 8+ + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulosw, vec_mule) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mule) + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulesw, vec_mulo) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mulo) + +#endif + +#if __GNUG__ < 7 +// up to GCC 6 vec_mul only supports precisions and llong +# ifdef vec_mul +# undef vec_mul +# endif +/* + * there's no a direct instruction for supporting 8-bit, 16-bit multiplication in ISA 2.07, + * XLC Implement it by using instruction "multiply even", "multiply odd" and "permute" +**/ +# define VSX_IMPL_MULH(Tvec, cperm) \ + VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b) \ + { \ + static const vec_uchar16 ev_od = {cperm}; \ + return vec_perm((Tvec)vec_mule(a, b), (Tvec)vec_mulo(a, b), ev_od); \ + } + #define VSX_IMPL_MULH_P16 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 + VSX_IMPL_MULH(vec_char16, VSX_IMPL_MULH_P16) + VSX_IMPL_MULH(vec_uchar16, VSX_IMPL_MULH_P16) + #define VSX_IMPL_MULH_P8 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29 + VSX_IMPL_MULH(vec_short8, VSX_IMPL_MULH_P8) + VSX_IMPL_MULH(vec_ushort8, VSX_IMPL_MULH_P8) + // vmuluwm can be used for unsigned or signed integers, that's what they said + VSX_IMPL_2VRG(vec_int4, vec_int4, vmuluwm, vec_mul) + VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul) + // redirect to GCC builtin vec_mul, since it already supports precisions and llong + VSX_REDIRECT_2RG(vec_float4, vec_float4, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mul, __builtin_vec_mul) + VSX_REDIRECT_2RG(vec_udword2, 
vec_udword2, vec_mul, __builtin_vec_mul) +#endif // __GNUG__ < 7 + +#if __GNUG__ < 6 +/* + * Instruction "compare greater than or equal" in ISA 2.07 only supports single + * and double precision. + * XLC and new versions of GCC implement the integer variants using the "greater than" instruction and NOR. +**/ +# ifdef vec_cmpge +# undef vec_cmpge +# endif +# ifdef vec_cmple +# undef vec_cmple +# endif +# define vec_cmple(a, b) vec_cmpge(b, a) +# define VSX_IMPL_CMPGE(rt, rg, opc, fnm) \ + VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%2,%1\n\t xxlnor %x0,%x0,%x0", fnm) + + VSX_IMPL_CMPGE(vec_bchar16, vec_char16, vcmpgtsb, vec_cmpge) + VSX_IMPL_CMPGE(vec_bchar16, vec_uchar16, vcmpgtub, vec_cmpge) + VSX_IMPL_CMPGE(vec_bshort8, vec_short8, vcmpgtsh, vec_cmpge) + VSX_IMPL_CMPGE(vec_bshort8, vec_ushort8, vcmpgtuh, vec_cmpge) + VSX_IMPL_CMPGE(vec_bint4, vec_int4, vcmpgtsw, vec_cmpge) + VSX_IMPL_CMPGE(vec_bint4, vec_uint4, vcmpgtuw, vec_cmpge) + VSX_IMPL_CMPGE(vec_bdword2, vec_dword2, vcmpgtsd, vec_cmpge) + VSX_IMPL_CMPGE(vec_bdword2, vec_udword2, vcmpgtud, vec_cmpge) + +// redirect to the GCC builtin cmpge, since it already supports floating-point precisions + VSX_REDIRECT_2RG(vec_bint4, vec_float4, vec_cmpge, __builtin_vec_cmpge) + VSX_REDIRECT_2RG(vec_bdword2, vec_double2, vec_cmpge, __builtin_vec_cmpge) + +// up to gcc5 vec_nor doesn't support bool long long +# undef vec_nor + template <typename T> + VSX_REDIRECT_2RG(T, T, vec_nor, __builtin_vec_nor) + + VSX_FINLINE(vec_bdword2) vec_nor(const vec_bdword2& a, const vec_bdword2& b) + { return vec_bdword2_c(__builtin_vec_nor(vec_dword2_c(a), vec_dword2_c(b))); } + +// vec_packs doesn't support double words in gcc4 and old versions of gcc5 +# undef vec_packs + VSX_REDIRECT_2RG(vec_char16, vec_short8, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_uchar16, vec_ushort8, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_short8, vec_int4, vec_packs, __builtin_vec_packs) + VSX_REDIRECT_2RG(vec_ushort8, vec_uint4, vec_packs, __builtin_vec_packs) + + VSX_IMPL_2VRG_F(vec_int4, vec_dword2, "vpksdss %0,%2,%1", vec_packs) + VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs) +#endif // __GNUG__ < 6 + +#if __GNUG__ < 5 +// vec_xxpermdi in gcc4 is missing little-endian support, just like clang +# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) +// same as vec_xxpermdi +# undef vec_vbpermq + VSX_IMPL_2VRG(vec_udword2, vec_uchar16, vbpermq, vec_vbpermq) + VSX_IMPL_2VRG(vec_dword2, vec_char16, vbpermq, vec_vbpermq) +#else +# define vec_permi vec_xxpermdi +#endif // __GNUG__ < 5 + +// shift left double by word immediate +#ifndef vec_sldw +# define vec_sldw __builtin_vsx_xxsldwi +#endif + +// vector population count +VSX_IMPL_1VRG(vec_uchar16, vec_uchar16, vpopcntb, vec_popcntu) +VSX_IMPL_1VRG(vec_uchar16, vec_char16, vpopcntb, vec_popcntu) +VSX_IMPL_1VRG(vec_ushort8, vec_ushort8, vpopcnth, vec_popcntu) +VSX_IMPL_1VRG(vec_ushort8, vec_short8, vpopcnth, vec_popcntu) +VSX_IMPL_1VRG(vec_uint4, vec_uint4, vpopcntw, vec_popcntu) +VSX_IMPL_1VRG(vec_uint4, vec_int4, vpopcntw, vec_popcntu) +VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu) +VSX_IMPL_1VRG(vec_udword2, vec_dword2, vpopcntd, vec_popcntu) + +// converts between single and double-precision +VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) +VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + +// converts word and doubleword to double-precision +#undef vec_ctd +VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo) +VSX_IMPL_1RG(vec_double2, vec_uint4, xvcvuxwdp, vec_ctdo) +VSX_IMPL_1RG(vec_double2, vec_dword2, xvcvsxddp, vec_ctd) +VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd) + +// converts word and doubleword to single-precision +#undef vec_ctf +VSX_IMPL_1RG(vec_float4, vec_int4, xvcvsxwsp, vec_ctf) +VSX_IMPL_1RG(vec_float4, vec_uint4, xvcvuxwsp, vec_ctf) +VSX_IMPL_1RG(vec_float4, vec_dword2, xvcvsxdsp, vec_ctfo) +VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo) + +// converts single and double precision to signed word +#undef vec_cts +VSX_IMPL_1RG(vec_int4, vec_double2, xvcvdpsxws, vec_ctso) +VSX_IMPL_1RG(vec_int4, vec_float4, xvcvspsxws, vec_cts) + +// converts single and double precision to unsigned word +#undef vec_ctu +VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo) +VSX_IMPL_1RG(vec_uint4, vec_float4, xvcvspuxws, vec_ctu) + +// converts single and double precision to signed doubleword +#undef vec_ctsl +VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl) +VSX_IMPL_1RG(vec_dword2, vec_float4, xvcvspsxds, vec_ctslo) + +// converts single and double precision to unsigned doubleword +#undef vec_ctul +VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul) +VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo) + +// just in case GCC doesn't define it +#ifndef vec_xl +# define vec_xl vec_vsx_ld +# define vec_xst vec_vsx_st +#endif + +#endif // GCC VSX compatibility + +/* + * CLANG VSX compatibility +**/ +#if defined(__clang__) && !defined(__IBMCPP__) + +/* + * CLANG doesn't support %x in inline asm templates, which fixes the register number + * when using any of the register constraints wa, wd, wf + * + * For more explanation, check out PowerPC and IBM RS6000 in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html + * Also there's already an open bug https://bugs.llvm.org/show_bug.cgi?id=31837 + * + * So we're not able to use inline asm; we only use built-in functions that CLANG supports, + * and fall back to __builtin_convertvector where clang is missing any of the vector conversion built-ins + * + * TODO: the clang asm template bug has since been fixed; the current workarounds need to be reconsidered. +*/ + +// convert vector helper +#define VSX_IMPL_CONVERT(rt, rg, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } + +#ifndef vec_permi +#if __clang_major__ < 5 +// implement vec_permi in a dirty way +# define VSX_IMPL_CLANG_4_PERMI(Tvec) \ + VSX_FINLINE(Tvec) vec_permi(const Tvec& a, const Tvec& b, unsigned const char c) \ + { \ + switch (c) \ + { \ + case 0: \ + return vec_mergeh(a, b); \ + case 1: \ + return vec_mergel(vec_mergeh(a, a), b); \ + case 2: \ + return vec_mergeh(vec_mergel(a, a), b); \ + default: \ + return vec_mergel(a, b); \ + } \ + } + VSX_IMPL_CLANG_4_PERMI(vec_udword2) + VSX_IMPL_CLANG_4_PERMI(vec_dword2) + VSX_IMPL_CLANG_4_PERMI(vec_double2) + +// vec_xxsldwi is missing in clang 4 +# define vec_xxsldwi(a, b, c) vec_sld(a, b, (c) * 4) +#else +// vec_xxpermdi is missing little-endian support in clang 4, just like gcc4 +# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) +#endif // __clang_major__ < 5 +#endif + +// shift left double by word immediate +#ifndef vec_sldw +# define vec_sldw vec_xxsldwi +#endif + +#if __clang_major__ < 13 +// Implement vec_rsqrt since clang only supports vec_rsqrte +#ifndef vec_rsqrt + VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a) + { return vec_div(vec_float4_sp(1), vec_sqrt(a)); } + + VSX_FINLINE(vec_double2) vec_rsqrt(const vec_double2& a) + { return vec_div(vec_double2_sp(1), vec_sqrt(a)); } +#endif + +// vec_promote is missing doubleword support +VSX_FINLINE(vec_dword2) vec_promote(long long a, int b) +{ + vec_dword2 ret = vec_dword2_z; + ret[b & 1] = a; + return ret; +} + +VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b) +{ + vec_udword2 ret = vec_udword2_z; + ret[b & 1] = a; + return ret; +} +#endif + +// vec_popcnt should return unsigned, but clang (like gcc with vec_vpopcnt) thinks otherwise +#define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \ +VSX_FINLINE(Tvec) vec_popcntu(const Tvec2& a) \ +{ return ucast(vec_popcnt(a)); } +VSX_IMPL_POPCNTU(vec_uchar16, vec_char16, vec_uchar16_c); +VSX_IMPL_POPCNTU(vec_ushort8, vec_short8, vec_ushort8_c); +VSX_IMPL_POPCNTU(vec_uint4, vec_int4, vec_uint4_c); +VSX_IMPL_POPCNTU(vec_udword2, vec_dword2, vec_udword2_c); +// redirect unsigned types +VSX_REDIRECT_1RG(vec_uchar16, vec_uchar16, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_uint4, vec_uint4, vec_popcntu, vec_popcnt) +VSX_REDIRECT_1RG(vec_udword2, vec_udword2, vec_popcntu, vec_popcnt) + +// converts between single and double precision +VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) +VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + +// converts word and doubleword to double-precision +#ifdef vec_ctd +# undef vec_ctd +#endif +VSX_REDIRECT_1RG(vec_double2, vec_int4, vec_ctdo, __builtin_vsx_xvcvsxwdp) +VSX_REDIRECT_1RG(vec_double2, vec_uint4, vec_ctdo, __builtin_vsx_xvcvuxwdp) + +VSX_IMPL_CONVERT(vec_double2, vec_dword2, vec_ctd) +VSX_IMPL_CONVERT(vec_double2, vec_udword2, vec_ctd) + +// converts word and doubleword to single-precision +#if __clang_major__ > 4 +# undef vec_ctf +#endif +VSX_IMPL_CONVERT(vec_float4, vec_int4, vec_ctf) +VSX_IMPL_CONVERT(vec_float4, vec_uint4, vec_ctf) +VSX_REDIRECT_1RG(vec_float4, vec_dword2, vec_ctfo, __builtin_vsx_xvcvsxdsp) +VSX_REDIRECT_1RG(vec_float4, vec_udword2, vec_ctfo, __builtin_vsx_xvcvuxdsp) + +// converts single and double precision to signed word +#if __clang_major__ > 4 +# undef vec_cts 
+#endif +VSX_REDIRECT_1RG(vec_int4, vec_double2, vec_ctso, __builtin_vsx_xvcvdpsxws) +VSX_IMPL_CONVERT(vec_int4, vec_float4, vec_cts) + +// converts single and double precision to unsigned word +#if __clang_major__ > 4 +# undef vec_ctu +#endif +VSX_REDIRECT_1RG(vec_uint4, vec_double2, vec_ctuo, __builtin_vsx_xvcvdpuxws) +VSX_IMPL_CONVERT(vec_uint4, vec_float4, vec_ctu) + +// converts single and double precision to signed doubleword +#ifdef vec_ctsl +# undef vec_ctsl +#endif +VSX_IMPL_CONVERT(vec_dword2, vec_double2, vec_ctsl) +// __builtin_convertvector unable to convert, xvcvspsxds is missing on it +VSX_FINLINE(vec_dword2) vec_ctslo(const vec_float4& a) +{ return vec_ctsl(vec_cvfo(a)); } + +// converts single and double precision to unsigned doubleword +#ifdef vec_ctul +# undef vec_ctul +#endif +VSX_IMPL_CONVERT(vec_udword2, vec_double2, vec_ctul) +// __builtin_convertvector unable to convert, xvcvspuxds is missing on it +VSX_FINLINE(vec_udword2) vec_ctulo(const vec_float4& a) +{ return vec_ctul(vec_cvfo(a)); } + +#endif // CLANG VSX compatibility + +/* + * Common GCC, CLANG compatibility +**/ +#if defined(__GNUG__) && !defined(__IBMCPP__) + +#ifdef vec_cvf +# undef vec_cvf +#endif + +#define VSX_IMPL_CONV_EVEN_4_2(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ return fn2(vec_sldw(a, a, 1)); } + +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_float4, vec_cvf, vec_cvfo) +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_int4, vec_ctd, vec_ctdo) +VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_uint4, vec_ctd, vec_ctdo) + +VSX_IMPL_CONV_EVEN_4_2(vec_dword2, vec_float4, vec_ctsl, vec_ctslo) +VSX_IMPL_CONV_EVEN_4_2(vec_udword2, vec_float4, vec_ctul, vec_ctulo) + +#define VSX_IMPL_CONV_EVEN_2_4(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ \ + rt v4 = fn2(a); \ + return vec_sldw(v4, v4, 3); \ +} + +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_double2, vec_cvf, vec_cvfo) +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_dword2, vec_ctf, vec_ctfo) +VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_udword2, vec_ctf, vec_ctfo) + +VSX_IMPL_CONV_EVEN_2_4(vec_int4, vec_double2, vec_cts, vec_ctso) +VSX_IMPL_CONV_EVEN_2_4(vec_uint4, vec_double2, vec_ctu, vec_ctuo) + +// Only for Eigen! 
+/* + * changing behavior of conversion intrinsics for gcc has effect on Eigen + * so we redefine old behavior again only on gcc, clang +*/ +#if !defined(__clang__) || __clang_major__ > 4 + // ignoring second arg since Eigen only truncates toward zero +# define VSX_IMPL_CONV_2VARIANT(rt, rg, fnm, fn2) \ + VSX_FINLINE(rt) fnm(const rg& a, int only_truncate) \ + { \ + assert(only_truncate == 0); \ + CV_UNUSED(only_truncate); \ + return fn2(a); \ + } + VSX_IMPL_CONV_2VARIANT(vec_int4, vec_float4, vec_cts, vec_cts) + VSX_IMPL_CONV_2VARIANT(vec_uint4, vec_float4, vec_ctu, vec_ctu) + VSX_IMPL_CONV_2VARIANT(vec_float4, vec_int4, vec_ctf, vec_ctf) + VSX_IMPL_CONV_2VARIANT(vec_float4, vec_uint4, vec_ctf, vec_ctf) + // define vec_cts for converting double precision to signed doubleword + // which isn't compatible with xlc but its okay since Eigen only uses it for gcc + VSX_IMPL_CONV_2VARIANT(vec_dword2, vec_double2, vec_cts, vec_ctsl) +#endif // Eigen + +#endif // Common GCC, CLANG compatibility + +/* + * XLC VSX compatibility +**/ +#if defined(__IBMCPP__) + +// vector population count +#define vec_popcntu vec_popcnt + +// overload and redirect with setting second arg to zero +// since we only support conversions without the second arg +#define VSX_IMPL_OVERLOAD_Z2(rt, rg, fnm) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fnm(a, 0); } + +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_int4, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_uint4, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_dword2, vec_ctd) +VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_udword2, vec_ctd) + +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_int4, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_uint4, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_dword2, vec_ctf) +VSX_IMPL_OVERLOAD_Z2(vec_float4, vec_udword2, vec_ctf) + +VSX_IMPL_OVERLOAD_Z2(vec_int4, vec_double2, vec_cts) +VSX_IMPL_OVERLOAD_Z2(vec_int4, vec_float4, vec_cts) + +VSX_IMPL_OVERLOAD_Z2(vec_uint4, vec_double2, vec_ctu) +VSX_IMPL_OVERLOAD_Z2(vec_uint4, vec_float4, vec_ctu) + +VSX_IMPL_OVERLOAD_Z2(vec_dword2, vec_double2, vec_ctsl) +VSX_IMPL_OVERLOAD_Z2(vec_dword2, vec_float4, vec_ctsl) + +VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_double2, vec_ctul) +VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_float4, vec_ctul) + +// fixme: implement conversions of odd-numbered elements in a dirty way +// since xlc doesn't support VSX registers operand in inline asm. 
+#define VSX_IMPL_CONV_ODD_4_2(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) { return fn2(vec_sldw(a, a, 3)); } + +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_float4, vec_cvfo, vec_cvf) +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_int4, vec_ctdo, vec_ctd) +VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_uint4, vec_ctdo, vec_ctd) + +VSX_IMPL_CONV_ODD_4_2(vec_dword2, vec_float4, vec_ctslo, vec_ctsl) +VSX_IMPL_CONV_ODD_4_2(vec_udword2, vec_float4, vec_ctulo, vec_ctul) + +#define VSX_IMPL_CONV_ODD_2_4(rt, rg, fnm, fn2) \ +VSX_FINLINE(rt) fnm(const rg& a) \ +{ \ + rt v4 = fn2(a); \ + return vec_sldw(v4, v4, 1); \ +} + +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_double2, vec_cvfo, vec_cvf) +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_dword2, vec_ctfo, vec_ctf) +VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_udword2, vec_ctfo, vec_ctf) + +VSX_IMPL_CONV_ODD_2_4(vec_int4, vec_double2, vec_ctso, vec_cts) +VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu) + +#endif // XLC VSX compatibility + +// ignore the GCC warning caused by -Wunused-but-set-variable in rare cases +#if defined(__GNUG__) && !defined(__clang__) +# define VSX_UNUSED(Tvec) Tvec __attribute__((__unused__)) +#else // CLANG, XLC +# define VSX_UNUSED(Tvec) Tvec +#endif + +// gcc can find its way when casting long int, while XLC and CLANG find it ambiguous +#if defined(__clang__) || defined(__IBMCPP__) + VSX_FINLINE(vec_udword2) vec_splats(uint64 v) + { return vec_splats((unsigned long long) v); } + + VSX_FINLINE(vec_dword2) vec_splats(int64 v) + { return vec_splats((long long) v); } + + VSX_FINLINE(vec_udword2) vec_promote(uint64 a, int b) + { return vec_promote((unsigned long long) a, b); } + + VSX_FINLINE(vec_dword2) vec_promote(int64 a, int b) + { return vec_promote((long long) a, b); } +#endif + +/* + * implement vsx_ld(offset, pointer), vsx_st(vector, offset, pointer) + * load and store using an offset that depends on the pointer type + * + * implement vsx_ldf(offset, pointer), vsx_stf(vector, offset, pointer) + * load and store using an offset in fixed byte units + * + * Note: In clang vec_xl and vec_xst fail to load unaligned addresses + * so we are using vec_vsx_ld, vec_vsx_st instead +*/ + +#if defined(__clang__) && !defined(__IBMCPP__) +# define vsx_ldf vec_vsx_ld +# define vsx_stf vec_vsx_st +#else // GCC , XLC +# define vsx_ldf vec_xl +# define vsx_stf vec_xst +#endif + +#define VSX_OFFSET(o, p) ((o) * sizeof(*(p))) +#define vsx_ld(o, p) vsx_ldf(VSX_OFFSET(o, p), p) +#define vsx_st(v, o, p) vsx_stf(v, VSX_OFFSET(o, p), p) + +/* + * implement vsx_ld2(offset, pointer), vsx_st2(vector, offset, pointer) to load and store double words + * In GCC, vec_xl and vec_xst map to vec_vsx_ld, vec_vsx_st, which don't support long long, + * and in CLANG we are using vec_vsx_ld, vec_vsx_st because vec_xl, vec_xst fail to load unaligned addresses + * + * In XLC vec_xl and vec_xst fail to cast int64(long int) to long long +*/ +#if (defined(__GNUG__) || defined(__clang__)) && !defined(__IBMCPP__) + VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p) + { return vec_udword2_c(vsx_ldf(VSX_OFFSET(o, p), (unsigned int*)p)); } + + VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p) + { return vec_dword2_c(vsx_ldf(VSX_OFFSET(o, p), (int*)p)); } + + VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p) + { vsx_stf(vec_uint4_c(vec), VSX_OFFSET(o, p), (unsigned int*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p) + { vsx_stf(vec_int4_c(vec), VSX_OFFSET(o, p), (int*)p); } +#else // XLC + VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p) + { return vsx_ldf(VSX_OFFSET(o, p), (unsigned long long*)p); } + + VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p) + { return vsx_ldf(VSX_OFFSET(o, p), (long long*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p) + { vsx_stf(vec, VSX_OFFSET(o, p), (unsigned long long*)p); } + + VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p) + { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); } +#endif + +// Store the lower 8 bytes +#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0) + +// Store the upper 8 bytes +#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1) + +// Load 64 bits of integer data into the lower part +#define VSX_IMPL_LOAD_L8(Tvec, Tp) \ +VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \ +{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } + +VSX_IMPL_LOAD_L8(vec_uchar16, uchar) +VSX_IMPL_LOAD_L8(vec_char16, schar) +VSX_IMPL_LOAD_L8(vec_ushort8, ushort) +VSX_IMPL_LOAD_L8(vec_short8, short) +VSX_IMPL_LOAD_L8(vec_uint4, uint) +VSX_IMPL_LOAD_L8(vec_int4, int) +VSX_IMPL_LOAD_L8(vec_float4, float) +VSX_IMPL_LOAD_L8(vec_udword2, uint64) +VSX_IMPL_LOAD_L8(vec_dword2, int64) +VSX_IMPL_LOAD_L8(vec_double2, double) + +// logical not +#define vec_not(a) vec_nor(a, a) + +// emulation of POWER9 instructions +// not equal +#ifndef vec_cmpne +# define vec_cmpne(a, b) vec_not(vec_cmpeq(a, b)) +#endif + +// absolute difference +#ifndef _ARCH_PWR9 +# undef vec_absd +# define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b)) +#endif + +/* + * Implement vec_unpacklu and vec_unpackhu + * since vec_unpackl, vec_unpackh only support signed integers +**/ +#define VSX_IMPL_UNPACKU(rt, rg, zero) \ +VSX_FINLINE(rt) vec_unpacklu(const rg& a) \ +{ return (rt)(vec_mergel(a, zero)); } \ +VSX_FINLINE(rt) vec_unpackhu(const rg& a) \ +{ return (rt)(vec_mergeh(a, zero)); } + +VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z) +VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z) +VSX_IMPL_UNPACKU(vec_udword2, vec_uint4, vec_uint4_z) + +/* + * Implement vec_mergesqe and vec_mergesqo + * Merge the even-indexed and odd-indexed elements of two vectors in sequence +*/ +#define VSX_IMPL_PERM(rt, fnm, ...) 
\ +VSX_FINLINE(rt) fnm(const rt& a, const rt& b) \ +{ static const vec_uchar16 perm = {__VA_ARGS__}; return vec_perm(a, b, perm); } + +// 16 +#define perm16_mergesqe 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +#define perm16_mergesqo 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +VSX_IMPL_PERM(vec_uchar16, vec_mergesqe, perm16_mergesqe) +VSX_IMPL_PERM(vec_uchar16, vec_mergesqo, perm16_mergesqo) +VSX_IMPL_PERM(vec_char16, vec_mergesqe, perm16_mergesqe) +VSX_IMPL_PERM(vec_char16, vec_mergesqo, perm16_mergesqo) +// 8 +#define perm8_mergesqe 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 +#define perm8_mergesqo 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 +VSX_IMPL_PERM(vec_ushort8, vec_mergesqe, perm8_mergesqe) +VSX_IMPL_PERM(vec_ushort8, vec_mergesqo, perm8_mergesqo) +VSX_IMPL_PERM(vec_short8, vec_mergesqe, perm8_mergesqe) +VSX_IMPL_PERM(vec_short8, vec_mergesqo, perm8_mergesqo) +// 4 +#define perm4_mergesqe 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +#define perm4_mergesqo 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +VSX_IMPL_PERM(vec_uint4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_uint4, vec_mergesqo, perm4_mergesqo) +VSX_IMPL_PERM(vec_int4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_int4, vec_mergesqo, perm4_mergesqo) +VSX_IMPL_PERM(vec_float4, vec_mergesqe, perm4_mergesqe) +VSX_IMPL_PERM(vec_float4, vec_mergesqo, perm4_mergesqo) +// 2 +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqo, vec_mergel) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqo, vec_mergel) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqe, vec_mergeh) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqo, vec_mergel) + +/* + * Implement vec_mergesqh and vec_mergesql + * Merges the sequence most and least significant halves of two vectors +*/ +#define VSX_IMPL_MERGESQHL(Tvec) \ +VSX_FINLINE(Tvec) vec_mergesqh(const Tvec& a, const Tvec& b) \ +{ return (Tvec)vec_mergeh(vec_udword2_c(a), vec_udword2_c(b)); } \ +VSX_FINLINE(Tvec) vec_mergesql(const Tvec& a, const Tvec& b) \ +{ return (Tvec)vec_mergel(vec_udword2_c(a), vec_udword2_c(b)); } +VSX_IMPL_MERGESQHL(vec_uchar16) +VSX_IMPL_MERGESQHL(vec_char16) +VSX_IMPL_MERGESQHL(vec_ushort8) +VSX_IMPL_MERGESQHL(vec_short8) +VSX_IMPL_MERGESQHL(vec_uint4) +VSX_IMPL_MERGESQHL(vec_int4) +VSX_IMPL_MERGESQHL(vec_float4) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesql, vec_mergel) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_dword2, vec_dword2, vec_mergesql, vec_mergel) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqh, vec_mergeh) +VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesql, vec_mergel) + + +// 2 and 4 channels interleave for all types except 2 lanes +#define VSX_IMPL_ST_INTERLEAVE(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \ +{ \ + vsx_stf(vec_mergeh(a, b), 0, ptr); \ + vsx_stf(vec_mergel(a, b), 16, ptr); \ +} \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, const Tvec& d, Tp* ptr) \ +{ \ + Tvec ac = vec_mergeh(a, c); \ + Tvec bd = vec_mergeh(b, d); \ + vsx_stf(vec_mergeh(ac, bd), 0, ptr); \ + vsx_stf(vec_mergel(ac, bd), 16, ptr); \ + ac = vec_mergel(a, c); \ + bd = vec_mergel(b, d); \ + vsx_stf(vec_mergeh(ac, 
bd), 32, ptr); \ + vsx_stf(vec_mergel(ac, bd), 48, ptr); \ +} +VSX_IMPL_ST_INTERLEAVE(uchar, vec_uchar16) +VSX_IMPL_ST_INTERLEAVE(schar, vec_char16) +VSX_IMPL_ST_INTERLEAVE(ushort, vec_ushort8) +VSX_IMPL_ST_INTERLEAVE(short, vec_short8) +VSX_IMPL_ST_INTERLEAVE(uint, vec_uint4) +VSX_IMPL_ST_INTERLEAVE(int, vec_int4) +VSX_IMPL_ST_INTERLEAVE(float, vec_float4) + +// 2 and 4 channels deinterleave for 16 lanes +#define VSX_IMPL_ST_DINTERLEAVE_8(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(16, ptr); \ + a = vec_mergesqe(v0, v1); \ + b = vec_mergesqo(v0, v1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(16, ptr); \ + Tvec v2 = vsx_ld(32, ptr); \ + Tvec v3 = vsx_ld(48, ptr); \ + Tvec m0 = vec_mergesqe(v0, v1); \ + Tvec m1 = vec_mergesqe(v2, v3); \ + a = vec_mergesqe(m0, m1); \ + c = vec_mergesqo(m0, m1); \ + m0 = vec_mergesqo(v0, v1); \ + m1 = vec_mergesqo(v2, v3); \ + b = vec_mergesqe(m0, m1); \ + d = vec_mergesqo(m0, m1); \ +} +VSX_IMPL_ST_DINTERLEAVE_8(uchar, vec_uchar16) +VSX_IMPL_ST_DINTERLEAVE_8(schar, vec_char16) + +// 2 and 4 channels deinterleave for 8 lanes +#define VSX_IMPL_ST_DINTERLEAVE_16(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(8, ptr); \ + a = vec_mergesqe(v0, v1); \ + b = vec_mergesqo(v0, v1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(8, ptr); \ + Tvec m0 = vec_mergeh(v0, v1); \ + Tvec m1 = vec_mergel(v0, v1); \ + Tvec ab0 = vec_mergeh(m0, m1); \ + Tvec cd0 = vec_mergel(m0, m1); \ + v0 = vsx_ld(16, ptr); \ + v1 = vsx_ld(24, ptr); \ + m0 = vec_mergeh(v0, v1); \ + m1 = vec_mergel(v0, v1); \ + Tvec ab1 = vec_mergeh(m0, m1); \ + Tvec cd1 = vec_mergel(m0, m1); \ + a = vec_mergesqh(ab0, ab1); \ + b = vec_mergesql(ab0, ab1); \ + c = vec_mergesqh(cd0, cd1); \ + d = vec_mergesql(cd0, cd1); \ +} +VSX_IMPL_ST_DINTERLEAVE_16(ushort, vec_ushort8) +VSX_IMPL_ST_DINTERLEAVE_16(short, vec_short8) + +// 2 and 4 channels deinterleave for 4 lanes +#define VSX_IMPL_ST_DINTERLEAVE_32(Tp, Tvec) \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + a = vsx_ld(0, ptr); \ + b = vsx_ld(4, ptr); \ + Tvec m0 = vec_mergeh(a, b); \ + Tvec m1 = vec_mergel(a, b); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = vsx_ld(0, ptr); \ + Tvec v1 = vsx_ld(4, ptr); \ + Tvec v2 = vsx_ld(8, ptr); \ + Tvec v3 = vsx_ld(12, ptr); \ + Tvec m0 = vec_mergeh(v0, v2); \ + Tvec m1 = vec_mergeh(v1, v3); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ + m0 = vec_mergel(v0, v2); \ + m1 = vec_mergel(v1, v3); \ + c = vec_mergeh(m0, m1); \ + d = vec_mergel(m0, m1); \ +} +VSX_IMPL_ST_DINTERLEAVE_32(uint, vec_uint4) +VSX_IMPL_ST_DINTERLEAVE_32(int, vec_int4) +VSX_IMPL_ST_DINTERLEAVE_32(float, vec_float4) + +// 2 and 4 channels interleave and deinterleave for 2 lanes +#define VSX_IMPL_ST_D_INTERLEAVE_64(Tp, Tvec, ld_func, st_func) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_mergel(a, b), 2, ptr); \ +} \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + 
const Tvec& c, const Tvec& d, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_mergeh(c, d), 2, ptr); \ + st_func(vec_mergel(a, b), 4, ptr); \ + st_func(vec_mergel(c, d), 6, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ +{ \ + Tvec m0 = ld_func(0, ptr); \ + Tvec m1 = ld_func(2, ptr); \ + a = vec_mergeh(m0, m1); \ + b = vec_mergel(m0, m1); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ + Tvec& c, Tvec& d) \ +{ \ + Tvec v0 = ld_func(0, ptr); \ + Tvec v1 = ld_func(2, ptr); \ + Tvec v2 = ld_func(4, ptr); \ + Tvec v3 = ld_func(6, ptr); \ + a = vec_mergeh(v0, v2); \ + b = vec_mergel(v0, v2); \ + c = vec_mergeh(v1, v3); \ + d = vec_mergel(v1, v3); \ +} +VSX_IMPL_ST_D_INTERLEAVE_64(int64, vec_dword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_D_INTERLEAVE_64(double, vec_double2, vsx_ld, vsx_st) + +/* 3 channels */ +#define VSX_IMPL_ST_INTERLEAVE_3CH_16(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + static const vec_uchar16 a12 = {0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5}; \ + static const vec_uchar16 a123 = {0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr); \ + static const vec_uchar16 b12 = {21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26}; \ + static const vec_uchar16 b123 = {0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 16, ptr); \ + static const vec_uchar16 c12 = {0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0}; \ + static const vec_uchar16 c123 = {26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31}; \ + vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 32, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(16, ptr); \ + Tvec v3 = vsx_ld(32, ptr); \ + static const vec_uchar16 a12_perm = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29}; \ + a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm); \ + static const vec_uchar16 b12_perm = {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30}; \ + b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm); \ + static const vec_uchar16 c12_perm = {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31}; \ + c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_16(uchar, vec_uchar16) +VSX_IMPL_ST_INTERLEAVE_3CH_16(schar, vec_char16) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_8(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + static const vec_uchar16 a12 = {0, 1, 16, 17, 0, 0, 2, 3, 18, 19, 0, 0, 4, 5, 20, 21}; \ + static const vec_uchar16 a123 = {0, 1, 2, 3, 16, 17, 6, 7, 8, 9, 18, 19, 12, 13, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr); \ + static const vec_uchar16 b12 = {0, 0, 6, 7, 22, 23, 0, 0, 8, 9, 24, 25, 0, 0, 10, 11}; \ + static const vec_uchar16 b123 = {20, 21, 2, 3, 4, 5, 22, 23, 8, 9, 10, 11, 24, 25, 14, 15}; \ + vsx_st(vec_perm(vec_perm(a, b, b12), c, 
b123), 8, ptr); \ + static const vec_uchar16 c12 = {26, 27, 0, 0, 12, 13, 28, 29, 0, 0, 14, 15, 30, 31, 0, 0}; \ + static const vec_uchar16 c123 = {0, 1, 26, 27, 4, 5, 6, 7, 28, 29, 10, 11, 12, 13, 30, 31}; \ + vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 16, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(8, ptr); \ + Tvec v3 = vsx_ld(16, ptr); \ + static const vec_uchar16 a12_perm = {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 0, 0, 0, 0}; \ + static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27}; \ + a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm); \ + static const vec_uchar16 b12_perm = {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29}; \ + b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm); \ + static const vec_uchar16 c12_perm = {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 0, 0, 0, 0, 0}; \ + static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31}; \ + c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_8(ushort, vec_ushort8) +VSX_IMPL_ST_INTERLEAVE_3CH_8(short, vec_short8) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_4(Tp, Tvec) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + Tvec hbc = vec_mergeh(b, c); \ + static const vec_uchar16 ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7}; \ + vsx_st(vec_perm(a, hbc, ahbc), 0, ptr); \ + Tvec lab = vec_mergel(a, b); \ + vsx_st(vec_sld(lab, hbc, 8), 4, ptr); \ + static const vec_uchar16 clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};\ + vsx_st(vec_perm(c, lab, clab), 8, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = vsx_ld(0, ptr); \ + Tvec v2 = vsx_ld(4, ptr); \ + Tvec v3 = vsx_ld(8, ptr); \ + static const vec_uchar16 flp = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31}; \ + a = vec_perm(v1, vec_sld(v3, v2, 8), flp); \ + static const vec_uchar16 flp2 = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19}; \ + b = vec_perm(v2, vec_sld(v1, v3, 8), flp2); \ + c = vec_perm(vec_sld(v2, v1, 8), v3, flp); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_4(uint, vec_uint4) +VSX_IMPL_ST_INTERLEAVE_3CH_4(int, vec_int4) +VSX_IMPL_ST_INTERLEAVE_3CH_4(float, vec_float4) + +#define VSX_IMPL_ST_INTERLEAVE_3CH_2(Tp, Tvec, ld_func, st_func) \ +VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ + const Tvec& c, Tp* ptr) \ +{ \ + st_func(vec_mergeh(a, b), 0, ptr); \ + st_func(vec_permi(c, a, 1), 2, ptr); \ + st_func(vec_mergel(b, c), 4, ptr); \ +} \ +VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, \ + Tvec& b, Tvec& c) \ +{ \ + Tvec v1 = ld_func(0, ptr); \ + Tvec v2 = ld_func(2, ptr); \ + Tvec v3 = ld_func(4, ptr); \ + a = vec_permi(v1, v2, 1); \ + b = vec_permi(v1, v3, 2); \ + c = vec_permi(v2, v3, 1); \ +} +VSX_IMPL_ST_INTERLEAVE_3CH_2(int64, vec_dword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_INTERLEAVE_3CH_2(uint64, vec_udword2, vsx_ld2, vsx_st2) +VSX_IMPL_ST_INTERLEAVE_3CH_2(double, vec_double2, vsx_ld, vsx_st) + +#endif // CV_VSX + +//! 
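As a small illustration of the interleave helpers defined above, the following sketch deinterleaves 16 two-channel byte pairs and stores them back with the channels swapped. It compiles only on VSX-enabled PowerPC targets where CV_VSX is set, and the buffer layout is hypothetical:

#if CV_VSX
#include "opencv2/core/vsx_utils.hpp"

// 'data' points to 32 bytes laid out as a0,b0,a1,b1,...,a15,b15.
static void swapPairs16(uchar* data)
{
    vec_uchar16 a, b;
    vec_ld_deinterleave(data, a, b);  // a = even-indexed bytes, b = odd-indexed bytes
    vec_st_interleave(b, a, data);    // write back as b0,a0,b1,a1,...
}
#endif // CV_VSX

//! 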
@} + +#endif // OPENCV_HAL_VSX_UTILS_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/cvconfig.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/cvconfig.h new file mode 100644 index 0000000..ceeeb8d --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/cvconfig.h @@ -0,0 +1,149 @@ +#ifndef OPENCV_CVCONFIG_H_INCLUDED +#define OPENCV_CVCONFIG_H_INCLUDED + +/* OpenCV compiled as static or dynamic libs */ +/* #undef BUILD_SHARED_LIBS */ + +/* OpenCV intrinsics optimized code */ +#define CV_ENABLE_INTRINSICS + +/* OpenCV additional optimized code */ +/* #undef CV_DISABLE_OPTIMIZATION */ + +/* Compile for 'real' NVIDIA GPU architectures */ +#define CUDA_ARCH_BIN "" + +/* NVIDIA GPU features are used */ +#define CUDA_ARCH_FEATURES "" + +/* Compile for 'virtual' NVIDIA PTX architectures */ +#define CUDA_ARCH_PTX "" + +/* AMD's Basic Linear Algebra Subprograms Library*/ +/* #undef HAVE_CLAMDBLAS */ + +/* AMD's OpenCL Fast Fourier Transform Library*/ +/* #undef HAVE_CLAMDFFT */ + +/* Clp support */ +/* #undef HAVE_CLP */ + +/* NVIDIA CUDA Runtime API*/ +/* #undef HAVE_CUDA */ + +/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/ +/* #undef HAVE_CUBLAS */ + +/* NVIDIA CUDA Deep Neural Network (cuDNN) API*/ +/* #undef HAVE_CUDNN */ + +/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/ +/* #undef HAVE_CUFFT */ + +/* DirectX */ +/* #undef HAVE_DIRECTX */ +/* #undef HAVE_DIRECTX_NV12 */ +/* #undef HAVE_D3D11 */ +/* #undef HAVE_D3D10 */ +/* #undef HAVE_D3D9 */ + +/* Eigen Matrix & Linear Algebra Library */ +/* #undef HAVE_EIGEN */ + +/* Geospatial Data Abstraction Library */ +/* #undef HAVE_GDAL */ + +/* Halide support */ +/* #undef HAVE_HALIDE */ + +/* Vulkan support */ +/* #undef HAVE_VULKAN */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_INTTYPES_H */ + +/* Intel Integrated Performance Primitives */ +/* #undef HAVE_IPP */ +/* #undef HAVE_IPP_ICV */ +/* #undef HAVE_IPP_IW */ +/* #undef HAVE_IPP_IW_LL */ + +/* JPEG-2000 codec */ +/* #undef HAVE_OPENJPEG */ +/* #undef HAVE_JASPER */ + +/* IJG JPEG codec */ +/* #undef HAVE_JPEG */ + +/* libpng/png.h needs to be included */ +/* #undef HAVE_LIBPNG_PNG_H */ + +/* GDCM DICOM codec */ +/* #undef HAVE_GDCM */ + +/* NVIDIA Video Decoding API*/ +/* #undef HAVE_NVCUVID */ +/* #undef HAVE_NVCUVID_HEADER */ +/* #undef HAVE_DYNLINK_NVCUVID_HEADER */ + +/* NVIDIA Video Encoding API*/ +/* #undef HAVE_NVCUVENC */ + +/* OpenCL Support */ +/* #undef HAVE_OPENCL */ +/* #undef HAVE_OPENCL_STATIC */ +/* #undef HAVE_OPENCL_SVM */ + +/* NVIDIA OpenCL D3D Extensions support */ +/* #undef HAVE_OPENCL_D3D11_NV */ + +/* OpenEXR codec */ +/* #undef HAVE_OPENEXR */ + +/* OpenGL support*/ +/* #undef HAVE_OPENGL */ + +/* PNG codec */ +/* #undef HAVE_PNG */ + +/* Posix threads (pthreads) */ +#define HAVE_PTHREAD + +/* parallel_for with pthreads */ +/* #undef HAVE_PTHREADS_PF */ + +/* Intel Threading Building Blocks */ +/* #undef HAVE_TBB */ + +/* Ste||ar Group High Performance ParallelX */ +/* #undef HAVE_HPX */ + +/* TIFF codec */ +/* #undef HAVE_TIFF */ + +/* Define if your processor stores words with the most significant byte + first (like Motorola and SPARC, unlike Intel and VAX). 
*/ +/* #undef WORDS_BIGENDIAN */ + +/* VA library (libva) */ +/* #undef HAVE_VA */ + +/* Intel VA-API/OpenCL */ +/* #undef HAVE_VA_INTEL */ + +/* Lapack */ +/* #undef HAVE_LAPACK */ + +/* Library was compiled with functions instrumentation */ +/* #undef ENABLE_INSTRUMENTATION */ + +/* OpenVX */ +/* #undef HAVE_OPENVX */ + +/* OpenCV trace utilities */ +/* #undef OPENCV_TRACE */ + +/* Library QR-code decoding */ +/* #undef HAVE_QUIRC */ + +#endif // OPENCV_CVCONFIG_H_INCLUDED diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d.hpp new file mode 100644 index 0000000..60230e7 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d.hpp @@ -0,0 +1,1535 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_FEATURES_2D_HPP +#define OPENCV_FEATURES_2D_HPP + +#include "opencv2/opencv_modules.hpp" +#include "opencv2/core.hpp" + +#ifdef HAVE_OPENCV_FLANN +#include "opencv2/flann/miniflann.hpp" +#endif + +/** + @defgroup features2d 2D Features Framework + @{ + @defgroup features2d_main Feature Detection and Description + @defgroup features2d_match Descriptor Matchers + +Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that enables you to +easily switch between different algorithms solving the same problem. This section is devoted to +matching descriptors that are represented as vectors in a multidimensional space. All objects that +implement vector descriptor matchers inherit the DescriptorMatcher interface. + + @defgroup features2d_draw Drawing Function of Keypoints and Matches + @defgroup features2d_category Object Categorization + +This section describes approaches based on local 2D features and used to categorize objects. + + @defgroup feature2d_hal Hardware Acceleration Layer + @{ + @defgroup features2d_hal_interface Interface + @} + @} + */ + +namespace cv +{ + +//! @addtogroup features2d_main +//! @{ + +// //! writes vector of keypoints to the file storage +// CV_EXPORTS void write(FileStorage& fs, const String& name, const std::vector<KeyPoint>& keypoints); +// //! reads vector of keypoints from the specified file storage node +// CV_EXPORTS void read(const FileNode& node, CV_OUT std::vector<KeyPoint>& keypoints); + +/** @brief A class that filters a vector of keypoints. + + Because it is currently difficult to provide a convenient interface for all usage scenarios of the + keypoints filter class, it has only the few static methods that are needed right now. + */ +class CV_EXPORTS KeyPointsFilter +{ +public: + KeyPointsFilter(){} + + /* + * Remove keypoints within borderPixels of an image edge. + */ + static void runByImageBorder( std::vector<KeyPoint>& keypoints, Size imageSize, int borderSize ); + /* + * Remove keypoints of sizes out of range. + */ + static void runByKeypointSize( std::vector<KeyPoint>& keypoints, float minSize, + float maxSize=FLT_MAX ); + /* + * Remove keypoints from some image by mask for pixels of this image. + */ + static void runByPixelsMask( std::vector<KeyPoint>& keypoints, const Mat& mask ); + /* + * Remove duplicated keypoints. + */ + static void removeDuplicated( std::vector<KeyPoint>& keypoints ); + /* + * Remove duplicated keypoints and sort the remaining keypoints + */ + static void removeDuplicatedSorted( std::vector<KeyPoint>& keypoints ); + + /* + * Retain the specified number of the best keypoints (according to the response) + */ + static void retainBest( std::vector<KeyPoint>& keypoints, int npoints ); +}; + + +/************************************ Base Classes ************************************/ + +/** @brief Abstract base class for 2D image feature detectors and descriptor extractors +*/ +#ifdef __EMSCRIPTEN__ +class CV_EXPORTS_W Feature2D : public Algorithm +#else +class CV_EXPORTS_W Feature2D : public virtual Algorithm +#endif +{ +public: + virtual ~Feature2D(); + + /** @brief Detects keypoints in an image (first variant) or image set (second variant). + + @param image Image. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i]. + @param mask Mask specifying where to look for keypoints (optional). It must be an 8-bit integer + matrix with non-zero values in the region of interest. 
+ */ + CV_WRAP virtual void detect( InputArray image, + CV_OUT std::vector<KeyPoint>& keypoints, + InputArray mask=noArray() ); + + /** @overload + @param images Image set. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i]. + @param masks Masks for each input image specifying where to look for keypoints (optional). + masks[i] is a mask for images[i]. + */ + CV_WRAP virtual void detect( InputArrayOfArrays images, + CV_OUT std::vector<std::vector<KeyPoint> >& keypoints, + InputArrayOfArrays masks=noArray() ); + + /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set + (second variant). + + @param image Image. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates a keypoint + with several dominant orientations (one for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for a keypoints[i]. Row j in descriptors (or descriptors[i]) is the + descriptor for the j-th keypoint. + */ + CV_WRAP virtual void compute( InputArray image, + CV_OUT CV_IN_OUT std::vector<KeyPoint>& keypoints, + OutputArray descriptors ); + + /** @overload + + @param images Image set. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates a keypoint + with several dominant orientations (one for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for a keypoints[i]. Row j in descriptors (or descriptors[i]) is the + descriptor for the j-th keypoint. + */ + CV_WRAP virtual void compute( InputArrayOfArrays images, + CV_OUT CV_IN_OUT std::vector<std::vector<KeyPoint> >& keypoints, + OutputArrayOfArrays descriptors ); + + /** Detects keypoints and computes the descriptors */ + CV_WRAP virtual void detectAndCompute( InputArray image, InputArray mask, + CV_OUT std::vector<KeyPoint>& keypoints, + OutputArray descriptors, + bool useProvidedKeypoints=false ); + + CV_WRAP virtual int descriptorSize() const; + CV_WRAP virtual int descriptorType() const; + CV_WRAP virtual int defaultNorm() const; + + CV_WRAP void write( const String& fileName ) const; + + CV_WRAP void read( const String& fileName ); + + virtual void write( FileStorage&) const CV_OVERRIDE; + + // see corresponding cv::Algorithm method + CV_WRAP virtual void read( const FileNode&) CV_OVERRIDE; + + //! Return true if detector object is empty + CV_WRAP virtual bool empty() const CV_OVERRIDE; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; + + // see corresponding cv::Algorithm method + CV_WRAP inline void write(const Ptr<FileStorage>& fs, const String& name = String()) const { Algorithm::write(fs, name); } +}; + +/** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch +between different algorithms solving the same problem. All objects that implement keypoint detectors +inherit the FeatureDetector interface. */ +typedef Feature2D FeatureDetector; + +/** Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that enables you +to easily switch between different algorithms solving the same problem. This section is devoted to +computing descriptors represented as vectors in a multidimensional space.
All objects that implement +the vector descriptor extractors inherit the DescriptorExtractor interface. + */ +typedef Feature2D DescriptorExtractor; + + +/** @brief Class for implementing the wrapper which makes detectors and extractors to be affine invariant, +described as ASIFT in @cite YM11 . +*/ +class CV_EXPORTS_W AffineFeature : public Feature2D +{ +public: + /** + @param backend The detector/extractor you want to use as backend. + @param maxTilt The highest power index of tilt factor. 5 is used in the paper as tilt sampling range n. + @param minTilt The lowest power index of tilt factor. 0 is used in the paper. + @param tiltStep Tilt sampling step \f$\delta_t\f$ in Algorithm 1 in the paper. + @param rotateStepBase Rotation sampling step factor b in Algorithm 1 in the paper. + */ + CV_WRAP static Ptr create(const Ptr& backend, + int maxTilt = 5, int minTilt = 0, float tiltStep = 1.4142135623730951f, float rotateStepBase = 72); + + CV_WRAP virtual void setViewParams(const std::vector& tilts, const std::vector& rolls) = 0; + CV_WRAP virtual void getViewParams(std::vector& tilts, std::vector& rolls) const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +typedef AffineFeature AffineFeatureDetector; +typedef AffineFeature AffineDescriptorExtractor; + + +/** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform +(SIFT) algorithm by D. Lowe @cite Lowe04 . +*/ +class CV_EXPORTS_W SIFT : public Feature2D +{ +public: + /** + @param nfeatures The number of best features to retain. The features are ranked by their scores + (measured in SIFT algorithm as the local contrast) + + @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The + number of octaves is computed automatically from the image resolution. + + @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform + (low-contrast) regions. The larger the threshold, the less features are produced by the detector. + + @note The contrast threshold will be divided by nOctaveLayers when the filtering is applied. When + nOctaveLayers is set to default and if you want to use the value used in D. Lowe paper, 0.03, set + this argument to 0.09. + + @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning + is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are + filtered out (more features are retained). + + @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image + is captured with a weak camera with soft lenses, you might want to reduce the number. + */ + CV_WRAP static Ptr create(int nfeatures = 0, int nOctaveLayers = 3, + double contrastThreshold = 0.04, double edgeThreshold = 10, + double sigma = 1.6); + + /** @brief Create SIFT with specified descriptorType. + @param nfeatures The number of best features to retain. The features are ranked by their scores + (measured in SIFT algorithm as the local contrast) + + @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The + number of octaves is computed automatically from the image resolution. + + @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform + (low-contrast) regions. The larger the threshold, the less features are produced by the detector. 
+ + @note The contrast threshold will be divided by nOctaveLayers when the filtering is applied. When + nOctaveLayers is set to default and if you want to use the value used in D. Lowe paper, 0.03, set + this argument to 0.09. + + @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning + is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are + filtered out (more features are retained). + + @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image + is captured with a weak camera with soft lenses, you might want to reduce the number. + + @param descriptorType The type of descriptors. Only CV_32F and CV_8U are supported. + */ + CV_WRAP static Ptr create(int nfeatures, int nOctaveLayers, + double contrastThreshold, double edgeThreshold, + double sigma, int descriptorType); + + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +typedef SIFT SiftFeatureDetector; +typedef SIFT SiftDescriptorExtractor; + + +/** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11 . + */ +class CV_EXPORTS_W BRISK : public Feature2D +{ +public: + /** @brief The BRISK constructor + + @param thresh AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param patternScale apply this scale to the pattern used for sampling the neighbourhood of a + keypoint. + */ + CV_WRAP static Ptr create(int thresh=30, int octaves=3, float patternScale=1.0f); + + /** @brief The BRISK constructor for a custom pattern + + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList.. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. */ + CV_WRAP static Ptr create(const std::vector &radiusList, const std::vector &numberList, + float dMax=5.85f, float dMin=8.2f, const std::vector& indexChange=std::vector()); + + /** @brief The BRISK constructor for a custom pattern, detection threshold and octaves + + @param thresh AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList.. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. */ + CV_WRAP static Ptr create(int thresh, int octaves, const std::vector &radiusList, + const std::vector &numberList, float dMax=5.85f, float dMin=8.2f, + const std::vector& indexChange=std::vector()); + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; + + /** @brief Set detection threshold. + @param threshold AGAST detection threshold score. 
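+ + An editorial sketch of how this setter is used (hedged: as declared just below, the base-class implementation is a no-op and getThreshold() returns -1, so the effect depends on the concrete BRISK implementation; the values are illustrative): + @code{.cpp} + Ptr<BRISK> brisk = BRISK::create(30, 3, 1.0f); // thresh, octaves, patternScale + brisk->setThreshold(20); // tighten or relax AGAST detection + @endcode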
+ */ + CV_WRAP virtual void setThreshold(int threshold) { CV_UNUSED(threshold); return; } + CV_WRAP virtual int getThreshold() const { return -1; } + + /** @brief Set detection octaves. + @param octaves detection octaves. Use 0 to do single scale. + */ + CV_WRAP virtual void setOctaves(int octaves) { CV_UNUSED(octaves); return; } + CV_WRAP virtual int getOctaves() const { return -1; } +}; + +/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor + +described in @cite RRKB11 . The algorithm uses FAST in pyramids to detect stable keypoints, selects +the strongest features using FAST or Harris response, finds their orientation using first-order +moments and computes the descriptors using BRIEF (where the coordinates of random point pairs (or +k-tuples) are rotated according to the measured orientation). + */ +class CV_EXPORTS_W ORB : public Feature2D +{ +public: + enum ScoreType { HARRIS_SCORE=0, FAST_SCORE=1 }; + static const int kBytes = 32; + + /** @brief The ORB constructor + + @param nfeatures The maximum number of features to retain. + @param scaleFactor Pyramid decimation ratio, greater than 1. scaleFactor==2 means the classical + pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor + will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor + will mean that to cover certain scale range you will need more pyramid levels and so the speed + will suffer. + @param nlevels The number of pyramid levels. The smallest level will have linear size equal to + input_image_linear_size/pow(scaleFactor, nlevels - firstLevel). + @param edgeThreshold This is size of the border where the features are not detected. It should + roughly match the patchSize parameter. + @param firstLevel The level of pyramid to put source image to. Previous layers are filled + with upscaled source image. + @param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The + default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, + so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 + random points (of course, those point coordinates are random, but they are generated from the + pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel + rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such + output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, + denoted as NORM_HAMMING2 (2 bits per bin). When WTA_K=4, we take 4 random points to compute each + bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3). + @param scoreType The default HARRIS_SCORE means that Harris algorithm is used to rank features + (the score is written to KeyPoint::score and is used to retain best nfeatures features); + FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints, + but it is a little faster to compute. + @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller + pyramid layers the perceived image area covered by a feature will be larger. 
+ @param fastThreshold the fast threshold + */ + CV_WRAP static Ptr create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31, + int firstLevel=0, int WTA_K=2, ORB::ScoreType scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20); + + CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; + CV_WRAP virtual int getMaxFeatures() const = 0; + + CV_WRAP virtual void setScaleFactor(double scaleFactor) = 0; + CV_WRAP virtual double getScaleFactor() const = 0; + + CV_WRAP virtual void setNLevels(int nlevels) = 0; + CV_WRAP virtual int getNLevels() const = 0; + + CV_WRAP virtual void setEdgeThreshold(int edgeThreshold) = 0; + CV_WRAP virtual int getEdgeThreshold() const = 0; + + CV_WRAP virtual void setFirstLevel(int firstLevel) = 0; + CV_WRAP virtual int getFirstLevel() const = 0; + + CV_WRAP virtual void setWTA_K(int wta_k) = 0; + CV_WRAP virtual int getWTA_K() const = 0; + + CV_WRAP virtual void setScoreType(ORB::ScoreType scoreType) = 0; + CV_WRAP virtual ORB::ScoreType getScoreType() const = 0; + + CV_WRAP virtual void setPatchSize(int patchSize) = 0; + CV_WRAP virtual int getPatchSize() const = 0; + + CV_WRAP virtual void setFastThreshold(int fastThreshold) = 0; + CV_WRAP virtual int getFastThreshold() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @brief Maximally stable extremal region extractor + +The class encapsulates all the parameters of the %MSER extraction algorithm (see [wiki +article](http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions)). + +- there are two different implementation of %MSER: one for grey image, one for color image + +- the grey image algorithm is taken from: @cite nister2008linear ; the paper claims to be faster +than union-find method; it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. 
+ +- the color image algorithm is taken from: @cite forssen2007maximally ; it should be much slower +than grey image method ( 3~4 times ) + +- (Python) A complete example showing the use of the %MSER detector can be found at samples/python/mser.py +*/ +class CV_EXPORTS_W MSER : public Feature2D +{ +public: + /** @brief Full constructor for %MSER detector + + @param delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$ + @param min_area prune the area which smaller than minArea + @param max_area prune the area which bigger than maxArea + @param max_variation prune the area have similar size to its children + @param min_diversity for color image, trace back to cut off mser with diversity less than min_diversity + @param max_evolution for color image, the evolution steps + @param area_threshold for color image, the area threshold to cause re-initialize + @param min_margin for color image, ignore too small margin + @param edge_blur_size for color image, the aperture size for edge blur + */ + CV_WRAP static Ptr create( int delta=5, int min_area=60, int max_area=14400, + double max_variation=0.25, double min_diversity=.2, + int max_evolution=200, double area_threshold=1.01, + double min_margin=0.003, int edge_blur_size=5 ); + + /** @brief Detect %MSER regions + + @param image input image (8UC1, 8UC3 or 8UC4, must be greater or equal than 3x3) + @param msers resulting list of point sets + @param bboxes resulting bounding boxes + */ + CV_WRAP virtual void detectRegions( InputArray image, + CV_OUT std::vector >& msers, + CV_OUT std::vector& bboxes ) = 0; + + CV_WRAP virtual void setDelta(int delta) = 0; + CV_WRAP virtual int getDelta() const = 0; + + CV_WRAP virtual void setMinArea(int minArea) = 0; + CV_WRAP virtual int getMinArea() const = 0; + + CV_WRAP virtual void setMaxArea(int maxArea) = 0; + CV_WRAP virtual int getMaxArea() const = 0; + + CV_WRAP virtual void setPass2Only(bool f) = 0; + CV_WRAP virtual bool getPass2Only() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the FAST method. : + */ +class CV_EXPORTS_W FastFeatureDetector : public Feature2D +{ +public: + enum DetectorType + { + TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2 + }; + enum + { + THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002 + }; + + + CV_WRAP static Ptr create( int threshold=10, + bool nonmaxSuppression=true, + FastFeatureDetector::DetectorType type=FastFeatureDetector::TYPE_9_16 ); + + CV_WRAP virtual void setThreshold(int threshold) = 0; + CV_WRAP virtual int getThreshold() const = 0; + + CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; + CV_WRAP virtual bool getNonmaxSuppression() const = 0; + + CV_WRAP virtual void setType(FastFeatureDetector::DetectorType type) = 0; + CV_WRAP virtual FastFeatureDetector::DetectorType getType() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @overload */ +CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression=true ); + +/** @brief Detects corners using the FAST algorithm + +@param image grayscale image where keypoints (corners) are detected. +@param keypoints keypoints detected on the image. +@param threshold threshold on difference between intensity of the central pixel and pixels of a +circle around this pixel. +@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners +(keypoints). 
+@param type one of the three neighborhoods as defined in the paper: +FastFeatureDetector::TYPE_9_16, FastFeatureDetector::TYPE_7_12, +FastFeatureDetector::TYPE_5_8 + +Detects corners using the FAST algorithm by @cite Rosten06 . + +@note In Python API, types are given as cv.FAST_FEATURE_DETECTOR_TYPE_5_8, +cv.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner +detection, use cv.FAST.detect() method. + */ +CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression, FastFeatureDetector::DetectorType type ); + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the AGAST method. : + */ +class CV_EXPORTS_W AgastFeatureDetector : public Feature2D +{ +public: + enum DetectorType + { + AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3, + }; + + enum + { + THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001, + }; + + CV_WRAP static Ptr create( int threshold=10, + bool nonmaxSuppression=true, + AgastFeatureDetector::DetectorType type = AgastFeatureDetector::OAST_9_16); + + CV_WRAP virtual void setThreshold(int threshold) = 0; + CV_WRAP virtual int getThreshold() const = 0; + + CV_WRAP virtual void setNonmaxSuppression(bool f) = 0; + CV_WRAP virtual bool getNonmaxSuppression() const = 0; + + CV_WRAP virtual void setType(AgastFeatureDetector::DetectorType type) = 0; + CV_WRAP virtual AgastFeatureDetector::DetectorType getType() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @overload */ +CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression=true ); + +/** @brief Detects corners using the AGAST algorithm + +@param image grayscale image where keypoints (corners) are detected. +@param keypoints keypoints detected on the image. +@param threshold threshold on difference between intensity of the central pixel and pixels of a +circle around this pixel. +@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners +(keypoints). +@param type one of the four neighborhoods as defined in the paper: +AgastFeatureDetector::AGAST_5_8, AgastFeatureDetector::AGAST_7_12d, +AgastFeatureDetector::AGAST_7_12s, AgastFeatureDetector::OAST_9_16 + +For non-Intel platforms, there is a tree optimised variant of AGAST with same numerical results. +The 32-bit binary tree tables were generated automatically from original code using perl script. +The perl script and examples of tree generation are placed in features2d/doc folder. +Detects corners using the AGAST algorithm by @cite mair2010_agast . + + */ +CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector& keypoints, + int threshold, bool nonmaxSuppression, AgastFeatureDetector::DetectorType type ); + +/** @brief Wrapping class for feature detection using the goodFeaturesToTrack function. 
: + */ +class CV_EXPORTS_W GFTTDetector : public Feature2D +{ +public: + CV_WRAP static Ptr create( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1, + int blockSize=3, bool useHarrisDetector=false, double k=0.04 ); + CV_WRAP static Ptr create( int maxCorners, double qualityLevel, double minDistance, + int blockSize, int gradiantSize, bool useHarrisDetector=false, double k=0.04 ); + CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; + CV_WRAP virtual int getMaxFeatures() const = 0; + + CV_WRAP virtual void setQualityLevel(double qlevel) = 0; + CV_WRAP virtual double getQualityLevel() const = 0; + + CV_WRAP virtual void setMinDistance(double minDistance) = 0; + CV_WRAP virtual double getMinDistance() const = 0; + + CV_WRAP virtual void setBlockSize(int blockSize) = 0; + CV_WRAP virtual int getBlockSize() const = 0; + + CV_WRAP virtual void setHarrisDetector(bool val) = 0; + CV_WRAP virtual bool getHarrisDetector() const = 0; + + CV_WRAP virtual void setK(double k) = 0; + CV_WRAP virtual double getK() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @brief Class for extracting blobs from an image. : + +The class implements a simple algorithm for extracting blobs from an image: + +1. Convert the source image to binary images by applying thresholding with several thresholds from + minThreshold (inclusive) to maxThreshold (exclusive) with distance thresholdStep between + neighboring thresholds. +2. Extract connected components from every binary image by findContours and calculate their + centers. +3. Group centers from several binary images by their coordinates. Close centers form one group that + corresponds to one blob, which is controlled by the minDistBetweenBlobs parameter. +4. From the groups, estimate final centers of blobs and their radiuses and return as locations and + sizes of keypoints. + +This class performs several filtrations of returned blobs. You should set filterBy\* to true/false +to turn on/off corresponding filtration. Available filtrations: + +- **By color**. This filter compares the intensity of a binary image at the center of a blob to +blobColor. If they differ, the blob is filtered out. Use blobColor = 0 to extract dark blobs +and blobColor = 255 to extract light blobs. +- **By area**. Extracted blobs have an area between minArea (inclusive) and maxArea (exclusive). +- **By circularity**. Extracted blobs have circularity +(\f$\frac{4*\pi*Area}{perimeter * perimeter}\f$) between minCircularity (inclusive) and +maxCircularity (exclusive). +- **By ratio of the minimum inertia to maximum inertia**. Extracted blobs have this ratio +between minInertiaRatio (inclusive) and maxInertiaRatio (exclusive). +- **By convexity**. Extracted blobs have convexity (area / area of blob convex hull) between +minConvexity (inclusive) and maxConvexity (exclusive). + +Default values of parameters are tuned to extract dark circular blobs. 
+ */ +class CV_EXPORTS_W SimpleBlobDetector : public Feature2D +{ +public: + struct CV_EXPORTS_W_SIMPLE Params + { + CV_WRAP Params(); + CV_PROP_RW float thresholdStep; + CV_PROP_RW float minThreshold; + CV_PROP_RW float maxThreshold; + CV_PROP_RW size_t minRepeatability; + CV_PROP_RW float minDistBetweenBlobs; + + CV_PROP_RW bool filterByColor; + CV_PROP_RW uchar blobColor; + + CV_PROP_RW bool filterByArea; + CV_PROP_RW float minArea, maxArea; + + CV_PROP_RW bool filterByCircularity; + CV_PROP_RW float minCircularity, maxCircularity; + + CV_PROP_RW bool filterByInertia; + CV_PROP_RW float minInertiaRatio, maxInertiaRatio; + + CV_PROP_RW bool filterByConvexity; + CV_PROP_RW float minConvexity, maxConvexity; + + void read( const FileNode& fn ); + void write( FileStorage& fs ) const; + }; + + CV_WRAP static Ptr + create(const SimpleBlobDetector::Params ¶meters = SimpleBlobDetector::Params()); + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Class implementing the KAZE keypoint detector and descriptor extractor, described in @cite ABD12 . + +@note AKAZE descriptor can only be used with KAZE or AKAZE keypoints .. [ABD12] KAZE Features. Pablo +F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on Computer Vision +(ECCV), Fiorenze, Italy, October 2012. +*/ +class CV_EXPORTS_W KAZE : public Feature2D +{ +public: + enum DiffusivityType + { + DIFF_PM_G1 = 0, + DIFF_PM_G2 = 1, + DIFF_WEICKERT = 2, + DIFF_CHARBONNIER = 3 + }; + + /** @brief The KAZE constructor + + @param extended Set to enable extraction of extended (128-byte) descriptor. + @param upright Set to enable use of upright descriptors (non rotation-invariant). + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or + DIFF_CHARBONNIER + */ + CV_WRAP static Ptr create(bool extended=false, bool upright=false, + float threshold = 0.001f, + int nOctaves = 4, int nOctaveLayers = 4, + KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2); + + CV_WRAP virtual void setExtended(bool extended) = 0; + CV_WRAP virtual bool getExtended() const = 0; + + CV_WRAP virtual void setUpright(bool upright) = 0; + CV_WRAP virtual bool getUpright() const = 0; + + CV_WRAP virtual void setThreshold(double threshold) = 0; + CV_WRAP virtual double getThreshold() const = 0; + + CV_WRAP virtual void setNOctaves(int octaves) = 0; + CV_WRAP virtual int getNOctaves() const = 0; + + CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0; + CV_WRAP virtual int getNOctaveLayers() const = 0; + + CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0; + CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13. + +@details AKAZE descriptors can only be used with KAZE or AKAZE keypoints. This class is thread-safe. + +@note When you need descriptors use Feature2D::detectAndCompute, which +provides better performance. When using Feature2D::detect followed by +Feature2D::compute scale space pyramid is computed twice. + +@note AKAZE implements T-API. When image is passed as UMat some parts of the algorithm +will use OpenCL. 
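+ + An editorial one-call sketch following the detectAndCompute note above (the gray input Mat is an assumption): + @code{.cpp} + Ptr<AKAZE> akaze = AKAZE::create(); + std::vector<KeyPoint> kpts; + Mat desc; + akaze->detectAndCompute(gray, noArray(), kpts, desc); // avoids building the scale space twice + @endcode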
+ +@note [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear +Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In +British Machine Vision Conference (BMVC), Bristol, UK, September 2013. + +*/ +class CV_EXPORTS_W AKAZE : public Feature2D +{ +public: + // AKAZE descriptor type + enum DescriptorType + { + DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation + DESCRIPTOR_KAZE = 3, + DESCRIPTOR_MLDB_UPRIGHT = 4, ///< Upright descriptors, not invariant to rotation + DESCRIPTOR_MLDB = 5 + }; + + /** @brief The AKAZE constructor + + @param descriptor_type Type of the extracted descriptor: DESCRIPTOR_KAZE, + DESCRIPTOR_KAZE_UPRIGHT, DESCRIPTOR_MLDB or DESCRIPTOR_MLDB_UPRIGHT. + @param descriptor_size Size of the descriptor in bits. 0 -\> Full size + @param descriptor_channels Number of channels in the descriptor (1, 2, 3) + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or + DIFF_CHARBONNIER + */ + CV_WRAP static Ptr create(AKAZE::DescriptorType descriptor_type = AKAZE::DESCRIPTOR_MLDB, + int descriptor_size = 0, int descriptor_channels = 3, + float threshold = 0.001f, int nOctaves = 4, + int nOctaveLayers = 4, KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2); + + CV_WRAP virtual void setDescriptorType(AKAZE::DescriptorType dtype) = 0; + CV_WRAP virtual AKAZE::DescriptorType getDescriptorType() const = 0; + + CV_WRAP virtual void setDescriptorSize(int dsize) = 0; + CV_WRAP virtual int getDescriptorSize() const = 0; + + CV_WRAP virtual void setDescriptorChannels(int dch) = 0; + CV_WRAP virtual int getDescriptorChannels() const = 0; + + CV_WRAP virtual void setThreshold(double threshold) = 0; + CV_WRAP virtual double getThreshold() const = 0; + + CV_WRAP virtual void setNOctaves(int octaves) = 0; + CV_WRAP virtual int getNOctaves() const = 0; + + CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0; + CV_WRAP virtual int getNOctaveLayers() const = 0; + + CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0; + CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0; + CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; +}; + +//! 
@} features2d_main + +/****************************************************************************************\ +* Distance * +\****************************************************************************************/ + +template<typename T> +struct CV_EXPORTS Accumulator +{ + typedef T Type; +}; + +template<> struct Accumulator<unsigned char> { typedef float Type; }; +template<> struct Accumulator<unsigned short> { typedef float Type; }; +template<> struct Accumulator<char> { typedef float Type; }; +template<> struct Accumulator<short> { typedef float Type; }; + +/* + * Squared Euclidean distance functor + */ +template<class T> +struct CV_EXPORTS SL2 +{ + static const NormTypes normType = NORM_L2SQR; + typedef T ValueType; + typedef typename Accumulator<T>::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const + { + return normL2Sqr<ValueType, ResultType>(a, b, size); + } +}; + +/* + * Euclidean distance functor + */ +template<class T> +struct L2 +{ + static const NormTypes normType = NORM_L2; + typedef T ValueType; + typedef typename Accumulator<T>::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const + { + return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size)); + } +}; + +/* + * Manhattan distance (city block distance) functor + */ +template<class T> +struct L1 +{ + static const NormTypes normType = NORM_L1; + typedef T ValueType; + typedef typename Accumulator<T>::Type ResultType; + + ResultType operator()( const T* a, const T* b, int size ) const + { + return normL1<ValueType, ResultType>(a, b, size); + } +}; + +/****************************************************************************************\ +* DescriptorMatcher * +\****************************************************************************************/ + +//! @addtogroup features2d_match +//! @{ + +/** @brief Abstract base class for matching keypoint descriptors. + +It has two groups of match methods: for matching descriptors of an image with another image or with +an image set. + */ +class CV_EXPORTS_W DescriptorMatcher : public Algorithm +{ +public: + enum MatcherType + { + FLANNBASED = 1, + BRUTEFORCE = 2, + BRUTEFORCE_L1 = 3, + BRUTEFORCE_HAMMING = 4, + BRUTEFORCE_HAMMINGLUT = 5, + BRUTEFORCE_SL2 = 6 + }; + + virtual ~DescriptorMatcher(); + + /** @brief Adds descriptors to train a CPU (trainDescCollection) or GPU (utrainDescCollection) descriptor + collection. + + If the collection is not empty, the new descriptors are added to existing train descriptors. + + @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same + train image. + */ + CV_WRAP virtual void add( InputArrayOfArrays descriptors ); + + /** @brief Returns a constant reference to the train descriptor collection trainDescCollection. + */ + CV_WRAP const std::vector<Mat>& getTrainDescriptors() const; + + /** @brief Clears the train descriptor collections. + */ + CV_WRAP virtual void clear() CV_OVERRIDE; + + /** @brief Returns true if there are no train descriptors in both collections. + */ + CV_WRAP virtual bool empty() const CV_OVERRIDE; + + /** @brief Returns true if the descriptor matcher supports masking permissible matches. + */ + CV_WRAP virtual bool isMaskSupported() const = 0; + + /** @brief Trains a descriptor matcher + + Trains a descriptor matcher (for example, the flann index). In all methods to match, the method + train() is run every time before matching. Some descriptor matchers (for example, BruteForceMatcher) + have an empty implementation of this method. Other matchers really train their inner structures (for + example, FlannBasedMatcher trains flann::Index).
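+ + An editorial sketch of the add/train/match cycle described above (the descriptor Mats are assumed to come from a prior Feature2D::compute call): + @code{.cpp} + Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased"); + matcher->add(std::vector<Mat>{trainDesc1, trainDesc2}); + matcher->train(); // builds the flann index; match() would also trigger it implicitly + std::vector<DMatch> matches; + matcher->match(queryDesc, matches); + @endcode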
+ */ + CV_WRAP virtual void train(); + + /** @brief Finds the best match for each descriptor from a query set. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. If a query descriptor is masked out in mask, no match is added for this + descriptor. So, the matches size may be smaller than the query descriptors count. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, the train descriptors collection that was set by DescriptorMatcher::add is + used. An optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at<uchar>(i,j) is non-zero. + */ + CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector<DMatch>& matches, InputArray mask=noArray() ) const; + + /** @brief Finds the k best matches for each descriptor from a query set. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param matches Matches. Each matches[i] is k or fewer matches for the same query descriptor. + @param k Count of best matches found per each query descriptor, or fewer if a query descriptor has + less than k possible matches in total. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + These extended variants of DescriptorMatcher::match methods find several best matches for each query + descriptor. The matches are returned in increasing order of distance. See DescriptorMatcher::match + for the details about query and train descriptors. + */ + CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector<std::vector<DMatch> >& matches, int k, + InputArray mask=noArray(), bool compactResult=false ) const; + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Found matches. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param maxDistance Threshold for the distance between matched descriptors. Distance here means + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors.
+ + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. + */ + CV_WRAP void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, + CV_OUT std::vector >& matches, float maxDistance, + InputArray mask=noArray(), bool compactResult=false ) const; + + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + */ + CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector& matches, + InputArrayOfArrays masks=noArray() ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Found matches. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + CV_WRAP void radiusMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ); + + + CV_WRAP void write( const String& fileName ) const + { + FileStorage fs(fileName, FileStorage::WRITE); + write(fs); + } + + CV_WRAP void read( const String& fileName ) + { + FileStorage fs(fileName, FileStorage::READ); + read(fs.root()); + } + // Reads matcher object from a file node + // see corresponding cv::Algorithm method + CV_WRAP virtual void read( const FileNode& ) CV_OVERRIDE; + // Writes matcher object to a file storage + virtual void write( FileStorage& ) const CV_OVERRIDE; + + /** @brief Clones the matcher. 
+ + @param emptyTrainData If emptyTrainData is false, the method creates a deep copy of the object, + that is, copies both parameters and train data. If emptyTrainData is true, the method creates an + object copy with the current parameters but with empty train data. + */ + CV_WRAP CV_NODISCARD_STD virtual Ptr clone( bool emptyTrainData=false ) const = 0; + + /** @brief Creates a descriptor matcher of a given type with the default parameters (using default + constructor). + + @param descriptorMatcherType Descriptor matcher type. Now the following matcher types are + supported: + - `BruteForce` (it uses L2 ) + - `BruteForce-L1` + - `BruteForce-Hamming` + - `BruteForce-Hamming(2)` + - `FlannBased` + */ + CV_WRAP static Ptr create( const String& descriptorMatcherType ); + + CV_WRAP static Ptr create( const DescriptorMatcher::MatcherType& matcherType ); + + + // see corresponding cv::Algorithm method + CV_WRAP inline void write(const Ptr& fs, const String& name = String()) const { Algorithm::write(fs, name); } + +protected: + /** + * Class to work with descriptors from several images as with one merged matrix. + * It is used e.g. in FlannBasedMatcher. + */ + class CV_EXPORTS DescriptorCollection + { + public: + DescriptorCollection(); + DescriptorCollection( const DescriptorCollection& collection ); + virtual ~DescriptorCollection(); + + // Vector of matrices "descriptors" will be merged to one matrix "mergedDescriptors" here. + void set( const std::vector& descriptors ); + virtual void clear(); + + const Mat& getDescriptors() const; + Mat getDescriptor( int imgIdx, int localDescIdx ) const; + Mat getDescriptor( int globalDescIdx ) const; + void getLocalIdx( int globalDescIdx, int& imgIdx, int& localDescIdx ) const; + + int size() const; + + protected: + Mat mergedDescriptors; + std::vector startIdxs; + }; + + //! In fact the matching is implemented only by the following two methods. These methods suppose + //! that the class object has been trained already. Public match methods call these methods + //! after calling train(). + virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0; + virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0; + + static bool isPossibleMatch( InputArray mask, int queryIdx, int trainIdx ); + static bool isMaskedOut( InputArrayOfArrays masks, int queryIdx ); + + CV_NODISCARD_STD static Mat clone_op( Mat m ) { return m.clone(); } + void checkMasks( InputArrayOfArrays masks, int queryDescriptorsCount ) const; + + //! Collection of descriptors from train images. + std::vector trainDescCollection; + std::vector utrainDescCollection; +}; + +/** @brief Brute-force descriptor matcher. + +For each descriptor in the first set, this matcher finds the closest descriptor in the second set +by trying each one. This descriptor matcher supports masking permissible matches of descriptor +sets. + */ +class CV_EXPORTS_W BFMatcher : public DescriptorMatcher +{ +public: + /** @brief Brute-force matcher constructor (obsolete). Please use BFMatcher.create() + * + * + */ + CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false ); + + virtual ~BFMatcher() {} + + virtual bool isMaskSupported() const CV_OVERRIDE { return true; } + + /** @brief Brute-force matcher create method. + @param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. 
L1 and L2 norms are + preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and + BRIEF, and NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see the ORB::ORB constructor + description). + @param crossCheck If it is false, this will be the default BFMatcher behaviour: it finds the k + nearest neighbours for each query descriptor. If crossCheck==true, then the knnMatch() method with + k=1 will only return pairs (i,j) such that for the i-th query descriptor the j-th descriptor in the + matcher's collection is the nearest and vice versa, i.e. the BFMatcher will only return consistent + pairs. Such a technique usually produces the best results with a minimal number of outliers when there are + enough matches. This is an alternative to the ratio test used by D. Lowe in the SIFT paper. + */ + CV_WRAP static Ptr<BFMatcher> create( int normType=NORM_L2, bool crossCheck=false ); + + CV_NODISCARD_STD virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const CV_OVERRIDE; +protected: + virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + + int normType; + bool crossCheck; +}; + +#if defined(HAVE_OPENCV_FLANN) || defined(CV_DOXYGEN) + +/** @brief Flann-based descriptor matcher. + +This matcher trains cv::flann::Index on a train descriptor collection and calls its nearest search +methods to find the best matches. So, this matcher may be faster when matching a large train +collection than the brute force matcher. FlannBasedMatcher does not support masking permissible +matches of descriptor sets because flann::Index does not support this. + */ +class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher +{ +public: + CV_WRAP FlannBasedMatcher( const Ptr<flann::IndexParams>& indexParams=makePtr<flann::KDTreeIndexParams>(), + const Ptr<flann::SearchParams>& searchParams=makePtr<flann::SearchParams>() ); + + virtual void add( InputArrayOfArrays descriptors ) CV_OVERRIDE; + virtual void clear() CV_OVERRIDE; + + // Reads matcher object from a file node + virtual void read( const FileNode& ) CV_OVERRIDE; + // Writes matcher object to a file storage + virtual void write( FileStorage& ) const CV_OVERRIDE; + + virtual void train() CV_OVERRIDE; + virtual bool isMaskSupported() const CV_OVERRIDE; + + CV_WRAP static Ptr<FlannBasedMatcher> create(); + + CV_NODISCARD_STD virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const CV_OVERRIDE; +protected: + static void convertToDMatches( const DescriptorCollection& descriptors, + const Mat& indices, const Mat& distances, + std::vector<std::vector<DMatch> >& matches ); + + virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance, + InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE; + + Ptr<flann::IndexParams> indexParams; + Ptr<flann::SearchParams> searchParams; + Ptr<flann::Index> flannIndex; + + DescriptorCollection mergedDescriptors; + int addedDescCount; +}; + +#endif + +//! @} features2d_match + +/****************************************************************************************\ +* Drawing functions * +\****************************************************************************************/ + +//! @addtogroup features2d_draw +//!
@{ + +enum struct DrawMatchesFlags +{ + DEFAULT = 0, //!< Output image matrix will be created (Mat::create), + //!< i.e. existing memory of output image may be reused. + //!< Two source image, matches and single keypoints will be drawn. + //!< For each keypoint only the center point will be drawn (without + //!< the circle around keypoint with keypoint size and orientation). + DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create). + //!< Matches will be drawn on existing content of output image. + NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn. + DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and + //!< orientation will be drawn. +}; +CV_ENUM_FLAGS(DrawMatchesFlags) + +/** @brief Draws keypoints. + +@param image Source image. +@param keypoints Keypoints from the source image. +@param outImage Output image. Its content depends on the flags value defining what is drawn in the +output image. See possible flags bit values below. +@param color Color of keypoints. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. See details above in drawMatches . + +@note +For Python API, flags are modified as cv.DRAW_MATCHES_FLAGS_DEFAULT, +cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG, +cv.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS + */ +CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector& keypoints, InputOutputArray outImage, + const Scalar& color=Scalar::all(-1), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +/** @brief Draws the found matches of keypoints from two images. + +@param img1 First source image. +@param keypoints1 Keypoints from the first source image. +@param img2 Second source image. +@param keypoints2 Keypoints from the second source image. +@param matches1to2 Matches from the first image to the second one, which means that keypoints1[i] +has a corresponding point in keypoints2[matches[i]] . +@param outImg Output image. Its content depends on the flags value defining what is drawn in the +output image. See possible flags bit values below. +@param matchColor Color of matches (lines and connected keypoints). If matchColor==Scalar::all(-1) +, the color is generated randomly. +@param singlePointColor Color of single keypoints (circles), which means that keypoints do not +have the matches. If singlePointColor==Scalar::all(-1) , the color is generated randomly. +@param matchesMask Mask determining which matches are drawn. If the mask is empty, all matches are +drawn. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. + +This function draws matches of keypoints from two images in the output image. Match is a line +connecting two keypoints (circles). See cv::DrawMatchesFlags. 
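+ + An editorial usage sketch tying the parameters together (the images, keypoints and matches are assumed to come from a prior detect/match step): + @code{.cpp} + Mat vis; + drawMatches(img1, kpts1, img2, kpts2, matches, vis, + Scalar::all(-1), Scalar::all(-1), std::vector<char>(), + DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); + @endcode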
+ */ +CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector& matches1to2, InputOutputArray outImg, + const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), + const std::vector& matchesMask=std::vector(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +/** @overload */ +CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector& matches1to2, InputOutputArray outImg, + const int matchesThickness, const Scalar& matchColor=Scalar::all(-1), + const Scalar& singlePointColor=Scalar::all(-1), const std::vector& matchesMask=std::vector(), + DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector >& matches1to2, InputOutputArray outImg, + const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), + const std::vector >& matchesMask=std::vector >(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT ); + +//! @} features2d_draw + +/****************************************************************************************\ +* Functions to evaluate the feature detectors and [generic] descriptor extractors * +\****************************************************************************************/ + +CV_EXPORTS void evaluateFeatureDetector( const Mat& img1, const Mat& img2, const Mat& H1to2, + std::vector* keypoints1, std::vector* keypoints2, + float& repeatability, int& correspCount, + const Ptr& fdetector=Ptr() ); + +CV_EXPORTS void computeRecallPrecisionCurve( const std::vector >& matches1to2, + const std::vector >& correctMatches1to2Mask, + std::vector& recallPrecisionCurve ); + +CV_EXPORTS float getRecall( const std::vector& recallPrecisionCurve, float l_precision ); +CV_EXPORTS int getNearestPoint( const std::vector& recallPrecisionCurve, float l_precision ); + +/****************************************************************************************\ +* Bag of visual words * +\****************************************************************************************/ + +//! @addtogroup features2d_category +//! @{ + +/** @brief Abstract base class for training the *bag of visual words* vocabulary from a set of descriptors. + +For details, see, for example, *Visual Categorization with Bags of Keypoints* by Gabriella Csurka, +Christopher R. Dance, Lixin Fan, Jutta Willamowski, Cedric Bray, 2004. : + */ +class CV_EXPORTS_W BOWTrainer +{ +public: + BOWTrainer(); + virtual ~BOWTrainer(); + + /** @brief Adds descriptors to a training set. + + @param descriptors Descriptors to add to a training set. Each row of the descriptors matrix is a + descriptor. + + The training set is clustered using clustermethod to construct the vocabulary. + */ + CV_WRAP void add( const Mat& descriptors ); + + /** @brief Returns a training set of descriptors. + */ + CV_WRAP const std::vector& getDescriptors() const; + + /** @brief Returns the count of all descriptors stored in the training set. + */ + CV_WRAP int descriptorsCount() const; + + CV_WRAP virtual void clear(); + + /** @overload */ + CV_WRAP virtual Mat cluster() const = 0; + + /** @brief Clusters train descriptors. + + @param descriptors Descriptors to cluster. Each row of the descriptors matrix is a descriptor. 
+ Descriptors are not added to the inner train descriptor set. + + The vocabulary consists of cluster centers. So, this method returns the vocabulary. In the first + variant of the method, train descriptors stored in the object are clustered. In the second variant, + input descriptors are clustered. + */ + CV_WRAP virtual Mat cluster( const Mat& descriptors ) const = 0; + +protected: + std::vector descriptors; + int size; +}; + +/** @brief kmeans -based class to train visual vocabulary using the *bag of visual words* approach. : + */ +class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer +{ +public: + /** @brief The constructor. + + @see cv::kmeans + */ + CV_WRAP BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(), + int attempts=3, int flags=KMEANS_PP_CENTERS ); + virtual ~BOWKMeansTrainer(); + + // Returns trained vocabulary (i.e. cluster centers). + CV_WRAP virtual Mat cluster() const CV_OVERRIDE; + CV_WRAP virtual Mat cluster( const Mat& descriptors ) const CV_OVERRIDE; + +protected: + + int clusterCount; + TermCriteria termcrit; + int attempts; + int flags; +}; + +/** @brief Class to compute an image descriptor using the *bag of visual words*. + +Such a computation consists of the following steps: + +1. Compute descriptors for a given image and its keypoints set. +2. Find the nearest visual words from the vocabulary for each keypoint descriptor. +3. Compute the bag-of-words image descriptor as is a normalized histogram of vocabulary words +encountered in the image. The i-th bin of the histogram is a frequency of i-th word of the +vocabulary in the given image. + */ +class CV_EXPORTS_W BOWImgDescriptorExtractor +{ +public: + /** @brief The constructor. + + @param dextractor Descriptor extractor that is used to compute descriptors for an input image and + its keypoints. + @param dmatcher Descriptor matcher that is used to find the nearest word of the trained vocabulary + for each keypoint descriptor of the image. + */ + CV_WRAP BOWImgDescriptorExtractor( const Ptr& dextractor, + const Ptr& dmatcher ); + /** @overload */ + BOWImgDescriptorExtractor( const Ptr& dmatcher ); + virtual ~BOWImgDescriptorExtractor(); + + /** @brief Sets a visual vocabulary. + + @param vocabulary Vocabulary (can be trained using the inheritor of BOWTrainer ). Each row of the + vocabulary is a visual word (cluster center). + */ + CV_WRAP void setVocabulary( const Mat& vocabulary ); + + /** @brief Returns the set vocabulary. + */ + CV_WRAP const Mat& getVocabulary() const; + + /** @brief Computes an image descriptor using the set visual vocabulary. + + @param image Image, for which the descriptor is computed. + @param keypoints Keypoints detected in the input image. + @param imgDescriptor Computed output image descriptor. + @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i -th cluster (word of vocabulary) + returned if it is non-zero. + @param descriptors Descriptors of the image keypoints that are returned if they are non-zero. + */ + void compute( InputArray image, std::vector& keypoints, OutputArray imgDescriptor, + std::vector >* pointIdxsOfClusters=0, Mat* descriptors=0 ); + /** @overload + @param keypointDescriptors Computed descriptors to match with vocabulary. + @param imgDescriptor Computed output image descriptor. + @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. 
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d/features2d.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d/features2d.hpp
new file mode 100644
index 0000000..e81df0a
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d/features2d.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/features2d.hpp" diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d/hal/interface.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d/hal/interface.h new file mode 100644 index 0000000..bc3b084 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/features2d/hal/interface.h @@ -0,0 +1,33 @@ +#ifndef OPENCV_FEATURE2D_HAL_INTERFACE_H +#define OPENCV_FEATURE2D_HAL_INTERFACE_H + +#include "opencv2/core/cvdef.h" +//! @addtogroup features2d_hal_interface +//! @{ + +//! @name Fast feature detector types +//! @sa cv::FastFeatureDetector +//! @{ +#define CV_HAL_TYPE_5_8 0 +#define CV_HAL_TYPE_7_12 1 +#define CV_HAL_TYPE_9_16 2 +//! @} + +//! @name Key point +//! @sa cv::KeyPoint +//! @{ +struct CV_EXPORTS cvhalKeyPoint +{ + float x; + float y; + float size; + float angle; + float response; + int octave; + int class_id; +}; +//! @} + +//! @} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/highgui.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/highgui.hpp new file mode 100644 index 0000000..21560ee --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/highgui.hpp @@ -0,0 +1,17 @@ +// +// Copyright (C) 2021 nihui +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "opencv2/highgui/highgui.hpp" diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/highgui/highgui.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/highgui/highgui.hpp new file mode 100644 index 0000000..b337fb6 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/highgui/highgui.hpp @@ -0,0 +1,62 @@ +// +// Copyright (C) 2021 nihui +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+#ifndef OPENCV_HIGHGUI_HPP
+#define OPENCV_HIGHGUI_HPP
+
+#include "opencv2/core.hpp"
+
+enum
+{
+    CV_LOAD_IMAGE_UNCHANGED = -1,
+    CV_LOAD_IMAGE_GRAYSCALE = 0,
+    CV_LOAD_IMAGE_COLOR = 1,
+};
+
+enum
+{
+    CV_IMWRITE_JPEG_QUALITY = 1
+};
+
+namespace cv {
+
+enum ImreadModes
+{
+    IMREAD_UNCHANGED = -1,
+    IMREAD_GRAYSCALE = 0,
+    IMREAD_COLOR = 1
+};
+
+enum ImwriteFlags
+{
+    IMWRITE_JPEG_QUALITY = 1
+};
+
+CV_EXPORTS_W Mat imread(const String& filename, int flags = IMREAD_COLOR);
+
+CV_EXPORTS_W bool imwrite(const String& filename, InputArray img, const std::vector<int>& params = std::vector<int>());
+
+CV_EXPORTS_W Mat imdecode(InputArray buf, int flags);
+
+CV_EXPORTS_W bool imencode(const String& ext, InputArray img, CV_OUT std::vector<uchar>& buf, const std::vector<int>& params = std::vector<int>());
+
+CV_EXPORTS_W void imshow(const String& winname, InputArray mat);
+
+CV_EXPORTS_W int waitKey(int delay = 0);
+
+} // namespace cv
+
+#endif // OPENCV_HIGHGUI_HPP
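This opencv-mobile highgui is a lightweight replacement for the desktop module (there is no window system on Android, so imshow/waitKey are effectively stubs). A minimal sketch of the image-codec part it does provide; the helper name jpegRoundTrip and the quality value are illustrative:

    #include <opencv2/highgui.hpp>

    // Re-encode a BGR image as JPEG at quality 90 and decode it back.
    cv::Mat jpegRoundTrip(const cv::Mat& bgr)
    {
        std::vector<uchar> buf;
        std::vector<int> params = { cv::IMWRITE_JPEG_QUALITY, 90 };
        cv::imencode(".jpg", bgr, buf, params);    // in-memory encode
        return cv::imdecode(buf, cv::IMREAD_COLOR); // and back
    }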
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc.hpp
new file mode 100644
index 0000000..2c91211
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc.hpp
@@ -0,0 +1,5005 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_IMGPROC_HPP
+#define OPENCV_IMGPROC_HPP
+
+#include "opencv2/core.hpp"
+
+/**
+  @defgroup imgproc Image Processing
+
+This module includes image-processing functions.
+
+  @{
+    @defgroup imgproc_filter Image Filtering
+
+Functions and classes described in this section are used to perform various linear or non-linear
+filtering operations on 2D images (represented as Mat's). It means that for each pixel location
+\f$(x,y)\f$ in the source image (normally, rectangular), its neighborhood is considered and used to
+compute the response. In case of a linear filter, it is a weighted sum of pixel values. In case of
+morphological operations, it is the minimum or maximum values, and so on. The computed response is
+stored in the destination image at the same location \f$(x,y)\f$. It means that the output image
+will be of the same size as the input image. Normally, the functions support multi-channel arrays,
+in which case every channel is processed independently. Therefore, the output image will also have
+the same number of channels as the input one.
+
+Another common feature of the functions and classes described in this section is that, unlike
+simple arithmetic functions, they need to extrapolate values of some non-existing pixels. For
+example, if you want to smooth an image using a Gaussian \f$3 \times 3\f$ filter, then, when
+processing the left-most pixels in each row, you need pixels to the left of them, that is, outside
+of the image. You can let these pixels be the same as the left-most image pixels ("replicated
+border" extrapolation method), or assume that all the non-existing pixels are zeros ("constant
+border" extrapolation method), and so on. OpenCV enables you to specify the extrapolation method.
+For details, see #BorderTypes
+
+@anchor filter_depths
+### Depth combinations
+Input depth (src.depth()) | Output depth (ddepth)
+--------------------------|----------------------
+CV_8U                     | -1/CV_16S/CV_32F/CV_64F
+CV_16U/CV_16S             | -1/CV_32F/CV_64F
+CV_32F                    | -1/CV_32F/CV_64F
+CV_64F                    | -1/CV_64F
+
+@note when ddepth=-1, the output image will have the same depth as the source.
+
+    @defgroup imgproc_transform Geometric Image Transformations
+
+The functions in this section perform various geometrical transformations of 2D images. They do not
+change the image content but deform the pixel grid and map this deformed grid to the destination
+image. In fact, to avoid sampling artifacts, the mapping is done in the reverse order, from
+destination to the source. That is, for each pixel \f$(x, y)\f$ of the destination image, the
+functions compute coordinates of the corresponding "donor" pixel in the source image and copy the
+pixel value:
+
+\f[\texttt{dst} (x,y)= \texttt{src} (f_x(x,y), f_y(x,y))\f]
+
+In case when you specify the forward mapping \f$\left<g_x, g_y\right>: \texttt{src} \rightarrow
+\texttt{dst}\f$, the OpenCV functions first compute the corresponding inverse mapping
+\f$\left<f_x, f_y\right>: \texttt{dst} \rightarrow \texttt{src}\f$ and then use the above formula.
+
+The actual implementations of the geometrical transformations, from the most generic remap and to
+the simplest and the fastest resize, need to solve two main problems with the above formula:
+
+- Extrapolation of non-existing pixels. Similarly to the filtering functions described in the
+previous section, for some \f$(x,y)\f$, either one of \f$f_x(x,y)\f$, or \f$f_y(x,y)\f$, or both
+of them may fall outside of the image. In this case, an extrapolation method needs to be used.
+OpenCV provides the same selection of extrapolation methods as in the filtering functions. In
+addition, it provides the method #BORDER_TRANSPARENT. This means that the corresponding pixels in
+the destination image will not be modified at all.
+
+- Interpolation of pixel values. Usually \f$f_x(x,y)\f$ and \f$f_y(x,y)\f$ are floating-point
+numbers. This means that \f$\left<f_x(x,y), f_y(x,y)\right>\f$ can be either an affine or perspective
+transformation, or radial lens distortion correction, and so on. So, a pixel value at fractional
+coordinates needs to be retrieved. In the simplest case, the coordinates can be just rounded to the
+nearest integer coordinates and the corresponding pixel can be used. This is called a
+nearest-neighbor interpolation. However, a better result can be achieved by using more
+sophisticated [interpolation methods](http://en.wikipedia.org/wiki/Multivariate_interpolation) ,
+where a polynomial function is fit into some neighborhood of the computed pixel \f$(f_x(x,y),
+f_y(x,y))\f$, and then the value of the polynomial at \f$(f_x(x,y), f_y(x,y))\f$ is taken as the
+interpolated pixel value. In OpenCV, you can choose between several interpolation methods. See
+#resize for details.
+
+@note The geometrical transformations do not work with `CV_8S` or `CV_32S` images.
+
+    @defgroup imgproc_misc Miscellaneous Image Transformations
+    @defgroup imgproc_draw Drawing Functions
+
+Drawing functions work with matrices/images of arbitrary depth. The boundaries of the shapes can be
+rendered with antialiasing (implemented only for 8-bit images for now). All the functions include
+the parameter color that uses an RGB value (that may be constructed with the Scalar constructor )
+for color images and brightness for grayscale images. For color images, the channel ordering is
+normally *Blue, Green, Red*. This is what imshow, imread, and imwrite expect. So, if you form a
+color using the Scalar constructor, it should look like:
+
+\f[\texttt{Scalar} (blue \_ component, green \_ component, red \_ component[, alpha \_ component])\f]
+
+If you are using your own image rendering and I/O functions, you can use any channel ordering. The
+drawing functions process each channel independently and do not depend on the channel order or even
+on the used color space. The whole image can be converted from BGR to RGB or to a different color
+space using cvtColor .
+
+If a drawn figure is partially or completely outside the image, the drawing functions clip it. Also,
+many drawing functions can handle pixel coordinates specified with sub-pixel accuracy. This means
+that the coordinates can be passed as fixed-point numbers encoded as integers. The number of
+fractional bits is specified by the shift parameter and the real point coordinates are calculated as
+\f$\texttt{Point}(x,y)\rightarrow\texttt{Point2f}(x*2^{-shift},y*2^{-shift})\f$ . This feature is
+especially effective when rendering antialiased shapes.
+
+@note The functions do not support alpha-transparency when the target image is 4-channel. In this
+case, the color[3] is simply copied to the repainted pixels. Thus, if you want to paint
+semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main
+image.
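A minimal sketch of the BGR Scalar convention described above, in the shape a detection demo like this one typically uses (the helper name drawBox is illustrative; rectangle and putText are the real imgproc drawing functions):

    #include <opencv2/imgproc.hpp>
    #include <string>

    // Draw an anti-aliased detection box with a label; channel order is B, G, R.
    void drawBox(cv::Mat& img, const cv::Rect& box, const std::string& label)
    {
        const cv::Scalar green(0, 255, 0);   // Scalar(blue, green, red)
        cv::rectangle(img, box, green, 2, cv::LINE_AA);
        cv::putText(img, label, cv::Point(box.x, box.y - 5),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, green, 1, cv::LINE_AA);
    }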
+    @defgroup imgproc_color_conversions Color Space Conversions
+    @defgroup imgproc_colormap ColorMaps in OpenCV
+
+The human perception isn't built for observing fine changes in grayscale images. Human eyes are more
+sensitive to observing changes between colors, so you often need to recolor your grayscale images to
+get a clue about them. OpenCV now comes with various colormaps to enhance the visualization in your
+computer vision application.
+
+In OpenCV you only need applyColorMap to apply a colormap on a given image. The following sample
+code reads the path to an image from command line, applies a Jet colormap on it and shows the
+result:
+
+@include snippets/imgproc_applyColorMap.cpp
+
+@see #ColormapTypes
+
+    @defgroup imgproc_subdiv2d Planar Subdivision
+
+The Subdiv2D class described in this section is used to perform various planar subdivision on
+a set of 2D points (represented as vector of Point2f). OpenCV subdivides a plane into triangles
+using the Delaunay's algorithm, which corresponds to the dual graph of the Voronoi diagram.
+In the figure below, the Delaunay's triangulation is marked with black lines and the Voronoi
+diagram with red lines.
+
+![Delaunay triangulation (black) and Voronoi (red)](pics/delaunay_voronoi.png)
+
+The subdivisions can be used for the 3D piece-wise transformation of a plane, morphing, fast
+location of points on the plane, building special graphs (such as NNG,RNG), and so forth.
+
+    @defgroup imgproc_hist Histograms
+    @defgroup imgproc_shape Structural Analysis and Shape Descriptors
+    @defgroup imgproc_motion Motion Analysis and Object Tracking
+    @defgroup imgproc_feature Feature Detection
+    @defgroup imgproc_object Object Detection
+    @defgroup imgproc_segmentation Image Segmentation
+    @defgroup imgproc_c C API
+    @defgroup imgproc_hal Hardware Acceleration Layer
+    @{
+        @defgroup imgproc_hal_functions Functions
+        @defgroup imgproc_hal_interface Interface
+    @}
+  @}
+*/
+
+namespace cv
+{
+
+/** @addtogroup imgproc
+@{
+*/
+
+//! @addtogroup imgproc_filter
+//! @{
+
+enum SpecialFilter {
+    FILTER_SCHARR = -1
+};
+
+//! type of morphological operation
+enum MorphTypes{
+    MORPH_ERODE    = 0, //!< see #erode
+    MORPH_DILATE   = 1, //!< see #dilate
+    MORPH_OPEN     = 2, //!< an opening operation
+                        //!< \f[\texttt{dst} = \mathrm{open} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \mathrm{erode} ( \texttt{src} , \texttt{element} ))\f]
+    MORPH_CLOSE    = 3, //!< a closing operation
+                        //!< \f[\texttt{dst} = \mathrm{close} ( \texttt{src} , \texttt{element} )= \mathrm{erode} ( \mathrm{dilate} ( \texttt{src} , \texttt{element} ))\f]
+    MORPH_GRADIENT = 4, //!< a morphological gradient
+                        //!< \f[\texttt{dst} = \mathrm{morph\_grad} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \texttt{src} , \texttt{element} )- \mathrm{erode} ( \texttt{src} , \texttt{element} )\f]
+    MORPH_TOPHAT   = 5, //!< "top hat"
+                        //!< \f[\texttt{dst} = \mathrm{tophat} ( \texttt{src} , \texttt{element} )= \texttt{src} - \mathrm{open} ( \texttt{src} , \texttt{element} )\f]
+    MORPH_BLACKHAT = 6, //!< "black hat"
+                        //!< \f[\texttt{dst} = \mathrm{blackhat} ( \texttt{src} , \texttt{element} )= \mathrm{close} ( \texttt{src} , \texttt{element} )- \texttt{src}\f]
+    MORPH_HITMISS  = 7  //!< "hit or miss"
+                        //!<   .- Only supported for CV_8UC1 binary images. A tutorial can be found in the documentation
+};
+
+//! shape of the structuring element
+enum MorphShapes {
+    MORPH_RECT    = 0, //!< a rectangular structuring element:  \f[E_{ij}=1\f]
+    MORPH_CROSS   = 1, //!< a cross-shaped structuring element:
+                       //!< \f[E_{ij} = \begin{cases} 1 & \texttt{if } {i=\texttt{anchor.y}} \texttt{ or } {j=\texttt{anchor.x}} \\ 0 & \texttt{otherwise} \end{cases}\f]
+    MORPH_ELLIPSE = 2 //!< an elliptic structuring element, that is, a filled ellipse inscribed
+                      //!< into the rectangle Rect(0, 0, esize.width, esize.height)
+};
+
+//! @} imgproc_filter
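A minimal sketch of how these two enums combine in practice, for instance to clean up a binary segmentation mask (the helper name denoiseMask and the 5x5 kernel are illustrative choices):

    #include <opencv2/imgproc.hpp>

    // Remove small speckles from a binary mask with an opening operation.
    void denoiseMask(const cv::Mat& mask, cv::Mat& cleaned)
    {
        cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5));
        cv::morphologyEx(mask, cleaned, cv::MORPH_OPEN, kernel);  // erode then dilate
    }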
+//! @addtogroup imgproc_transform
+//! @{
+
+//! interpolation algorithm
+enum InterpolationFlags{
+    /** nearest neighbor interpolation */
+    INTER_NEAREST        = 0,
+    /** bilinear interpolation */
+    INTER_LINEAR         = 1,
+    /** bicubic interpolation */
+    INTER_CUBIC          = 2,
+    /** resampling using pixel area relation. It may be a preferred method for image decimation, as
+    it gives moire'-free results. But when the image is zoomed, it is similar to the INTER_NEAREST
+    method. */
+    INTER_AREA           = 3,
+    /** Lanczos interpolation over 8x8 neighborhood */
+    INTER_LANCZOS4       = 4,
+    /** Bit exact bilinear interpolation */
+    INTER_LINEAR_EXACT   = 5,
+    /** Bit exact nearest neighbor interpolation. This will produce same results as
+    the nearest neighbor method in PIL, scikit-image or Matlab. */
+    INTER_NEAREST_EXACT  = 6,
+    /** mask for interpolation codes */
+    INTER_MAX            = 7,
+    /** flag, fills all of the destination image pixels. If some of them correspond to outliers in the
+    source image, they are set to zero */
+    WARP_FILL_OUTLIERS   = 8,
+    /** flag, inverse transformation
+
+    For example, #linearPolar or #logPolar transforms:
+    - flag is __not__ set: \f$dst( \rho , \phi ) = src(x,y)\f$
+    - flag is set: \f$dst(x,y) = src( \rho , \phi )\f$
+    */
+    WARP_INVERSE_MAP     = 16
+};
+
+/** \brief Specify the polar mapping mode
+@sa warpPolar
+*/
+enum WarpPolarMode
+{
+    WARP_POLAR_LINEAR = 0, ///< Remaps an image to/from polar space.
+    WARP_POLAR_LOG = 256 ///< Remaps an image to/from semilog-polar space.
+};
+
+enum InterpolationMasks {
+       INTER_BITS      = 5,
+       INTER_BITS2     = INTER_BITS * 2,
+       INTER_TAB_SIZE  = 1 << INTER_BITS,
+       INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE
+     };
+
+//! @} imgproc_transform
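Following the INTER_AREA note above, a minimal sketch of choosing the interpolation flag by direction of scaling, as letterboxing code in a YOLO-style demo often does (the helper name resizeTo is illustrative):

    #include <opencv2/imgproc.hpp>

    // Downscale with INTER_AREA (moire-free decimation), upscale with INTER_LINEAR.
    cv::Mat resizeTo(const cv::Mat& src, cv::Size dstSize)
    {
        int interp = (dstSize.width < src.cols) ? cv::INTER_AREA : cv::INTER_LINEAR;
        cv::Mat out;
        cv::resize(src, out, dstSize, 0, 0, interp);
        return out;
    }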
+//! @addtogroup imgproc_misc
+//! @{
+
+//! Distance types for Distance Transform and M-estimators
+//! @see distanceTransform, fitLine
+enum DistanceTypes {
+    DIST_USER    = -1,  //!< User defined distance
+    DIST_L1      = 1,   //!< distance = |x1-x2| + |y1-y2|
+    DIST_L2      = 2,   //!< the simple euclidean distance
+    DIST_C       = 3,   //!< distance = max(|x1-x2|,|y1-y2|)
+    DIST_L12     = 4,   //!< L1-L2 metric: distance = 2(sqrt(1+x*x/2) - 1))
+    DIST_FAIR    = 5,   //!< distance = c^2(|x|/c-log(1+|x|/c)), c = 1.3998
+    DIST_WELSCH  = 6,   //!< distance = c^2/2(1-exp(-(x/c)^2)), c = 2.9846
+    DIST_HUBER   = 7    //!< distance = |x|<c ? x^2/2 : c(|x|-c/2), c=1.345
+};
+
+//! Mask size for distance transform
+enum DistanceTransformMasks {
+    DIST_MASK_3       = 3, //!< mask=3
+    DIST_MASK_5       = 5, //!< mask=5
+    DIST_MASK_PRECISE = 0  //!<
+};
+
+//! type of the threshold operation
+//! ![threshold types](pics/threshold.png)
+enum ThresholdTypes {
+    THRESH_BINARY     = 0, //!< \f[\texttt{dst} (x,y) =  \fork{\texttt{maxval}}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{0}{otherwise}\f]
+    THRESH_BINARY_INV = 1, //!< \f[\texttt{dst} (x,y) =  \fork{0}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{maxval}}{otherwise}\f]
+    THRESH_TRUNC      = 2, //!< \f[\texttt{dst} (x,y) =  \fork{\texttt{threshold}}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{src}(x,y)}{otherwise}\f]
+    THRESH_TOZERO     = 3, //!< \f[\texttt{dst} (x,y) =  \fork{\texttt{src}(x,y)}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{0}{otherwise}\f]
+    THRESH_TOZERO_INV = 4, //!< \f[\texttt{dst} (x,y) =  \fork{0}{if \(\texttt{src}(x,y) > \texttt{thresh}\)}{\texttt{src}(x,y)}{otherwise}\f]
+    THRESH_MASK       = 7,
+    THRESH_OTSU       = 8, //!< flag, use Otsu algorithm to choose the optimal threshold value
+    THRESH_TRIANGLE   = 16 //!< flag, use Triangle algorithm to choose the optimal threshold value
+};
+
+//! adaptive threshold algorithm
+//! @see adaptiveThreshold
+enum AdaptiveThresholdTypes {
+    /** the threshold value \f$T(x,y)\f$ is a mean of the \f$\texttt{blockSize} \times
+    \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$ minus C */
+    ADAPTIVE_THRESH_MEAN_C     = 0,
+    /** the threshold value \f$T(x, y)\f$ is a weighted sum (cross-correlation with a Gaussian
+    window) of the \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$
+    minus C . The default sigma (standard deviation) is used for the specified blockSize . See
+    #getGaussianKernel*/
+    ADAPTIVE_THRESH_GAUSSIAN_C = 1
+};
+
+//! class of the pixel in GrabCut algorithm
+enum GrabCutClasses {
+    GC_BGD    = 0,  //!< an obvious background pixel
+    GC_FGD    = 1,  //!< an obvious foreground (object) pixel
+    GC_PR_BGD = 2,  //!< a possible background pixel
+    GC_PR_FGD = 3   //!< a possible foreground pixel
+};
+
+//! GrabCut algorithm flags
+enum GrabCutModes {
+    /** The function initializes the state and the mask using the provided rectangle. After that it
+    runs iterCount iterations of the algorithm. */
+    GC_INIT_WITH_RECT  = 0,
+    /** The function initializes the state using the provided mask. Note that GC_INIT_WITH_RECT
+    and GC_INIT_WITH_MASK can be combined. Then, all the pixels outside of the ROI are
+    automatically initialized with GC_BGD .*/
+    GC_INIT_WITH_MASK  = 1,
+    /** The value means that the algorithm should just resume. */
+    GC_EVAL            = 2,
+    /** The value means that the algorithm should just run the grabCut algorithm (a single iteration) with the fixed model */
+    GC_EVAL_FREEZE_MODEL = 3
+};
+
+//! distanceTransform algorithm flags
+enum DistanceTransformLabelTypes {
+    /** each connected component of zeros in src (as well as all the non-zero pixels closest to the
+    connected component) will be assigned the same label */
+    DIST_LABEL_CCOMP = 0,
+    /** each zero pixel (and all the non-zero pixels closest to it) gets its own label. */
+    DIST_LABEL_PIXEL = 1
+};
+
+//! floodfill algorithm flags
+enum FloodFillFlags {
+    /** If set, the difference between the current pixel and seed pixel is considered. Otherwise,
+    the difference between neighbor pixels is considered (that is, the range is floating).
*/ + FLOODFILL_FIXED_RANGE = 1 << 16, + /** If set, the function does not change the image ( newVal is ignored), and only fills the + mask with the value specified in bits 8-16 of flags as described above. This option only make + sense in function variants that have the mask parameter. */ + FLOODFILL_MASK_ONLY = 1 << 17 +}; + +//! @} imgproc_misc + +//! @addtogroup imgproc_shape +//! @{ + +//! connected components statistics +enum ConnectedComponentsTypes { + CC_STAT_LEFT = 0, //!< The leftmost (x) coordinate which is the inclusive start of the bounding + //!< box in the horizontal direction. + CC_STAT_TOP = 1, //!< The topmost (y) coordinate which is the inclusive start of the bounding + //!< box in the vertical direction. + CC_STAT_WIDTH = 2, //!< The horizontal size of the bounding box + CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box + CC_STAT_AREA = 4, //!< The total area (in pixels) of the connected component +#ifndef CV_DOXYGEN + CC_STAT_MAX = 5 //!< Max enumeration value. Used internally only for memory allocation +#endif +}; + +//! connected components algorithm +enum ConnectedComponentsAlgorithmsTypes { + CCL_DEFAULT = -1, //!< Spaghetti @cite Bolelli2019 algorithm for 8-way connectivity, Spaghetti4C @cite Bolelli2021 algorithm for 4-way connectivity. + CCL_WU = 0, //!< SAUF @cite Wu2009 algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity. The parallel implementation described in @cite Bolelli2017 is available for SAUF. + CCL_GRANA = 1, //!< BBDT @cite Grana2010 algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity. The parallel implementation described in @cite Bolelli2017 is available for both BBDT and SAUF. + CCL_BOLELLI = 2, //!< Spaghetti @cite Bolelli2019 algorithm for 8-way connectivity, Spaghetti4C @cite Bolelli2021 algorithm for 4-way connectivity. The parallel implementation described in @cite Bolelli2017 is available for both Spaghetti and Spaghetti4C. + CCL_SAUF = 3, //!< Same as CCL_WU. It is preferable to use the flag with the name of the algorithm (CCL_SAUF) rather than the one with the name of the first author (CCL_WU). + CCL_BBDT = 4, //!< Same as CCL_GRANA. It is preferable to use the flag with the name of the algorithm (CCL_BBDT) rather than the one with the name of the first author (CCL_GRANA). + CCL_SPAGHETTI = 5, //!< Same as CCL_BOLELLI. It is preferable to use the flag with the name of the algorithm (CCL_SPAGHETTI) rather than the one with the name of the first author (CCL_BOLELLI). +}; + +//! mode of the contour retrieval algorithm +enum RetrievalModes { + /** retrieves only the extreme outer contours. It sets `hierarchy[i][2]=hierarchy[i][3]=-1` for + all the contours. */ + RETR_EXTERNAL = 0, + /** retrieves all of the contours without establishing any hierarchical relationships. */ + RETR_LIST = 1, + /** retrieves all of the contours and organizes them into a two-level hierarchy. At the top + level, there are external boundaries of the components. At the second level, there are + boundaries of the holes. If there is another contour inside a hole of a connected component, it + is still put at the top level. */ + RETR_CCOMP = 2, + /** retrieves all of the contours and reconstructs a full hierarchy of nested contours.*/ + RETR_TREE = 3, + RETR_FLOODFILL = 4 //!< +}; + +//! the contour approximation algorithm +enum ContourApproximationModes { + /** stores absolutely all the contour points. 
That is, any 2 subsequent points (x1,y1) and + (x2,y2) of the contour will be either horizontal, vertical or diagonal neighbors, that is, + max(abs(x1-x2),abs(y2-y1))==1. */ + CHAIN_APPROX_NONE = 1, + /** compresses horizontal, vertical, and diagonal segments and leaves only their end points. + For example, an up-right rectangular contour is encoded with 4 points. */ + CHAIN_APPROX_SIMPLE = 2, + /** applies one of the flavors of the Teh-Chin chain approximation algorithm @cite TehChin89 */ + CHAIN_APPROX_TC89_L1 = 3, + /** applies one of the flavors of the Teh-Chin chain approximation algorithm @cite TehChin89 */ + CHAIN_APPROX_TC89_KCOS = 4 +}; + +/** @brief Shape matching methods + +\f$A\f$ denotes object1,\f$B\f$ denotes object2 + +\f$\begin{array}{l} m^A_i = \mathrm{sign} (h^A_i) \cdot \log{h^A_i} \\ m^B_i = \mathrm{sign} (h^B_i) \cdot \log{h^B_i} \end{array}\f$ + +and \f$h^A_i, h^B_i\f$ are the Hu moments of \f$A\f$ and \f$B\f$ , respectively. +*/ +enum ShapeMatchModes { + CONTOURS_MATCH_I1 =1, //!< \f[I_1(A,B) = \sum _{i=1...7} \left | \frac{1}{m^A_i} - \frac{1}{m^B_i} \right |\f] + CONTOURS_MATCH_I2 =2, //!< \f[I_2(A,B) = \sum _{i=1...7} \left | m^A_i - m^B_i \right |\f] + CONTOURS_MATCH_I3 =3 //!< \f[I_3(A,B) = \max _{i=1...7} \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| }\f] +}; + +//! @} imgproc_shape + +//! @addtogroup imgproc_feature +//! @{ + +//! Variants of a Hough transform +enum HoughModes { + + /** classical or standard Hough transform. Every line is represented by two floating-point + numbers \f$(\rho, \theta)\f$ , where \f$\rho\f$ is a distance between (0,0) point and the line, + and \f$\theta\f$ is the angle between x-axis and the normal to the line. Thus, the matrix must + be (the created sequence will be) of CV_32FC2 type */ + HOUGH_STANDARD = 0, + /** probabilistic Hough transform (more efficient in case if the picture contains a few long + linear segments). It returns line segments rather than the whole line. Each segment is + represented by starting and ending points, and the matrix must be (the created sequence will + be) of the CV_32SC4 type. */ + HOUGH_PROBABILISTIC = 1, + /** multi-scale variant of the classical Hough transform. The lines are encoded the same way as + HOUGH_STANDARD. */ + HOUGH_MULTI_SCALE = 2, + HOUGH_GRADIENT = 3, //!< basically *21HT*, described in @cite Yuen90 + HOUGH_GRADIENT_ALT = 4, //!< variation of HOUGH_GRADIENT to get better accuracy +}; + +//! Variants of Line Segment %Detector +enum LineSegmentDetectorModes { + LSD_REFINE_NONE = 0, //!< No refinement applied + LSD_REFINE_STD = 1, //!< Standard refinement is applied. E.g. breaking arches into smaller straighter line approximations. + LSD_REFINE_ADV = 2 //!< Advanced refinement. Number of false alarms is calculated, lines are + //!< refined through increase of precision, decrement in size, etc. +}; + +//! @} imgproc_feature + +/** Histogram comparison methods + @ingroup imgproc_hist +*/ +enum HistCompMethods { + /** Correlation + \f[d(H_1,H_2) = \frac{\sum_I (H_1(I) - \bar{H_1}) (H_2(I) - \bar{H_2})}{\sqrt{\sum_I(H_1(I) - \bar{H_1})^2 \sum_I(H_2(I) - \bar{H_2})^2}}\f] + where + \f[\bar{H_k} = \frac{1}{N} \sum _J H_k(J)\f] + and \f$N\f$ is a total number of histogram bins. 
*/ + HISTCMP_CORREL = 0, + /** Chi-Square + \f[d(H_1,H_2) = \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)}\f] */ + HISTCMP_CHISQR = 1, + /** Intersection + \f[d(H_1,H_2) = \sum _I \min (H_1(I), H_2(I))\f] */ + HISTCMP_INTERSECT = 2, + /** Bhattacharyya distance + (In fact, OpenCV computes Hellinger distance, which is related to Bhattacharyya coefficient.) + \f[d(H_1,H_2) = \sqrt{1 - \frac{1}{\sqrt{\bar{H_1} \bar{H_2} N^2}} \sum_I \sqrt{H_1(I) \cdot H_2(I)}}\f] */ + HISTCMP_BHATTACHARYYA = 3, + HISTCMP_HELLINGER = HISTCMP_BHATTACHARYYA, //!< Synonym for HISTCMP_BHATTACHARYYA + /** Alternative Chi-Square + \f[d(H_1,H_2) = 2 * \sum _I \frac{\left(H_1(I)-H_2(I)\right)^2}{H_1(I)+H_2(I)}\f] + This alternative formula is regularly used for texture comparison. See e.g. @cite Puzicha1997 */ + HISTCMP_CHISQR_ALT = 4, + /** Kullback-Leibler divergence + \f[d(H_1,H_2) = \sum _I H_1(I) \log \left(\frac{H_1(I)}{H_2(I)}\right)\f] */ + HISTCMP_KL_DIV = 5 +}; + +/** the color conversion codes +@see @ref imgproc_color_conversions +@ingroup imgproc_color_conversions + */ +enum ColorConversionCodes { + COLOR_BGR2BGRA = 0, //!< add alpha channel to RGB or BGR image + COLOR_RGB2RGBA = COLOR_BGR2BGRA, + + COLOR_BGRA2BGR = 1, //!< remove alpha channel from RGB or BGR image + COLOR_RGBA2RGB = COLOR_BGRA2BGR, + + COLOR_BGR2RGBA = 2, //!< convert between RGB and BGR color spaces (with or without alpha channel) + COLOR_RGB2BGRA = COLOR_BGR2RGBA, + + COLOR_RGBA2BGR = 3, + COLOR_BGRA2RGB = COLOR_RGBA2BGR, + + COLOR_BGR2RGB = 4, + COLOR_RGB2BGR = COLOR_BGR2RGB, + + COLOR_BGRA2RGBA = 5, + COLOR_RGBA2BGRA = COLOR_BGRA2RGBA, + + COLOR_BGR2GRAY = 6, //!< convert between RGB/BGR and grayscale, @ref color_convert_rgb_gray "color conversions" + COLOR_RGB2GRAY = 7, + COLOR_GRAY2BGR = 8, + COLOR_GRAY2RGB = COLOR_GRAY2BGR, + COLOR_GRAY2BGRA = 9, + COLOR_GRAY2RGBA = COLOR_GRAY2BGRA, + COLOR_BGRA2GRAY = 10, + COLOR_RGBA2GRAY = 11, + + COLOR_BGR2BGR565 = 12, //!< convert between RGB/BGR and BGR565 (16-bit images) + COLOR_RGB2BGR565 = 13, + COLOR_BGR5652BGR = 14, + COLOR_BGR5652RGB = 15, + COLOR_BGRA2BGR565 = 16, + COLOR_RGBA2BGR565 = 17, + COLOR_BGR5652BGRA = 18, + COLOR_BGR5652RGBA = 19, + + COLOR_GRAY2BGR565 = 20, //!< convert between grayscale to BGR565 (16-bit images) + COLOR_BGR5652GRAY = 21, + + COLOR_BGR2BGR555 = 22, //!< convert between RGB/BGR and BGR555 (16-bit images) + COLOR_RGB2BGR555 = 23, + COLOR_BGR5552BGR = 24, + COLOR_BGR5552RGB = 25, + COLOR_BGRA2BGR555 = 26, + COLOR_RGBA2BGR555 = 27, + COLOR_BGR5552BGRA = 28, + COLOR_BGR5552RGBA = 29, + + COLOR_GRAY2BGR555 = 30, //!< convert between grayscale and BGR555 (16-bit images) + COLOR_BGR5552GRAY = 31, + + COLOR_BGR2XYZ = 32, //!< convert RGB/BGR to CIE XYZ, @ref color_convert_rgb_xyz "color conversions" + COLOR_RGB2XYZ = 33, + COLOR_XYZ2BGR = 34, + COLOR_XYZ2RGB = 35, + + COLOR_BGR2YCrCb = 36, //!< convert RGB/BGR to luma-chroma (aka YCC), @ref color_convert_rgb_ycrcb "color conversions" + COLOR_RGB2YCrCb = 37, + COLOR_YCrCb2BGR = 38, + COLOR_YCrCb2RGB = 39, + + COLOR_BGR2HSV = 40, //!< convert RGB/BGR to HSV (hue saturation value) with H range 0..180 if 8 bit image, @ref color_convert_rgb_hsv "color conversions" + COLOR_RGB2HSV = 41, + + COLOR_BGR2Lab = 44, //!< convert RGB/BGR to CIE Lab, @ref color_convert_rgb_lab "color conversions" + COLOR_RGB2Lab = 45, + + COLOR_BGR2Luv = 50, //!< convert RGB/BGR to CIE Luv, @ref color_convert_rgb_luv "color conversions" + COLOR_RGB2Luv = 51, + COLOR_BGR2HLS = 52, //!< convert RGB/BGR to HLS (hue lightness saturation) with H 
range 0..180 if 8 bit image, @ref color_convert_rgb_hls "color conversions" + COLOR_RGB2HLS = 53, + + COLOR_HSV2BGR = 54, //!< backward conversions HSV to RGB/BGR with H range 0..180 if 8 bit image + COLOR_HSV2RGB = 55, + + COLOR_Lab2BGR = 56, + COLOR_Lab2RGB = 57, + COLOR_Luv2BGR = 58, + COLOR_Luv2RGB = 59, + COLOR_HLS2BGR = 60, //!< backward conversions HLS to RGB/BGR with H range 0..180 if 8 bit image + COLOR_HLS2RGB = 61, + + COLOR_BGR2HSV_FULL = 66, //!< convert RGB/BGR to HSV (hue saturation value) with H range 0..255 if 8 bit image, @ref color_convert_rgb_hsv "color conversions" + COLOR_RGB2HSV_FULL = 67, + COLOR_BGR2HLS_FULL = 68, //!< convert RGB/BGR to HLS (hue lightness saturation) with H range 0..255 if 8 bit image, @ref color_convert_rgb_hls "color conversions" + COLOR_RGB2HLS_FULL = 69, + + COLOR_HSV2BGR_FULL = 70, //!< backward conversions HSV to RGB/BGR with H range 0..255 if 8 bit image + COLOR_HSV2RGB_FULL = 71, + COLOR_HLS2BGR_FULL = 72, //!< backward conversions HLS to RGB/BGR with H range 0..255 if 8 bit image + COLOR_HLS2RGB_FULL = 73, + + COLOR_LBGR2Lab = 74, + COLOR_LRGB2Lab = 75, + COLOR_LBGR2Luv = 76, + COLOR_LRGB2Luv = 77, + + COLOR_Lab2LBGR = 78, + COLOR_Lab2LRGB = 79, + COLOR_Luv2LBGR = 80, + COLOR_Luv2LRGB = 81, + + COLOR_BGR2YUV = 82, //!< convert between RGB/BGR and YUV + COLOR_RGB2YUV = 83, + COLOR_YUV2BGR = 84, + COLOR_YUV2RGB = 85, + + //! YUV 4:2:0 family to RGB + COLOR_YUV2RGB_NV12 = 90, + COLOR_YUV2BGR_NV12 = 91, + COLOR_YUV2RGB_NV21 = 92, + COLOR_YUV2BGR_NV21 = 93, + COLOR_YUV420sp2RGB = COLOR_YUV2RGB_NV21, + COLOR_YUV420sp2BGR = COLOR_YUV2BGR_NV21, + + COLOR_YUV2RGBA_NV12 = 94, + COLOR_YUV2BGRA_NV12 = 95, + COLOR_YUV2RGBA_NV21 = 96, + COLOR_YUV2BGRA_NV21 = 97, + COLOR_YUV420sp2RGBA = COLOR_YUV2RGBA_NV21, + COLOR_YUV420sp2BGRA = COLOR_YUV2BGRA_NV21, + + COLOR_YUV2RGB_YV12 = 98, + COLOR_YUV2BGR_YV12 = 99, + COLOR_YUV2RGB_IYUV = 100, + COLOR_YUV2BGR_IYUV = 101, + COLOR_YUV2RGB_I420 = COLOR_YUV2RGB_IYUV, + COLOR_YUV2BGR_I420 = COLOR_YUV2BGR_IYUV, + COLOR_YUV420p2RGB = COLOR_YUV2RGB_YV12, + COLOR_YUV420p2BGR = COLOR_YUV2BGR_YV12, + + COLOR_YUV2RGBA_YV12 = 102, + COLOR_YUV2BGRA_YV12 = 103, + COLOR_YUV2RGBA_IYUV = 104, + COLOR_YUV2BGRA_IYUV = 105, + COLOR_YUV2RGBA_I420 = COLOR_YUV2RGBA_IYUV, + COLOR_YUV2BGRA_I420 = COLOR_YUV2BGRA_IYUV, + COLOR_YUV420p2RGBA = COLOR_YUV2RGBA_YV12, + COLOR_YUV420p2BGRA = COLOR_YUV2BGRA_YV12, + + COLOR_YUV2GRAY_420 = 106, + COLOR_YUV2GRAY_NV21 = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_NV12 = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_YV12 = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_IYUV = COLOR_YUV2GRAY_420, + COLOR_YUV2GRAY_I420 = COLOR_YUV2GRAY_420, + COLOR_YUV420sp2GRAY = COLOR_YUV2GRAY_420, + COLOR_YUV420p2GRAY = COLOR_YUV2GRAY_420, + + //! 
YUV 4:2:2 family to RGB + COLOR_YUV2RGB_UYVY = 107, + COLOR_YUV2BGR_UYVY = 108, + //COLOR_YUV2RGB_VYUY = 109, + //COLOR_YUV2BGR_VYUY = 110, + COLOR_YUV2RGB_Y422 = COLOR_YUV2RGB_UYVY, + COLOR_YUV2BGR_Y422 = COLOR_YUV2BGR_UYVY, + COLOR_YUV2RGB_UYNV = COLOR_YUV2RGB_UYVY, + COLOR_YUV2BGR_UYNV = COLOR_YUV2BGR_UYVY, + + COLOR_YUV2RGBA_UYVY = 111, + COLOR_YUV2BGRA_UYVY = 112, + //COLOR_YUV2RGBA_VYUY = 113, + //COLOR_YUV2BGRA_VYUY = 114, + COLOR_YUV2RGBA_Y422 = COLOR_YUV2RGBA_UYVY, + COLOR_YUV2BGRA_Y422 = COLOR_YUV2BGRA_UYVY, + COLOR_YUV2RGBA_UYNV = COLOR_YUV2RGBA_UYVY, + COLOR_YUV2BGRA_UYNV = COLOR_YUV2BGRA_UYVY, + + COLOR_YUV2RGB_YUY2 = 115, + COLOR_YUV2BGR_YUY2 = 116, + COLOR_YUV2RGB_YVYU = 117, + COLOR_YUV2BGR_YVYU = 118, + COLOR_YUV2RGB_YUYV = COLOR_YUV2RGB_YUY2, + COLOR_YUV2BGR_YUYV = COLOR_YUV2BGR_YUY2, + COLOR_YUV2RGB_YUNV = COLOR_YUV2RGB_YUY2, + COLOR_YUV2BGR_YUNV = COLOR_YUV2BGR_YUY2, + + COLOR_YUV2RGBA_YUY2 = 119, + COLOR_YUV2BGRA_YUY2 = 120, + COLOR_YUV2RGBA_YVYU = 121, + COLOR_YUV2BGRA_YVYU = 122, + COLOR_YUV2RGBA_YUYV = COLOR_YUV2RGBA_YUY2, + COLOR_YUV2BGRA_YUYV = COLOR_YUV2BGRA_YUY2, + COLOR_YUV2RGBA_YUNV = COLOR_YUV2RGBA_YUY2, + COLOR_YUV2BGRA_YUNV = COLOR_YUV2BGRA_YUY2, + + COLOR_YUV2GRAY_UYVY = 123, + COLOR_YUV2GRAY_YUY2 = 124, + //CV_YUV2GRAY_VYUY = CV_YUV2GRAY_UYVY, + COLOR_YUV2GRAY_Y422 = COLOR_YUV2GRAY_UYVY, + COLOR_YUV2GRAY_UYNV = COLOR_YUV2GRAY_UYVY, + COLOR_YUV2GRAY_YVYU = COLOR_YUV2GRAY_YUY2, + COLOR_YUV2GRAY_YUYV = COLOR_YUV2GRAY_YUY2, + COLOR_YUV2GRAY_YUNV = COLOR_YUV2GRAY_YUY2, + + //! alpha premultiplication + COLOR_RGBA2mRGBA = 125, + COLOR_mRGBA2RGBA = 126, + + //! RGB to YUV 4:2:0 family + COLOR_RGB2YUV_I420 = 127, + COLOR_BGR2YUV_I420 = 128, + COLOR_RGB2YUV_IYUV = COLOR_RGB2YUV_I420, + COLOR_BGR2YUV_IYUV = COLOR_BGR2YUV_I420, + + COLOR_RGBA2YUV_I420 = 129, + COLOR_BGRA2YUV_I420 = 130, + COLOR_RGBA2YUV_IYUV = COLOR_RGBA2YUV_I420, + COLOR_BGRA2YUV_IYUV = COLOR_BGRA2YUV_I420, + COLOR_RGB2YUV_YV12 = 131, + COLOR_BGR2YUV_YV12 = 132, + COLOR_RGBA2YUV_YV12 = 133, + COLOR_BGRA2YUV_YV12 = 134, + + //! Demosaicing, see @ref color_convert_bayer "color conversions" for additional information + COLOR_BayerBG2BGR = 46, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2BGR = 47, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2BGR = 48, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2BGR = 49, //!< equivalent to GBRG Bayer pattern + + COLOR_BayerRGGB2BGR = COLOR_BayerBG2BGR, + COLOR_BayerGRBG2BGR = COLOR_BayerGB2BGR, + COLOR_BayerBGGR2BGR = COLOR_BayerRG2BGR, + COLOR_BayerGBRG2BGR = COLOR_BayerGR2BGR, + + COLOR_BayerRGGB2RGB = COLOR_BayerBGGR2BGR, + COLOR_BayerGRBG2RGB = COLOR_BayerGBRG2BGR, + COLOR_BayerBGGR2RGB = COLOR_BayerRGGB2BGR, + COLOR_BayerGBRG2RGB = COLOR_BayerGRBG2BGR, + + COLOR_BayerBG2RGB = COLOR_BayerRG2BGR, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2RGB = COLOR_BayerGR2BGR, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2RGB = COLOR_BayerBG2BGR, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2RGB = COLOR_BayerGB2BGR, //!< equivalent to GBRG Bayer pattern + + COLOR_BayerBG2GRAY = 86, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2GRAY = 87, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2GRAY = 88, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2GRAY = 89, //!< equivalent to GBRG Bayer pattern + + COLOR_BayerRGGB2GRAY = COLOR_BayerBG2GRAY, + COLOR_BayerGRBG2GRAY = COLOR_BayerGB2GRAY, + COLOR_BayerBGGR2GRAY = COLOR_BayerRG2GRAY, + COLOR_BayerGBRG2GRAY = COLOR_BayerGR2GRAY, + + //! 
Demosaicing using Variable Number of Gradients + COLOR_BayerBG2BGR_VNG = 62, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2BGR_VNG = 63, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2BGR_VNG = 64, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2BGR_VNG = 65, //!< equivalent to GBRG Bayer pattern + + COLOR_BayerRGGB2BGR_VNG = COLOR_BayerBG2BGR_VNG, + COLOR_BayerGRBG2BGR_VNG = COLOR_BayerGB2BGR_VNG, + COLOR_BayerBGGR2BGR_VNG = COLOR_BayerRG2BGR_VNG, + COLOR_BayerGBRG2BGR_VNG = COLOR_BayerGR2BGR_VNG, + + COLOR_BayerRGGB2RGB_VNG = COLOR_BayerBGGR2BGR_VNG, + COLOR_BayerGRBG2RGB_VNG = COLOR_BayerGBRG2BGR_VNG, + COLOR_BayerBGGR2RGB_VNG = COLOR_BayerRGGB2BGR_VNG, + COLOR_BayerGBRG2RGB_VNG = COLOR_BayerGRBG2BGR_VNG, + + COLOR_BayerBG2RGB_VNG = COLOR_BayerRG2BGR_VNG, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2RGB_VNG = COLOR_BayerGR2BGR_VNG, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2RGB_VNG = COLOR_BayerBG2BGR_VNG, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2RGB_VNG = COLOR_BayerGB2BGR_VNG, //!< equivalent to GBRG Bayer pattern + + //! Edge-Aware Demosaicing + COLOR_BayerBG2BGR_EA = 135, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2BGR_EA = 136, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2BGR_EA = 137, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2BGR_EA = 138, //!< equivalent to GBRG Bayer pattern + + COLOR_BayerRGGB2BGR_EA = COLOR_BayerBG2BGR_EA, + COLOR_BayerGRBG2BGR_EA = COLOR_BayerGB2BGR_EA, + COLOR_BayerBGGR2BGR_EA = COLOR_BayerRG2BGR_EA, + COLOR_BayerGBRG2BGR_EA = COLOR_BayerGR2BGR_EA, + + COLOR_BayerRGGB2RGB_EA = COLOR_BayerBGGR2BGR_EA, + COLOR_BayerGRBG2RGB_EA = COLOR_BayerGBRG2BGR_EA, + COLOR_BayerBGGR2RGB_EA = COLOR_BayerRGGB2BGR_EA, + COLOR_BayerGBRG2RGB_EA = COLOR_BayerGRBG2BGR_EA, + + COLOR_BayerBG2RGB_EA = COLOR_BayerRG2BGR_EA, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2RGB_EA = COLOR_BayerGR2BGR_EA, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2RGB_EA = COLOR_BayerBG2BGR_EA, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2RGB_EA = COLOR_BayerGB2BGR_EA, //!< equivalent to GBRG Bayer pattern + + //! Demosaicing with alpha channel + COLOR_BayerBG2BGRA = 139, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2BGRA = 140, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2BGRA = 141, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2BGRA = 142, //!< equivalent to GBRG Bayer pattern + + COLOR_BayerRGGB2BGRA = COLOR_BayerBG2BGRA, + COLOR_BayerGRBG2BGRA = COLOR_BayerGB2BGRA, + COLOR_BayerBGGR2BGRA = COLOR_BayerRG2BGRA, + COLOR_BayerGBRG2BGRA = COLOR_BayerGR2BGRA, + + COLOR_BayerRGGB2RGBA = COLOR_BayerBGGR2BGRA, + COLOR_BayerGRBG2RGBA = COLOR_BayerGBRG2BGRA, + COLOR_BayerBGGR2RGBA = COLOR_BayerRGGB2BGRA, + COLOR_BayerGBRG2RGBA = COLOR_BayerGRBG2BGRA, + + COLOR_BayerBG2RGBA = COLOR_BayerRG2BGRA, //!< equivalent to RGGB Bayer pattern + COLOR_BayerGB2RGBA = COLOR_BayerGR2BGRA, //!< equivalent to GRBG Bayer pattern + COLOR_BayerRG2RGBA = COLOR_BayerBG2BGRA, //!< equivalent to BGGR Bayer pattern + COLOR_BayerGR2RGBA = COLOR_BayerGB2BGRA, //!< equivalent to GBRG Bayer pattern + + COLOR_COLORCVT_MAX = 143 +}; + +//! @addtogroup imgproc_shape +//! @{ + +//! 
types of intersection between rectangles +enum RectanglesIntersectTypes { + INTERSECT_NONE = 0, //!< No intersection + INTERSECT_PARTIAL = 1, //!< There is a partial intersection + INTERSECT_FULL = 2 //!< One of the rectangle is fully enclosed in the other +}; + +/** types of line +@ingroup imgproc_draw +*/ +enum LineTypes { + FILLED = -1, + LINE_4 = 4, //!< 4-connected line + LINE_8 = 8, //!< 8-connected line + LINE_AA = 16 //!< antialiased line +}; + +/** Only a subset of Hershey fonts are supported +@ingroup imgproc_draw +*/ +enum HersheyFonts { + FONT_HERSHEY_SIMPLEX = 0, //!< normal size sans-serif font + FONT_HERSHEY_PLAIN = 1, //!< small size sans-serif font + FONT_HERSHEY_DUPLEX = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX) + FONT_HERSHEY_COMPLEX = 3, //!< normal size serif font + FONT_HERSHEY_TRIPLEX = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX) + FONT_HERSHEY_COMPLEX_SMALL = 5, //!< smaller version of FONT_HERSHEY_COMPLEX + FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font + FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX + FONT_ITALIC = 16 //!< flag for italic font +}; + +/** Possible set of marker types used for the cv::drawMarker function +@ingroup imgproc_draw +*/ +enum MarkerTypes +{ + MARKER_CROSS = 0, //!< A crosshair marker shape + MARKER_TILTED_CROSS = 1, //!< A 45 degree tilted crosshair marker shape + MARKER_STAR = 2, //!< A star marker shape, combination of cross and tilted cross + MARKER_DIAMOND = 3, //!< A diamond marker shape + MARKER_SQUARE = 4, //!< A square marker shape + MARKER_TRIANGLE_UP = 5, //!< An upwards pointing triangle marker shape + MARKER_TRIANGLE_DOWN = 6 //!< A downwards pointing triangle marker shape +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform +*/ +class CV_EXPORTS_W GeneralizedHough : public Algorithm +{ +public: + //! set template to search + CV_WRAP virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0; + CV_WRAP virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0; + + //! find template on image + CV_WRAP virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0; + CV_WRAP virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0; + + //! Canny low threshold. + CV_WRAP virtual void setCannyLowThresh(int cannyLowThresh) = 0; + CV_WRAP virtual int getCannyLowThresh() const = 0; + + //! Canny high threshold. + CV_WRAP virtual void setCannyHighThresh(int cannyHighThresh) = 0; + CV_WRAP virtual int getCannyHighThresh() const = 0; + + //! Minimum distance between the centers of the detected objects. + CV_WRAP virtual void setMinDist(double minDist) = 0; + CV_WRAP virtual double getMinDist() const = 0; + + //! Inverse ratio of the accumulator resolution to the image resolution. + CV_WRAP virtual void setDp(double dp) = 0; + CV_WRAP virtual double getDp() const = 0; + + //! Maximal size of inner buffers. + CV_WRAP virtual void setMaxBufferSize(int maxBufferSize) = 0; + CV_WRAP virtual int getMaxBufferSize() const = 0; +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform + +Detects position only without translation and rotation @cite Ballard1981 . +*/ +class CV_EXPORTS_W GeneralizedHoughBallard : public GeneralizedHough +{ +public: + //! 
R-Table levels. + CV_WRAP virtual void setLevels(int levels) = 0; + CV_WRAP virtual int getLevels() const = 0; + + //! The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected. + CV_WRAP virtual void setVotesThreshold(int votesThreshold) = 0; + CV_WRAP virtual int getVotesThreshold() const = 0; +}; + +/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform + +Detects position, translation and rotation @cite Guil1999 . +*/ +class CV_EXPORTS_W GeneralizedHoughGuil : public GeneralizedHough +{ +public: + //! Angle difference in degrees between two points in feature. + CV_WRAP virtual void setXi(double xi) = 0; + CV_WRAP virtual double getXi() const = 0; + + //! Feature table levels. + CV_WRAP virtual void setLevels(int levels) = 0; + CV_WRAP virtual int getLevels() const = 0; + + //! Maximal difference between angles that treated as equal. + CV_WRAP virtual void setAngleEpsilon(double angleEpsilon) = 0; + CV_WRAP virtual double getAngleEpsilon() const = 0; + + //! Minimal rotation angle to detect in degrees. + CV_WRAP virtual void setMinAngle(double minAngle) = 0; + CV_WRAP virtual double getMinAngle() const = 0; + + //! Maximal rotation angle to detect in degrees. + CV_WRAP virtual void setMaxAngle(double maxAngle) = 0; + CV_WRAP virtual double getMaxAngle() const = 0; + + //! Angle step in degrees. + CV_WRAP virtual void setAngleStep(double angleStep) = 0; + CV_WRAP virtual double getAngleStep() const = 0; + + //! Angle votes threshold. + CV_WRAP virtual void setAngleThresh(int angleThresh) = 0; + CV_WRAP virtual int getAngleThresh() const = 0; + + //! Minimal scale to detect. + CV_WRAP virtual void setMinScale(double minScale) = 0; + CV_WRAP virtual double getMinScale() const = 0; + + //! Maximal scale to detect. + CV_WRAP virtual void setMaxScale(double maxScale) = 0; + CV_WRAP virtual double getMaxScale() const = 0; + + //! Scale step. + CV_WRAP virtual void setScaleStep(double scaleStep) = 0; + CV_WRAP virtual double getScaleStep() const = 0; + + //! Scale votes threshold. + CV_WRAP virtual void setScaleThresh(int scaleThresh) = 0; + CV_WRAP virtual int getScaleThresh() const = 0; + + //! Position votes threshold. + CV_WRAP virtual void setPosThresh(int posThresh) = 0; + CV_WRAP virtual int getPosThresh() const = 0; +}; + +//! @} imgproc_shape + +//! @addtogroup imgproc_hist +//! @{ + +/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. +*/ +class CV_EXPORTS_W CLAHE : public Algorithm +{ +public: + /** @brief Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization. + + @param src Source image of type CV_8UC1 or CV_16UC1. + @param dst Destination image. + */ + CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0; + + /** @brief Sets threshold for contrast limiting. + + @param clipLimit threshold value. + */ + CV_WRAP virtual void setClipLimit(double clipLimit) = 0; + + //! Returns threshold value for contrast limiting. + CV_WRAP virtual double getClipLimit() const = 0; + + /** @brief Sets size of grid for histogram equalization. Input image will be divided into + equally sized rectangular tiles. + + @param tileGridSize defines the number of tiles in row and column. + */ + CV_WRAP virtual void setTilesGridSize(Size tileGridSize) = 0; + + //!@brief Returns Size defines the number of tiles in row and column. 
+    CV_WRAP virtual Size getTilesGridSize() const = 0;
+
+    CV_WRAP virtual void collectGarbage() = 0;
+};
+
+//! @} imgproc_hist
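A minimal usage sketch for this interface, assuming the createCLAHE factory declared later in this header; the helper name equalize, the clip limit of 2.0 and the 8x8 tile grid are illustrative defaults:

    #include <opencv2/imgproc.hpp>

    // Equalize a CV_8UC1 image with contrast-limited adaptive histogram equalization.
    void equalize(const cv::Mat& gray, cv::Mat& dst)
    {
        cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(2.0, cv::Size(8, 8));
        clahe->apply(gray, dst);
    }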
+//! @addtogroup imgproc_subdiv2d
+//! @{
+
+class CV_EXPORTS_W Subdiv2D
+{
+public:
+    /** Subdiv2D point location cases */
+    enum { PTLOC_ERROR        = -2, //!< Point location error
+           PTLOC_OUTSIDE_RECT = -1, //!< Point outside the subdivision bounding rect
+           PTLOC_INSIDE       = 0, //!< Point inside some facet
+           PTLOC_VERTEX       = 1, //!< Point coincides with one of the subdivision vertices
+           PTLOC_ON_EDGE      = 2  //!< Point on some edge
+         };
+
+    /** Subdiv2D edge type navigation (see: getEdge()) */
+    enum { NEXT_AROUND_ORG   = 0x00,
+           NEXT_AROUND_DST   = 0x22,
+           PREV_AROUND_ORG   = 0x11,
+           PREV_AROUND_DST   = 0x33,
+           NEXT_AROUND_LEFT  = 0x13,
+           NEXT_AROUND_RIGHT = 0x31,
+           PREV_AROUND_LEFT  = 0x20,
+           PREV_AROUND_RIGHT = 0x02
+         };
+
+    /** creates an empty Subdiv2D object.
+    To create a new empty Delaunay subdivision you need to use the #initDelaunay function.
+     */
+    CV_WRAP Subdiv2D();
+
+    /** @overload
+
+    @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision.
+
+    The function creates an empty Delaunay subdivision where 2D points can be added using the function
+    insert() . All of the points to be added must be within the specified rectangle, otherwise a runtime
+    error is raised.
+     */
+    CV_WRAP Subdiv2D(Rect rect);
+
+    /** @brief Creates a new empty Delaunay subdivision
+
+    @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision.
+
+     */
+    CV_WRAP void initDelaunay(Rect rect);
+
+    /** @brief Insert a single point into a Delaunay triangulation.
+
+    @param pt Point to insert.
+
+    The function inserts a single point into a subdivision and modifies the subdivision topology
+    appropriately. If a point with the same coordinates exists already, no new point is added.
+    @returns the ID of the point.
+
+    @note If the point is outside of the triangulation specified rect a runtime error is raised.
+     */
+    CV_WRAP int insert(Point2f pt);
+
+    /** @brief Insert multiple points into a Delaunay triangulation.
+
+    @param ptvec Points to insert.
+
+    The function inserts a vector of points into a subdivision and modifies the subdivision topology
+    appropriately.
+     */
+    CV_WRAP void insert(const std::vector<Point2f>& ptvec);
+
+    /** @brief Returns the location of a point within a Delaunay triangulation.
+
+    @param pt Point to locate.
+    @param edge Output edge that the point belongs to or is located to the right of it.
+    @param vertex Optional output vertex the input point coincides with.
+
+    The function locates the input point within the subdivision and gives one of the triangle edges
+    or vertices.
+
+    @returns an integer which specify one of the following five cases for point location:
+    -  The point falls into some facet. The function returns #PTLOC_INSIDE and edge will contain one of
+       edges of the facet.
+    -  The point falls onto the edge. The function returns #PTLOC_ON_EDGE and edge will contain this edge.
+    -  The point coincides with one of the subdivision vertices. The function returns #PTLOC_VERTEX and
+       vertex will contain a pointer to the vertex.
+    -  The point is outside the subdivision reference rectangle. The function returns #PTLOC_OUTSIDE_RECT
+       and no pointers are filled.
+    -  One of input arguments is invalid. A runtime error is raised or, if silent or "parent" error
+       processing mode is selected, #PTLOC_ERROR is returned.
+     */
+    CV_WRAP int locate(Point2f pt, CV_OUT int& edge, CV_OUT int& vertex);
+
+    /** @brief Finds the subdivision vertex closest to the given point.
+
+    @param pt Input point.
+    @param nearestPt Output subdivision vertex point.
+
+    The function is another function that locates the input point within the subdivision. It finds the
+    subdivision vertex that is the closest to the input point. It is not necessarily one of vertices
+    of the facet containing the input point, though the facet (located using locate() ) is used as a
+    starting point.
+
+    @returns vertex ID.
+     */
+    CV_WRAP int findNearest(Point2f pt, CV_OUT Point2f* nearestPt = 0);
+
+    /** @brief Returns a list of all edges.
+
+    @param edgeList Output vector.
+
+    The function gives each edge as a 4 numbers vector, where each two are one of the edge
+    vertices. i.e. org_x = v[0], org_y = v[1], dst_x = v[2], dst_y = v[3].
+     */
+    CV_WRAP void getEdgeList(CV_OUT std::vector<Vec4f>& edgeList) const;
+
+    /** @brief Returns a list of the leading edge ID connected to each triangle.
+
+    @param leadingEdgeList Output vector.
+
+    The function gives one edge ID for each triangle.
+     */
+    CV_WRAP void getLeadingEdgeList(CV_OUT std::vector<int>& leadingEdgeList) const;
+
+    /** @brief Returns a list of all triangles.
+
+    @param triangleList Output vector.
+
+    The function gives each triangle as a 6 numbers vector, where each two are one of the triangle
+    vertices. i.e. p1_x = v[0], p1_y = v[1], p2_x = v[2], p2_y = v[3], p3_x = v[4], p3_y = v[5].
+     */
+    CV_WRAP void getTriangleList(CV_OUT std::vector<Vec6f>& triangleList) const;
+
+    /** @brief Returns a list of all Voronoi facets.
+
+    @param idx Vector of vertices IDs to consider. For all vertices you can pass empty vector.
+    @param facetList Output vector of the Voronoi facets.
+    @param facetCenters Output vector of the Voronoi facets center points.
+
+     */
+    CV_WRAP void getVoronoiFacetList(const std::vector<int>& idx, CV_OUT std::vector<std::vector<Point2f> >& facetList,
+                                     CV_OUT std::vector<Point2f>& facetCenters);
+
+    /** @brief Returns vertex location from vertex ID.
+
+    @param vertex vertex ID.
+    @param firstEdge Optional. The first edge ID which is connected to the vertex.
+    @returns vertex (x,y)
+
+     */
+    CV_WRAP Point2f getVertex(int vertex, CV_OUT int* firstEdge = 0) const;
+
+    /** @brief Returns one of the edges related to the given edge.
+
+    @param edge Subdivision edge ID.
+    @param nextEdgeType Parameter specifying which of the related edges to return.
+    The following values are possible:
+    -   NEXT_AROUND_ORG next around the edge origin ( eOnext on the picture below if e is the input edge)
+    -   NEXT_AROUND_DST next around the edge vertex ( eDnext )
+    -   PREV_AROUND_ORG previous around the edge origin (reversed eRnext )
+    -   PREV_AROUND_DST previous around the edge destination (reversed eLnext )
+    -   NEXT_AROUND_LEFT next around the left facet ( eLnext )
+    -   NEXT_AROUND_RIGHT next around the right facet ( eRnext )
+    -   PREV_AROUND_LEFT previous around the left facet (reversed eOnext )
+    -   PREV_AROUND_RIGHT previous around the right facet (reversed eDnext )
+
+    ![sample output](pics/quadedge.png)
+
+    @returns edge ID related to the input edge.
+     */
+    CV_WRAP int getEdge( int edge, int nextEdgeType ) const;
+
+    /** @brief Returns next edge around the edge origin.
+
+    @param edge Subdivision edge ID.
+
+    @returns an integer which is next edge ID around the edge origin: eOnext on the
+    picture above if e is the input edge).
+     */
+    CV_WRAP int nextEdge(int edge) const;
+
+    /** @brief Returns another edge of the same quad-edge.
+
+    @param edge Subdivision edge ID.
+    @param rotate Parameter specifying which of the edges of the same quad-edge as the input
+    one to return. The following values are possible:
+    -   0 - the input edge ( e on the picture below if e is the input edge)
+    -   1 - the rotated edge ( eRot )
+    -   2 - the reversed edge (reversed e (in green))
+    -   3 - the reversed rotated edge (reversed eRot (in green))
+
+    @returns one of the edges ID of the same quad-edge as the input edge.
+     */
+    CV_WRAP int rotateEdge(int edge, int rotate) const;
+    CV_WRAP int symEdge(int edge) const;
+
+    /** @brief Returns the edge origin.
+
+    @param edge Subdivision edge ID.
+    @param orgpt Output vertex location.
+
+    @returns vertex ID.
+     */
+    CV_WRAP int edgeOrg(int edge, CV_OUT Point2f* orgpt = 0) const;
+
+    /** @brief Returns the edge destination.
+
+    @param edge Subdivision edge ID.
+    @param dstpt Output vertex location.
+
+    @returns vertex ID.
+     */
+    CV_WRAP int edgeDst(int edge, CV_OUT Point2f* dstpt = 0) const;
+
+protected:
+    int newEdge();
+    void deleteEdge(int edge);
+    int newPoint(Point2f pt, bool isvirtual, int firstEdge = 0);
+    void deletePoint(int vtx);
+    void setEdgePoints( int edge, int orgPt, int dstPt );
+    void splice( int edgeA, int edgeB );
+    int connectEdges( int edgeA, int edgeB );
+    void swapEdges( int edge );
+    int isRightOf(Point2f pt, int edge) const;
+    void calcVoronoi();
+    void clearVoronoi();
+    void checkSubdiv() const;
+
+    struct CV_EXPORTS Vertex
+    {
+        Vertex();
+        Vertex(Point2f pt, bool isvirtual, int firstEdge=0);
+        bool isvirtual() const;
+        bool isfree() const;
+
+        int firstEdge;
+        int type;
+        Point2f pt;
+    };
+
+    struct CV_EXPORTS QuadEdge
+    {
+        QuadEdge();
+        QuadEdge(int edgeidx);
+        bool isfree() const;
+
+        int next[4];
+        int pt[4];
+    };
+
+    //! All of the vertices
+    std::vector<Vertex> vtx;
+    //! All of the edges
+    std::vector<QuadEdge> qedges;
+    int freeQEdge;
+    int freePoint;
+    bool validGeometry;
+
+    int recentEdge;
+    //! Top left corner of the bounding rect
+    Point2f topLeft;
+    //! Bottom right corner of the bounding rect
+    Point2f bottomRight;
+};
+
+//! @} imgproc_subdiv2d
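A minimal sketch of the typical Subdiv2D workflow (the helper name triangulate is illustrative; note all points must lie inside the bounding rectangle, as the insert() documentation above warns):

    #include <opencv2/imgproc.hpp>

    // Delaunay-triangulate a point set and read back the triangles as (x1,y1,x2,y2,x3,y3).
    std::vector<cv::Vec6f> triangulate(const std::vector<cv::Point2f>& pts, cv::Size bounds)
    {
        cv::Subdiv2D subdiv(cv::Rect(0, 0, bounds.width, bounds.height));
        subdiv.insert(pts);
        std::vector<cv::Vec6f> triangles;
        subdiv.getTriangleList(triangles);
        return triangles;
    }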
+
+    /** @brief Returns another edge of the same quad-edge.
+
+    @param edge Subdivision edge ID.
+    @param rotate Parameter specifying which of the edges of the same quad-edge as the input
+    one to return. The following values are possible:
+    -   0 - the input edge ( e on the picture below if e is the input edge)
+    -   1 - the rotated edge ( eRot )
+    -   2 - the reversed edge (reversed e (in green))
+    -   3 - the reversed rotated edge (reversed eRot (in green))
+
+    @returns one of the edge IDs of the same quad-edge as the input edge.
+     */
+    CV_WRAP int rotateEdge(int edge, int rotate) const;
+    CV_WRAP int symEdge(int edge) const;
+
+    /** @brief Returns the edge origin.
+
+    @param edge Subdivision edge ID.
+    @param orgpt Output vertex location.
+
+    @returns vertex ID.
+     */
+    CV_WRAP int edgeOrg(int edge, CV_OUT Point2f* orgpt = 0) const;
+
+    /** @brief Returns the edge destination.
+
+    @param edge Subdivision edge ID.
+    @param dstpt Output vertex location.
+
+    @returns vertex ID.
+     */
+    CV_WRAP int edgeDst(int edge, CV_OUT Point2f* dstpt = 0) const;
+
+protected:
+    int newEdge();
+    void deleteEdge(int edge);
+    int newPoint(Point2f pt, bool isvirtual, int firstEdge = 0);
+    void deletePoint(int vtx);
+    void setEdgePoints( int edge, int orgPt, int dstPt );
+    void splice( int edgeA, int edgeB );
+    int connectEdges( int edgeA, int edgeB );
+    void swapEdges( int edge );
+    int isRightOf(Point2f pt, int edge) const;
+    void calcVoronoi();
+    void clearVoronoi();
+    void checkSubdiv() const;
+
+    struct CV_EXPORTS Vertex
+    {
+        Vertex();
+        Vertex(Point2f pt, bool isvirtual, int firstEdge=0);
+        bool isvirtual() const;
+        bool isfree() const;
+
+        int firstEdge;
+        int type;
+        Point2f pt;
+    };
+
+    struct CV_EXPORTS QuadEdge
+    {
+        QuadEdge();
+        QuadEdge(int edgeidx);
+        bool isfree() const;
+
+        int next[4];
+        int pt[4];
+    };
+
+    //! All of the vertices
+    std::vector<Vertex> vtx;
+    //! All of the edges
+    std::vector<QuadEdge> qedges;
+    int freeQEdge;
+    int freePoint;
+    bool validGeometry;
+
+    int recentEdge;
+    //! Top left corner of the bounding rect
+    Point2f topLeft;
+    //! Bottom right corner of the bounding rect
+    Point2f bottomRight;
+};
+
+//! @} imgproc_subdiv2d
+
+//! @addtogroup imgproc_feature
+//! @{
+
+/** @example samples/cpp/lsd_lines.cpp
+An example using the LineSegmentDetector
+\image html building_lsd.png "Sample output image" width=434 height=300
+*/
+
+/** @brief Line segment detector class
+
+following the algorithm described at @cite Rafael12 .
+
+@note Implementation has been removed from OpenCV version 3.4.6 to 3.4.15 and version 4.1.0 to 4.5.3 due to the original code license conflict.
+Restored again after [Computation of a NFA](https://github.com/rafael-grompone-von-gioi/binomial_nfa) code was published under the MIT license.
+*/
+class CV_EXPORTS_W LineSegmentDetector : public Algorithm
+{
+public:
+
+    /** @brief Finds lines in the input image.
+
+    This is the output of the default parameters of the algorithm on the above shown image.
+
+    ![image](pics/building_lsd.png)
+
+    @param image A grayscale (CV_8UC1) input image. If only a roi needs to be selected, use:
+    `lsd_ptr-\>detect(image(roi), lines, ...); lines += Scalar(roi.x, roi.y, roi.x, roi.y);`
+    @param lines A vector of Vec4f elements specifying the beginning and ending point of a line, where
+    Vec4f is (x1, y1, x2, y2); point 1 is the start and point 2 the end. Returned lines are strictly
+    oriented depending on the gradient.
+    @param width Vector of widths of the regions, where the lines are found. E.g. Width of line.
+    @param prec Vector of precisions with which the lines are found.
+    @param nfa Vector containing number of false alarms in the line region, with precision of 10%. The
+    bigger the value, the logarithmically better the detection.
+    - -1 corresponds to 10 mean false alarms
+    - 0 corresponds to 1 mean false alarm
+    - 1 corresponds to 0.1 mean false alarms
+    This vector will be calculated only when the object's type is #LSD_REFINE_ADV.
+     */
+    CV_WRAP virtual void detect(InputArray image, OutputArray lines,
+                        OutputArray width = noArray(), OutputArray prec = noArray(),
+                        OutputArray nfa = noArray()) = 0;
+
+    /** @brief Draws the line segments on a given image.
+    @param image The image, where the lines will be drawn. Should be bigger than or equal to the image,
+    where the lines were found.
+    @param lines A vector of the lines that need to be drawn.
+     */
+    CV_WRAP virtual void drawSegments(InputOutputArray image, InputArray lines) = 0;
+
+    /** @brief Draws two groups of lines in blue and red, counting the non-overlapping (mismatching) pixels.
+
+    @param size The size of the image, where lines1 and lines2 were found.
+    @param lines1 The first group of lines that needs to be drawn. It is visualized in blue color.
+    @param lines2 The second group of lines. They are visualized in red color.
+    @param image Optional image, where the lines will be drawn. The image should be color (3-channel)
+    in order for lines1 and lines2 to be drawn in the above mentioned colors.
+     */
+    CV_WRAP virtual int compareSegments(const Size& size, InputArray lines1, InputArray lines2, InputOutputArray image = noArray()) = 0;
+
+    virtual ~LineSegmentDetector() { }
+};
+
+/** @brief Creates a smart pointer to a LineSegmentDetector object and initializes it.
+
+The LineSegmentDetector algorithm is defined using the standard values. Only advanced users may want
+to edit those, so as to tailor it for their own application.
+
+@param refine The way found lines will be refined, see #LineSegmentDetectorModes
+@param scale The scale of the image that will be used to find the lines. Range (0..1].
+@param sigma_scale Sigma for Gaussian filter. It is computed as sigma = sigma_scale/scale.
+@param quant Bound to the quantization error on the gradient norm.
+@param ang_th Gradient angle tolerance in degrees.
+@param log_eps Detection threshold: -log10(NFA) \> log_eps. Used only when advanced refinement is chosen.
+@param density_th Minimal density of aligned region points in the enclosing rectangle.
+@param n_bins Number of bins in pseudo-ordering of gradient modulus.
+ */
+CV_EXPORTS_W Ptr<LineSegmentDetector> createLineSegmentDetector(
+    int refine = LSD_REFINE_STD, double scale = 0.8,
+    double sigma_scale = 0.6, double quant = 2.0, double ang_th = 22.5,
+    double log_eps = 0, double density_th = 0.7, int n_bins = 1024);
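+
+/* A minimal usage sketch (the file name and default parameters are illustrative):
+   detect line segments on a grayscale image and draw them back onto it.
+@code
+    cv::Mat gray = cv::imread("building.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Ptr<cv::LineSegmentDetector> lsd = cv::createLineSegmentDetector();
+    cv::Mat lines;              // Nx1 CV_32FC4: (x1, y1, x2, y2) per segment
+    lsd->detect(gray, lines);
+    lsd->drawSegments(gray, lines);
+@endcode
+*/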
+
+//! @} imgproc_feature
+
+//! @addtogroup imgproc_filter
+//! @{
+
+/** @brief Returns Gaussian filter coefficients.
+
+The function computes and returns the \f$\texttt{ksize} \times 1\f$ matrix of Gaussian filter
+coefficients:
+
+\f[G_i= \alpha *e^{-(i-( \texttt{ksize} -1)/2)^2/(2* \texttt{sigma}^2)},\f]
+
+where \f$i=0..\texttt{ksize}-1\f$ and \f$\alpha\f$ is the scale factor chosen so that \f$\sum_i G_i=1\f$.
+
+Two of such generated kernels can be passed to sepFilter2D. Those functions automatically recognize
+smoothing kernels (a symmetrical kernel with sum of weights equal to 1) and handle them accordingly.
+You may also use the higher-level GaussianBlur.
+@param ksize Aperture size. It should be odd ( \f$\texttt{ksize} \mod 2 = 1\f$ ) and positive.
+@param sigma Gaussian standard deviation. If it is non-positive, it is computed from ksize as
+`sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`.
+@param ktype Type of filter coefficients. It can be CV_32F or CV_64F .
+@sa  sepFilter2D, getDerivKernels, getStructuringElement, GaussianBlur
+ */
+CV_EXPORTS_W Mat getGaussianKernel( int ksize, double sigma, int ktype = CV_64F );
+
+/** @brief Returns filter coefficients for computing spatial image derivatives.
+
+The function computes and returns the filter coefficients for spatial image derivatives. When
+`ksize=FILTER_SCHARR`, the Scharr \f$3 \times 3\f$ kernels are generated (see #Scharr). Otherwise, Sobel
+kernels are generated (see #Sobel). The filters are normally passed to #sepFilter2D.
+
+@param kx Output matrix of row filter coefficients. It has the type ktype .
+@param ky Output matrix of column filter coefficients. It has the type ktype .
+@param dx Derivative order with respect to x.
+@param dy Derivative order with respect to y.
+@param ksize Aperture size. It can be FILTER_SCHARR, 1, 3, 5, or 7.
+@param normalize Flag indicating whether to normalize (scale down) the filter coefficients or not.
+Theoretically, the coefficients should have the denominator \f$=2^{ksize*2-dx-dy-2}\f$. If you are
+going to filter floating-point images, you are likely to use the normalized kernels. But if you
+compute derivatives of an 8-bit image, store the results in a 16-bit image, and wish to preserve
+all the fractional bits, you may want to set normalize=false .
+@param ktype Type of filter coefficients. It can be CV_32F or CV_64F .
+ */
+CV_EXPORTS_W void getDerivKernels( OutputArray kx, OutputArray ky,
+                                   int dx, int dy, int ksize,
+                                   bool normalize = false, int ktype = CV_32F );
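+
+/* A short sketch of the declarations above (variable names are illustrative):
+   build separable x-derivative kernels by hand and apply them with sepFilter2D,
+   similar in spirit to calling Sobel(src, dst, CV_32F, 1, 0, 3).
+@code
+    cv::Mat kx, ky;
+    cv::getDerivKernels(kx, ky, 1, 0, 3, false, CV_32F); // d/dx, 3x3 aperture
+    cv::Mat dx;
+    cv::sepFilter2D(src, dx, CV_32F, kx, ky);            // src: an 8-bit or float image
+@endcode
+*/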
+
+/** @brief Returns Gabor filter coefficients.
+
+For more details about Gabor filter equations and parameters, see: [Gabor
+Filter](http://en.wikipedia.org/wiki/Gabor_filter).
+
+@param ksize Size of the filter returned.
+@param sigma Standard deviation of the gaussian envelope.
+@param theta Orientation of the normal to the parallel stripes of a Gabor function.
+@param lambd Wavelength of the sinusoidal factor.
+@param gamma Spatial aspect ratio.
+@param psi Phase offset.
+@param ktype Type of filter coefficients. It can be CV_32F or CV_64F .
+ */
+CV_EXPORTS_W Mat getGaborKernel( Size ksize, double sigma, double theta, double lambd,
+                                 double gamma, double psi = CV_PI*0.5, int ktype = CV_64F );
+
+//! returns "magic" border value for erosion and dilation. It is automatically transformed to Scalar::all(-DBL_MAX) for dilation.
+static inline Scalar morphologyDefaultBorderValue() { return Scalar::all(DBL_MAX); }
+
+/** @brief Returns a structuring element of the specified size and shape for morphological operations.
+
+The function constructs and returns the structuring element that can be further passed to #erode,
+#dilate or #morphologyEx. But you can also construct an arbitrary binary mask yourself and use it as
+the structuring element.
+
+@param shape Element shape that could be one of #MorphShapes
+@param ksize Size of the structuring element.
+@param anchor Anchor position within the element. The default value \f$(-1, -1)\f$ means that the
+anchor is at the center. Note that only the shape of a cross-shaped element depends on the anchor
+position. In other cases the anchor just regulates how much the result of the morphological
+operation is shifted.
+ */
+CV_EXPORTS_W Mat getStructuringElement(int shape, Size ksize, Point anchor = Point(-1,-1));
+
+/** @example samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp
+Sample code for simple filters
+![Sample screenshot](Smoothing_Tutorial_Result_Median_Filter.jpg)
+Check @ref tutorial_gausian_median_blur_bilateral_filter "the corresponding tutorial" for more details
+ */
+
+/** @brief Blurs an image using the median filter.
+
+The function smoothes an image using the median filter with the \f$\texttt{ksize} \times
+\texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently.
+In-place operation is supported.
+
+@note The median filter uses #BORDER_REPLICATE internally to cope with border pixels, see #BorderTypes
+
+@param src input 1-, 3-, or 4-channel image; when ksize is 3 or 5, the image depth should be
+CV_8U, CV_16U, or CV_32F; for larger aperture sizes, it can only be CV_8U.
+@param dst destination array of the same size and type as src.
+@param ksize aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ...
+@sa  bilateralFilter, blur, boxFilter, GaussianBlur
+ */
+CV_EXPORTS_W void medianBlur( InputArray src, OutputArray dst, int ksize );
+
+/** @brief Blurs an image using a Gaussian filter.
+
+The function convolves the source image with the specified Gaussian kernel. In-place filtering is
+supported.
+
+@param src input image; the image can have any number of channels, which are processed
+independently, but the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst output image of the same size and type as src.
+@param ksize Gaussian kernel size. ksize.width and ksize.height can differ but they both must be
+positive and odd. Or, they can be zeros and then they are computed from sigma.
+@param sigmaX Gaussian kernel standard deviation in X direction.
+@param sigmaY Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be
+equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height,
+respectively (see #getGaussianKernel for details); to fully control the result regardless of
+possible future modifications of all this semantics, it is recommended to specify all of ksize,
+sigmaX, and sigmaY.
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+
+@sa  sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur
+ */
+CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize,
+                                double sigmaX, double sigmaY = 0,
+                                int borderType = BORDER_DEFAULT );
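+
+/* A minimal smoothing sketch (kernel sizes and sigma are illustrative): the same
+   input run through the Gaussian and median filters declared above.
+@code
+    cv::Mat blurred, denoised;
+    cv::GaussianBlur(src, blurred, cv::Size(5, 5), 1.5); // 5x5 kernel, sigmaX = 1.5
+    cv::medianBlur(src, denoised, 5);                    // 5x5 median; good for salt-and-pepper noise
+@endcode
+*/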
+
+/** @brief Applies the bilateral filter to an image.
+
+The function applies bilateral filtering to the input image, as described in
+http://www.dai.ed.ac.uk/CVonline/LOCAL_COPIES/MANDUCHI1/Bilateral_Filtering.html
+bilateralFilter can reduce unwanted noise very well while keeping edges fairly sharp. However, it is
+very slow compared to most filters.
+
+_Sigma values_: For simplicity, you can set the 2 sigma values to be the same. If they are small (\< 10),
+the filter will not have much effect, whereas if they are large (\> 150), they will have a very
+strong effect, making the image look "cartoonish".
+
+_Filter size_: Large filters (d \> 5) are very slow, so it is recommended to use d=5 for real-time
+applications, and perhaps d=9 for offline applications that need heavy noise filtering.
+
+This filter does not work in place.
+@param src Source 8-bit or floating-point, 1-channel or 3-channel image.
+@param dst Destination image of the same size and type as src .
+@param d Diameter of each pixel neighborhood that is used during filtering. If it is non-positive,
+it is computed from sigmaSpace.
+@param sigmaColor Filter sigma in the color space. A larger value of the parameter means that
+farther colors within the pixel neighborhood (see sigmaSpace) will be mixed together, resulting
+in larger areas of semi-equal color.
+@param sigmaSpace Filter sigma in the coordinate space. A larger value of the parameter means that
+farther pixels will influence each other as long as their colors are close enough (see sigmaColor
+). When d\>0, it specifies the neighborhood size regardless of sigmaSpace. Otherwise, d is
+proportional to sigmaSpace.
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes
+ */
+CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
+                                   double sigmaColor, double sigmaSpace,
+                                   int borderType = BORDER_DEFAULT );
+
+/** @brief Blurs an image using the box filter.
+
+The function smooths an image using the kernel:
+
+\f[\texttt{K} = \alpha \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \end{bmatrix}\f]
+
+where
+
+\f[\alpha = \begin{cases} \frac{1}{\texttt{ksize.width*ksize.height}} & \texttt{when } \texttt{normalize=true} \\1 & \texttt{otherwise}\end{cases}\f]
+
+Unnormalized box filter is useful for computing various integral characteristics over each pixel
+neighborhood, such as covariance matrices of image derivatives (used in dense optical flow
+algorithms, and so on). If you need to compute pixel sums over variable-size windows, use #integral.
+
+@param src input image.
+@param dst output image of the same size and type as src.
+@param ddepth the output image depth (-1 to use src.depth()).
+@param ksize blurring kernel size.
+@param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel
+center.
+@param normalize flag, specifying whether the kernel is normalized by its area or not.
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported.
+@sa  blur, bilateralFilter, GaussianBlur, medianBlur, integral
+ */
+CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
+                             Size ksize, Point anchor = Point(-1,-1),
+                             bool normalize = true,
+                             int borderType = BORDER_DEFAULT );
+
+/** @brief Calculates the normalized sum of squares of the pixel values overlapping the filter.
+
+For every pixel \f$ (x, y) \f$ in the source image, the function calculates the sum of squares of those neighboring
+pixel values which overlap the filter placed over the pixel \f$ (x, y) \f$.
+
+The unnormalized square box filter can be useful in computing local image statistics such as the local
+variance and standard deviation around the neighborhood of a pixel.
+
+@param src input image
+@param dst output image of the same size and type as src
+@param ddepth the output image depth (-1 to use src.depth())
+@param ksize kernel size
+@param anchor kernel anchor point. The default value of Point(-1, -1) denotes that the anchor is at the kernel
+center.
+@param normalize flag, specifying whether the kernel is to be normalized by its area or not.
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported.
+@sa boxFilter
+*/
+CV_EXPORTS_W void sqrBoxFilter( InputArray src, OutputArray dst, int ddepth,
+                                Size ksize, Point anchor = Point(-1, -1),
+                                bool normalize = true,
+                                int borderType = BORDER_DEFAULT );
+
+/** @brief Blurs an image using the normalized box filter.
+
+The function smooths an image using the kernel:
+
+\f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f]
+
+The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), ksize,
+anchor, true, borderType)`.
+
+@param src input image; it can have any number of channels, which are processed independently, but
+the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst output image of the same size and type as src.
+@param ksize blurring kernel size.
+@param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel
+center.
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported.
+@sa  boxFilter, bilateralFilter, GaussianBlur, medianBlur
+ */
+CV_EXPORTS_W void blur( InputArray src, OutputArray dst,
+                        Size ksize, Point anchor = Point(-1,-1),
+                        int borderType = BORDER_DEFAULT );
+
+/** @brief Convolves an image with the kernel.
+
+The function applies an arbitrary linear filter to an image. In-place operation is supported. When
+the aperture is partially outside the image, the function interpolates outlier pixel values
+according to the specified border mode.
+
+The function actually computes correlation, not convolution:
+
+\f[\texttt{dst} (x,y) = \sum _{ \substack{0\leq x' < \texttt{kernel.cols}\\{0\leq y' < \texttt{kernel.rows}}}} \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} )\f]
+
+That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip
+the kernel using #flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows -
+anchor.y - 1)`.
+
+The function uses the DFT-based algorithm in case of sufficiently large kernels (~`11 x 11` or
+larger) and the direct algorithm for small kernels.
+
+@param src input image.
+@param dst output image of the same size and the same number of channels as src.
+@param ddepth desired depth of the destination image, see @ref filter_depths "combinations"
+@param kernel convolution kernel (or rather a correlation kernel), a single-channel floating point
+matrix; if you want to apply different kernels to different channels, split the image into
+separate color planes using split and process them individually.
+@param anchor anchor of the kernel that indicates the relative position of a filtered point within
+the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor
+is at the kernel center.
+@param delta optional value added to the filtered pixels before storing them in dst.
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+@sa  sepFilter2D, dft, matchTemplate
+ */
+CV_EXPORTS_W void filter2D( InputArray src, OutputArray dst, int ddepth,
+                            InputArray kernel, Point anchor = Point(-1,-1),
+                            double delta = 0, int borderType = BORDER_DEFAULT );
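+
+/* A sketch of the true-convolution recipe from the note above (the kernel and
+   anchor values are illustrative): flip the kernel and mirror the anchor so that
+   filter2D computes a real convolution rather than correlation.
+@code
+    cv::Point anchor(1, 1);                 // assumed anchor inside a 3x3 kernel
+    cv::Mat flipped;
+    cv::flip(kernel, flipped, -1);          // flip around both axes
+    cv::Point newAnchor(kernel.cols - anchor.x - 1, kernel.rows - anchor.y - 1);
+    cv::filter2D(src, dst, -1, flipped, newAnchor);
+@endcode
+*/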
+
+/** @brief Applies a separable linear filter to an image.
+
+The function applies a separable linear filter to the image. That is, first, every row of src is
+filtered with the 1D kernel kernelX. Then, every column of the result is filtered with the 1D
+kernel kernelY. The final result shifted by delta is stored in dst .
+
+@param src Source image.
+@param dst Destination image of the same size and the same number of channels as src .
+@param ddepth Destination image depth, see @ref filter_depths "combinations"
+@param kernelX Coefficients for filtering each row.
+@param kernelY Coefficients for filtering each column.
+@param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor
+is at the kernel center.
+@param delta Value added to the filtered results before storing them.
+@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+@sa  filter2D, Sobel, GaussianBlur, boxFilter, blur
+ */
+CV_EXPORTS_W void sepFilter2D( InputArray src, OutputArray dst, int ddepth,
+                               InputArray kernelX, InputArray kernelY,
+                               Point anchor = Point(-1,-1),
+                               double delta = 0, int borderType = BORDER_DEFAULT );
+
+/** @example samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
+Sample code using Sobel and/or Scharr OpenCV functions to make a simple Edge Detector
+![Sample screenshot](Sobel_Derivatives_Tutorial_Result.jpg)
+Check @ref tutorial_sobel_derivatives "the corresponding tutorial" for more details
+*/
+
+/** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator.
+
+In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to
+calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or \f$1 \times 3\f$
+kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first
+or the second x- or y- derivatives.
+
+There is also the special value `ksize = #FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr
+filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is
+
+\f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f]
+
+for the x-derivative, or transposed for the y-derivative.
+
+The function calculates an image derivative by convolving the image with the appropriate kernel:
+
+\f[\texttt{dst} = \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}\f]
+
+The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less
+resistant to the noise. Most often, the function is called with ( xorder = 1, yorder = 0, ksize = 3)
+or ( xorder = 0, yorder = 1, ksize = 3) to calculate the first x- or y- image derivative. The first
+case corresponds to a kernel of:
+
+\f[\vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}\f]
+
+The second case corresponds to a kernel of:
+
+\f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f]
+
+@param src input image.
+@param dst output image of the same size and the same number of channels as src .
+@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of
+    8-bit input images it will result in truncated derivatives.
+@param dx order of the derivative x.
+@param dy order of the derivative y.
+@param ksize size of the extended Sobel kernel; it must be 1, 3, 5, or 7.
+@param scale optional scale factor for the computed derivative values; by default, no scaling is
+applied (see #getDerivKernels for details).
+@param delta optional delta value that is added to the results prior to storing them in dst.
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa Scharr, Laplacian, sepFilter2D, filter2D, GaussianBlur, cartToPolar + */ +CV_EXPORTS_W void Sobel( InputArray src, OutputArray dst, int ddepth, + int dx, int dy, int ksize = 3, + double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates the first order image derivative in both x and y using a Sobel operator + +Equivalent to calling: + +@code +Sobel( src, dx, CV_16SC1, 1, 0, 3 ); +Sobel( src, dy, CV_16SC1, 0, 1, 3 ); +@endcode + +@param src input image. +@param dx output image with first-order derivative in x. +@param dy output image with first-order derivative in y. +@param ksize size of Sobel kernel. It must be 3. +@param borderType pixel extrapolation method, see #BorderTypes. + Only #BORDER_DEFAULT=#BORDER_REFLECT_101 and #BORDER_REPLICATE are supported. + +@sa Sobel + */ + +CV_EXPORTS_W void spatialGradient( InputArray src, OutputArray dx, + OutputArray dy, int ksize = 3, + int borderType = BORDER_DEFAULT ); + +/** @brief Calculates the first x- or y- image derivative using Scharr operator. + +The function computes the first x- or y- spatial image derivative using the Scharr operator. The +call + +\f[\texttt{Scharr(src, dst, ddepth, dx, dy, scale, delta, borderType)}\f] + +is equivalent to + +\f[\texttt{Sobel(src, dst, ddepth, dx, dy, FILTER_SCHARR, scale, delta, borderType)} .\f] + +@param src input image. +@param dst output image of the same size and the same number of channels as src. +@param ddepth output image depth, see @ref filter_depths "combinations" +@param dx order of the derivative x. +@param dy order of the derivative y. +@param scale optional scale factor for the computed derivative values; by default, no scaling is +applied (see #getDerivKernels for details). +@param delta optional delta value that is added to the results prior to storing them in dst. +@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@sa cartToPolar + */ +CV_EXPORTS_W void Scharr( InputArray src, OutputArray dst, int ddepth, + int dx, int dy, double scale = 1, double delta = 0, + int borderType = BORDER_DEFAULT ); + +/** @example samples/cpp/laplace.cpp +An example using Laplace transformations for edge detection +*/ + +/** @brief Calculates the Laplacian of an image. + +The function calculates the Laplacian of the source image by adding up the second x and y +derivatives calculated using the Sobel operator: + +\f[\texttt{dst} = \Delta \texttt{src} = \frac{\partial^2 \texttt{src}}{\partial x^2} + \frac{\partial^2 \texttt{src}}{\partial y^2}\f] + +This is done when `ksize > 1`. When `ksize == 1`, the Laplacian is computed by filtering the image +with the following \f$3 \times 3\f$ aperture: + +\f[\vecthreethree {0}{1}{0}{1}{-4}{1}{0}{1}{0}\f] + +@param src Source image. +@param dst Destination image of the same size and the same number of channels as src . +@param ddepth Desired depth of the destination image. +@param ksize Aperture size used to compute the second-derivative filters. See #getDerivKernels for +details. The size must be positive and odd. +@param scale Optional scale factor for the computed Laplacian values. By default, no scaling is +applied. See #getDerivKernels for details. +@param delta Optional delta value that is added to the results prior to storing them in dst . +@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. 
+@sa  Sobel, Scharr
+ */
+CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth,
+                             int ksize = 1, double scale = 1, double delta = 0,
+                             int borderType = BORDER_DEFAULT );
+
+//! @} imgproc_filter
+
+//! @addtogroup imgproc_feature
+//! @{
+
+/** @example samples/cpp/edge.cpp
+This program demonstrates usage of the Canny edge detector
+
+Check @ref tutorial_canny_detector "the corresponding tutorial" for more details
+*/
+
+/** @brief Finds edges in an image using the Canny algorithm @cite Canny86 .
+
+The function finds edges in the input image and marks them in the output map edges using the
+Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The
+largest value is used to find initial segments of strong edges. See
+<http://en.wikipedia.org/wiki/Canny_edge_detector>
+
+@param image 8-bit input image.
+@param edges output edge map; single-channel 8-bit image, which has the same size as image .
+@param threshold1 first threshold for the hysteresis procedure.
+@param threshold2 second threshold for the hysteresis procedure.
+@param apertureSize aperture size for the Sobel operator.
+@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm
+\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude (
+L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough (
+L2gradient=false ).
+ */
+CV_EXPORTS_W void Canny( InputArray image, OutputArray edges,
+                         double threshold1, double threshold2,
+                         int apertureSize = 3, bool L2gradient = false );
+
+/** \overload
+
+Finds edges in an image using the Canny algorithm with custom image gradient.
+
+@param dx 16-bit x derivative of input image (CV_16SC1 or CV_16SC3).
+@param dy 16-bit y derivative of input image (same type as dx).
+@param edges output edge map; single-channel 8-bit image, which has the same size as image .
+@param threshold1 first threshold for the hysteresis procedure.
+@param threshold2 second threshold for the hysteresis procedure.
+@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm
+\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude (
+L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough (
+L2gradient=false ).
+ */
+CV_EXPORTS_W void Canny( InputArray dx, InputArray dy,
+                         OutputArray edges,
+                         double threshold1, double threshold2,
+                         bool L2gradient = false );
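+
+/* A typical pre-smoothing + Canny sketch (the thresholds are illustrative; a
+   roughly 3:1 high-to-low ratio is a common rule of thumb).
+@code
+    cv::Mat blurred, edges;
+    cv::GaussianBlur(gray, blurred, cv::Size(5, 5), 1.4); // suppress noise first
+    cv::Canny(blurred, edges, 50, 150);                   // low = 50, high = 150
+@endcode
+*/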
+
+/** @brief Calculates the minimal eigenvalue of gradient matrices for corner detection.
+
+The function is similar to cornerEigenValsAndVecs but it calculates and stores only the minimal
+eigenvalue of the covariance matrix of derivatives, that is, \f$\min(\lambda_1, \lambda_2)\f$ in terms
+of the formulae in the cornerEigenValsAndVecs description.
+
+@param src Input single-channel 8-bit or floating-point image.
+@param dst Image to store the minimal eigenvalues. It has the type CV_32FC1 and the same size as
+src .
+@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
+@param ksize Aperture parameter for the Sobel operator.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
+ */
+CV_EXPORTS_W void cornerMinEigenVal( InputArray src, OutputArray dst,
+                                     int blockSize, int ksize = 3,
+                                     int borderType = BORDER_DEFAULT );
+
+/** @brief Harris corner detector.
+
+The function runs the Harris corner detector on the image. Similarly to cornerMinEigenVal and
+cornerEigenValsAndVecs , for each pixel \f$(x, y)\f$ it calculates a \f$2\times2\f$ gradient covariance
+matrix \f$M^{(x,y)}\f$ over a \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood. Then, it
+computes the following characteristic:
+
+\f[\texttt{dst} (x,y) = \mathrm{det} M^{(x,y)} - k \cdot \left ( \mathrm{tr} M^{(x,y)} \right )^2\f]
+
+Corners in the image can be found as the local maxima of this response map.
+
+@param src Input single-channel 8-bit or floating-point image.
+@param dst Image to store the Harris detector responses. It has the type CV_32FC1 and the same
+size as src .
+@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
+@param ksize Aperture parameter for the Sobel operator.
+@param k Harris detector free parameter. See the formula above.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
+ */
+CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize,
+                                int ksize, double k,
+                                int borderType = BORDER_DEFAULT );
+
+/** @brief Calculates eigenvalues and eigenvectors of image blocks for corner detection.
+
+For every pixel \f$p\f$ , the function cornerEigenValsAndVecs considers a blockSize \f$\times\f$ blockSize
+neighborhood \f$S(p)\f$ . It calculates the covariance matrix of derivatives over the neighborhood as:
+
+\f[M = \begin{bmatrix} \sum _{S(p)}(dI/dx)^2 & \sum _{S(p)}dI/dx dI/dy \\ \sum _{S(p)}dI/dx dI/dy & \sum _{S(p)}(dI/dy)^2 \end{bmatrix}\f]
+
+where the derivatives are computed using the Sobel operator.
+
+After that, it finds eigenvectors and eigenvalues of \f$M\f$ and stores them in the destination image as
+\f$(\lambda_1, \lambda_2, x_1, y_1, x_2, y_2)\f$ where
+
+-   \f$\lambda_1, \lambda_2\f$ are the non-sorted eigenvalues of \f$M\f$
+-   \f$x_1, y_1\f$ are the eigenvectors corresponding to \f$\lambda_1\f$
+-   \f$x_2, y_2\f$ are the eigenvectors corresponding to \f$\lambda_2\f$
+
+The output of the function can be used for robust edge or corner detection.
+
+@param src Input single-channel 8-bit or floating-point image.
+@param dst Image to store the results. It has the same size as src and the type CV_32FC(6) .
+@param blockSize Neighborhood size (see details below).
+@param ksize Aperture parameter for the Sobel operator.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
+
+@sa  cornerMinEigenVal, cornerHarris, preCornerDetect
+ */
+CV_EXPORTS_W void cornerEigenValsAndVecs( InputArray src, OutputArray dst,
+                                          int blockSize, int ksize,
+                                          int borderType = BORDER_DEFAULT );
+
+/** @brief Calculates a feature map for corner detection.
+
+The function calculates the complex spatial derivative-based function of the source image
+
+\f[\texttt{dst} = (D_x \texttt{src} )^2 \cdot D_{yy} \texttt{src} + (D_y \texttt{src} )^2 \cdot D_{xx} \texttt{src} - 2 D_x \texttt{src} \cdot D_y \texttt{src} \cdot D_{xy} \texttt{src}\f]
+
+where \f$D_x\f$,\f$D_y\f$ are the first image derivatives, \f$D_{xx}\f$,\f$D_{yy}\f$ are the second image
+derivatives, and \f$D_{xy}\f$ is the mixed derivative.
+
+The corners can be found as local maxima of the function, as shown below:
+@code
+    Mat corners, dilated_corners;
+    preCornerDetect(image, corners, 3);
+    // dilation with 3x3 rectangular structuring element
+    dilate(corners, dilated_corners, Mat(), Point(-1,-1), 1);
+    Mat corner_mask = corners == dilated_corners;
+@endcode
+
+@param src Source single-channel 8-bit or floating-point image.
+@param dst Output image that has the type CV_32F and the same size as src .
+@param ksize %Aperture size of the Sobel operator.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
+ */
+CV_EXPORTS_W void preCornerDetect( InputArray src, OutputArray dst, int ksize,
+                                   int borderType = BORDER_DEFAULT );
+
+/** @brief Refines the corner locations.
+
+The function iterates to find the sub-pixel accurate location of corners or radial saddle
+points as described in @cite forstner1987fast, and as shown on the figure below.
+
+![image](pics/cornersubpix.png)
+
+Sub-pixel accurate corner locator is based on the observation that every vector from the center \f$q\f$
+to a point \f$p\f$ located within a neighborhood of \f$q\f$ is orthogonal to the image gradient at \f$p\f$
+subject to image and measurement noise. Consider the expression:
+
+\f[\epsilon _i = {DI_{p_i}}^T \cdot (q - p_i)\f]
+
+where \f${DI_{p_i}}\f$ is an image gradient at one of the points \f$p_i\f$ in a neighborhood of \f$q\f$ . The
+value of \f$q\f$ is to be found so that \f$\epsilon_i\f$ is minimized. A system of equations may be set up
+with \f$\epsilon_i\f$ set to zero:
+
+\f[\sum _i(DI_{p_i} \cdot {DI_{p_i}}^T) \cdot q - \sum _i(DI_{p_i} \cdot {DI_{p_i}}^T \cdot p_i)\f]
+
+where the gradients are summed within a neighborhood ("search window") of \f$q\f$ . Calling the first
+gradient term \f$G\f$ and the second gradient term \f$b\f$ gives:
+
+\f[q = G^{-1} \cdot b\f]
+
+The algorithm sets the center of the neighborhood window at this new center \f$q\f$ and then iterates
+until the center stays within a set threshold.
+
+@param image Input single-channel, 8-bit or float image.
+@param corners Initial coordinates of the input corners and refined coordinates provided for
+output.
+@param winSize Half of the side length of the search window. For example, if winSize=Size(5,5) ,
+then a \f$(5*2+1) \times (5*2+1) = 11 \times 11\f$ search window is used.
+@param zeroZone Half of the size of the dead region in the middle of the search zone over which
+the summation in the formula below is not done. It is used sometimes to avoid possible
+singularities of the autocorrelation matrix. The value of (-1,-1) indicates that there is no such
+size.
+@param criteria Criteria for termination of the iterative process of corner refinement. That is,
+the process of corner position refinement stops either after criteria.maxCount iterations or when
+the corner position moves by less than criteria.epsilon on some iteration.
+ */
+CV_EXPORTS_W void cornerSubPix( InputArray image, InputOutputArray corners,
+                                Size winSize, Size zeroZone,
+                                TermCriteria criteria );
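+
+/* A refinement sketch (window sizes and termination criteria are illustrative):
+   detect strong corners with goodFeaturesToTrack, declared below, then polish
+   them to sub-pixel accuracy with cornerSubPix.
+@code
+    std::vector<cv::Point2f> corners;
+    cv::goodFeaturesToTrack(gray, corners, 100, 0.01, 10);
+    cv::cornerSubPix(gray, corners, cv::Size(5, 5), cv::Size(-1, -1),
+                     cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 30, 0.01));
+@endcode
+*/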
+
+/** @brief Determines strong corners on an image.
+
+The function finds the most prominent corners in the image or in the specified image region, as
+described in @cite Shi94
+
+-   Function calculates the corner quality measure at every source image pixel using the
+    #cornerMinEigenVal or #cornerHarris .
+-   Function performs a non-maximum suppression (the local maxima in a *3 x 3* neighborhood are
+    retained).
+-   The corners with the minimal eigenvalue less than
+    \f$\texttt{qualityLevel} \cdot \max_{x,y} qualityMeasureMap(x,y)\f$ are rejected.
+-   The remaining corners are sorted by the quality measure in the descending order.
+-   Function throws away each corner for which there is a stronger corner at a distance less than
+    minDistance.
+
+The function can be used to initialize a point-based tracker of an object.
+
+@note If the function is called with different values A and B of the parameter qualityLevel , and
+A \> B, the vector of returned corners with qualityLevel=A will be the prefix of the output vector
+with qualityLevel=B .
+
+@param image Input 8-bit or floating-point 32-bit, single-channel image.
+@param corners Output vector of detected corners.
+@param maxCorners Maximum number of corners to return. If there are more corners than maxCorners,
+the strongest maxCorners of them are returned. `maxCorners <= 0` implies that no limit on the maximum is set
+and all detected corners are returned.
+@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The
+parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue
+(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the
+quality measure less than the product are rejected. For example, if the best corner has the
+quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure
+less than 15 are rejected.
+@param minDistance Minimum possible Euclidean distance between the returned corners.
+@param mask Optional region of interest. If the image is not empty (it needs to have the type
+CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
+@param blockSize Size of an average block for computing a derivative covariance matrix over each
+pixel neighborhood. See cornerEigenValsAndVecs .
+@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris)
+or #cornerMinEigenVal.
+@param k Free parameter of the Harris detector.
+
+@sa  cornerMinEigenVal, cornerHarris, calcOpticalFlowPyrLK, estimateRigidTransform
+ */
+
+CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners,
+                                     int maxCorners, double qualityLevel, double minDistance,
+                                     InputArray mask = noArray(), int blockSize = 3,
+                                     bool useHarrisDetector = false, double k = 0.04 );
+
+CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners,
+                                     int maxCorners, double qualityLevel, double minDistance,
+                                     InputArray mask, int blockSize,
+                                     int gradientSize, bool useHarrisDetector = false,
+                                     double k = 0.04 );
+
+/** @brief Same as above, but returns also quality measure of the detected corners.
+
+@param image Input 8-bit or floating-point 32-bit, single-channel image.
+@param corners Output vector of detected corners.
+@param maxCorners Maximum number of corners to return. If there are more corners than maxCorners,
+the strongest maxCorners of them are returned. `maxCorners <= 0` implies that no limit on the maximum is set
+and all detected corners are returned.
+@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The
+parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue
+(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the
+quality measure less than the product are rejected. For example, if the best corner has the
+quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure
+less than 15 are rejected.
+@param minDistance Minimum possible Euclidean distance between the returned corners.
+@param mask Region of interest. If the image is not empty (it needs to have the type
+CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
+@param cornersQuality Output vector of quality measure of the detected corners.
+@param blockSize Size of an average block for computing a derivative covariance matrix over each
+pixel neighborhood. See cornerEigenValsAndVecs .
+@param gradientSize Aperture parameter for the Sobel operator used for derivatives computation.
+See cornerEigenValsAndVecs .
+@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris)
+or #cornerMinEigenVal.
+@param k Free parameter of the Harris detector.
+ */
+CV_EXPORTS CV_WRAP_AS(goodFeaturesToTrackWithQuality) void goodFeaturesToTrack(
+        InputArray image, OutputArray corners,
+        int maxCorners, double qualityLevel, double minDistance,
+        InputArray mask, OutputArray cornersQuality, int blockSize = 3,
+        int gradientSize = 3, bool useHarrisDetector = false, double k = 0.04);
+
+/** @example samples/cpp/tutorial_code/ImgTrans/houghlines.cpp
+An example using the Hough line detector
+![Sample input image](Hough_Lines_Tutorial_Original_Image.jpg) ![Output image](Hough_Lines_Tutorial_Result.jpg)
+*/
+
+/** @brief Finds lines in a binary image using the standard Hough transform.
+
+The function implements the standard or standard multi-scale Hough transform algorithm for line
+detection. See <http://homepages.inf.ed.ac.uk/rbf/HIPR2/hough.htm> for a good explanation of Hough
+transform.
+
+@param image 8-bit, single-channel binary source image. The image may be modified by the function.
+@param lines Output vector of lines. Each line is represented by a 2 or 3 element vector
+\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
+the image). \f$\theta\f$ is the line rotation angle in radians (
+\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
+\f$\textrm{votes}\f$ is the value of accumulator.
+@param rho Distance resolution of the accumulator in pixels.
+@param theta Angle resolution of the accumulator in radians.
+@param threshold Accumulator threshold parameter. Only those lines are returned that get enough
+votes ( \f$>\texttt{threshold}\f$ ).
+@param srn For the multi-scale Hough transform, it is a divisor for the distance resolution rho .
+The coarse accumulator distance resolution is rho and the accurate accumulator resolution is
+rho/srn . If both srn=0 and stn=0 , the classical Hough transform is used. Otherwise, both these
+parameters should be positive.
+@param stn For the multi-scale Hough transform, it is a divisor for the distance resolution theta.
+@param min_theta For standard and multi-scale Hough transform, minimum angle to check for lines.
+Must fall between 0 and max_theta.
+@param max_theta For standard and multi-scale Hough transform, maximum angle to check for lines.
+Must fall between min_theta and CV_PI.
+ */
+CV_EXPORTS_W void HoughLines( InputArray image, OutputArray lines,
+                              double rho, double theta, int threshold,
+                              double srn = 0, double stn = 0,
+                              double min_theta = 0, double max_theta = CV_PI );
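+
+/* A sketch converting HoughLines output into drawable segments (the edge map,
+   resolutions, and threshold are illustrative).
+@code
+    std::vector<cv::Vec2f> lines;
+    cv::HoughLines(edges, lines, 1, CV_PI / 180, 150);    // 1 px and 1 degree bins
+    for (const cv::Vec2f& l : lines)
+    {
+        double a = std::cos(l[1]), b = std::sin(l[1]);
+        double x0 = a * l[0], y0 = b * l[0];              // point closest to the origin
+        cv::Point pt1(cvRound(x0 - 1000 * b), cvRound(y0 + 1000 * a));
+        cv::Point pt2(cvRound(x0 + 1000 * b), cvRound(y0 - 1000 * a));
+        cv::line(img, pt1, pt2, cv::Scalar(0, 0, 255));
+    }
+@endcode
+*/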
+
+/** @brief Finds line segments in a binary image using the probabilistic Hough transform.
+
+The function implements the probabilistic Hough transform algorithm for line detection, described
+in @cite Matas00 .
+
+See the line detection example below:
+@include snippets/imgproc_HoughLinesP.cpp
+This is a sample picture the function parameters have been tuned for:
+
+![image](pics/building.jpg)
+
+And this is the output of the above program in case of the probabilistic Hough transform:
+
+![image](pics/houghp.png)
+
+@param image 8-bit, single-channel binary source image. The image may be modified by the function.
+@param lines Output vector of lines. Each line is represented by a 4-element vector
+\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected
+line segment.
+@param rho Distance resolution of the accumulator in pixels.
+@param theta Angle resolution of the accumulator in radians.
+@param threshold Accumulator threshold parameter. Only those lines are returned that get enough
+votes ( \f$>\texttt{threshold}\f$ ).
+@param minLineLength Minimum line length. Line segments shorter than that are rejected.
+@param maxLineGap Maximum allowed gap between points on the same line to link them.
+
+@sa  LineSegmentDetector
+ */
+CV_EXPORTS_W void HoughLinesP( InputArray image, OutputArray lines,
+                               double rho, double theta, int threshold,
+                               double minLineLength = 0, double maxLineGap = 0 );
+
+/** @brief Finds lines in a set of points using the standard Hough transform.
+
+The function finds lines in a set of points using a modification of the Hough transform.
+@include snippets/imgproc_HoughLinesPointSet.cpp
+@param point Input vector of points. Each vector must be encoded as a Point vector \f$(x,y)\f$. Type must be CV_32FC2 or CV_32SC2.
+@param lines Output vector of found lines. Each vector is encoded as a vector \f$(votes, rho, theta)\f$.
+The larger the value of 'votes', the higher the reliability of the Hough line.
+@param lines_max Max count of Hough lines.
+@param threshold Accumulator threshold parameter. Only those lines are returned that get enough
+votes ( \f$>\texttt{threshold}\f$ ).
+@param min_rho Minimum value for \f$\rho\f$ for the accumulator (Note: \f$\rho\f$ can be negative. The absolute value \f$|\rho|\f$ is the distance of a line to the origin.).
+@param max_rho Maximum value for \f$\rho\f$ for the accumulator.
+@param rho_step Distance resolution of the accumulator.
+@param min_theta Minimum angle value of the accumulator in radians.
+@param max_theta Maximum angle value of the accumulator in radians.
+@param theta_step Angle resolution of the accumulator in radians.
+ */
+CV_EXPORTS_W void HoughLinesPointSet( InputArray point, OutputArray lines, int lines_max, int threshold,
+                                      double min_rho, double max_rho, double rho_step,
+                                      double min_theta, double max_theta, double theta_step );
+
+/** @example samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp
+An example using the Hough circle detector
+*/
+
+/** @brief Finds circles in a grayscale image using the Hough transform.
+
+The function finds circles in a grayscale image using a modification of the Hough transform.
+
+Example:
+@include snippets/imgproc_HoughLinesCircles.cpp
+
+@note Usually the function detects the centers of circles well. However, it may fail to find correct
+radii. You can assist the function by specifying the radius range ( minRadius and maxRadius ) if
+you know it.
Or, in the case of #HOUGH_GRADIENT method you may set maxRadius to a negative number
+to return centers only without radius search, and find the correct radius using an additional procedure.
+
+It also helps to smooth the image a bit unless it's already soft. For example,
+GaussianBlur() with 7x7 kernel and 1.5x1.5 sigma or similar blurring may help.
+
+@param image 8-bit, single-channel, grayscale input image.
+@param circles Output vector of found circles. Each vector is encoded as 3 or 4 element
+floating-point vector \f$(x, y, radius)\f$ or \f$(x, y, radius, votes)\f$ .
+@param method Detection method, see #HoughModes. The available methods are #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT.
+@param dp Inverse ratio of the accumulator resolution to the image resolution. For example, if
+dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has
+half as big width and height. For #HOUGH_GRADIENT_ALT the recommended value is dp=1.5,
+unless some very small circles need to be detected.
+@param minDist Minimum distance between the centers of the detected circles. If the parameter is
+too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is
+too large, some circles may be missed.
+@param param1 First method-specific parameter. In case of #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT,
+it is the higher threshold of the two passed to the Canny edge detector (the lower one is half as large).
+Note that #HOUGH_GRADIENT_ALT uses the #Scharr algorithm to compute image derivatives, so the threshold value
+should normally be higher, such as 300, for normally exposed and contrasty images.
+@param param2 Second method-specific parameter. In case of #HOUGH_GRADIENT, it is the
+accumulator threshold for the circle centers at the detection stage. The smaller it is, the more
+false circles may be detected. Circles, corresponding to the larger accumulator values, will be
+returned first. In the case of #HOUGH_GRADIENT_ALT algorithm, this is the circle "perfectness" measure.
+The closer it is to 1, the better-shaped the circles the algorithm selects. In most cases 0.9 should be fine.
+If you want to get better detection of small circles, you may decrease it to 0.85, 0.8 or even less.
+But then also try to limit the search range [minRadius, maxRadius] to avoid many false circles.
+@param minRadius Minimum circle radius.
+@param maxRadius Maximum circle radius. If <= 0, uses the maximum image dimension. If < 0, #HOUGH_GRADIENT returns
+centers without finding the radius. #HOUGH_GRADIENT_ALT always computes circle radii.
+
+@sa fitEllipse, minEnclosingCircle
+ */
+CV_EXPORTS_W void HoughCircles( InputArray image, OutputArray circles,
+                               int method, double dp, double minDist,
+                               double param1 = 100, double param2 = 100,
+                               int minRadius = 0, int maxRadius = 0 );
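+
+/* A typical circle-detection sketch (blur kernel and thresholds are illustrative;
+   minDist = rows/8 follows the guidance above).
+@code
+    cv::Mat blurred;
+    cv::GaussianBlur(gray, blurred, cv::Size(7, 7), 1.5);
+    std::vector<cv::Vec3f> circles;                       // (x, y, radius) per circle
+    cv::HoughCircles(blurred, circles, cv::HOUGH_GRADIENT, 1,
+                     blurred.rows / 8.0, 100, 30, 10, 100);
+@endcode
+*/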
+
+//! @} imgproc_feature
+
+//! @addtogroup imgproc_filter
+//! @{
+
+/** @example samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
+Advanced morphology transformations sample code
+![Sample screenshot](Morphology_2_Tutorial_Result.jpg)
+Check @ref tutorial_opening_closing_hats "the corresponding tutorial" for more details
+*/
+
+/** @brief Erodes an image by using a specific structuring element.
+
+The function erodes the source image using the specified structuring element that determines the
+shape of a pixel neighborhood over which the minimum is taken:
+
+\f[\texttt{dst} (x,y) = \min _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f]
+
+The function supports the in-place mode. Erosion can be applied several ( iterations ) times. In
+case of multi-channel images, each channel is processed independently.
+
+@param src input image; the number of channels can be arbitrary, but the depth should be one of
+CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst output image of the same size and type as src.
+@param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular
+structuring element is used. Kernel can be created using #getStructuringElement.
+@param anchor position of the anchor within the element; default value (-1, -1) means that the
+anchor is at the element center.
+@param iterations number of times erosion is applied.
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+@param borderValue border value in case of a constant border
+@sa  dilate, morphologyEx, getStructuringElement
+ */
+CV_EXPORTS_W void erode( InputArray src, OutputArray dst, InputArray kernel,
+                         Point anchor = Point(-1,-1), int iterations = 1,
+                         int borderType = BORDER_CONSTANT,
+                         const Scalar& borderValue = morphologyDefaultBorderValue() );
+
+/** @example samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp
+Erosion and Dilation sample code
+![Sample Screenshot-Erosion](Morphology_1_Tutorial_Erosion_Result.jpg)![Sample Screenshot-Dilation](Morphology_1_Tutorial_Dilation_Result.jpg)
+Check @ref tutorial_erosion_dilatation "the corresponding tutorial" for more details
+*/
+
+/** @brief Dilates an image by using a specific structuring element.
+
+The function dilates the source image using the specified structuring element that determines the
+shape of a pixel neighborhood over which the maximum is taken:
+\f[\texttt{dst} (x,y) = \max _{(x',y'): \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f]
+
+The function supports the in-place mode. Dilation can be applied several ( iterations ) times. In
+case of multi-channel images, each channel is processed independently.
+
+@param src input image; the number of channels can be arbitrary, but the depth should be one of
+CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst output image of the same size and type as src.
+@param kernel structuring element used for dilation; if `element=Mat()`, a `3 x 3` rectangular
+structuring element is used. Kernel can be created using #getStructuringElement
+@param anchor position of the anchor within the element; default value (-1, -1) means that the
+anchor is at the element center.
+@param iterations number of times dilation is applied.
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+@param borderValue border value in case of a constant border
+@sa  erode, morphologyEx, getStructuringElement
+ */
+CV_EXPORTS_W void dilate( InputArray src, OutputArray dst, InputArray kernel,
+                          Point anchor = Point(-1,-1), int iterations = 1,
+                          int borderType = BORDER_CONSTANT,
+                          const Scalar& borderValue = morphologyDefaultBorderValue() );
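+
+/* A combined sketch (element shape and size are illustrative): build a structuring
+   element and apply erosion followed by dilation, i.e. a manual morphological opening.
+@code
+    cv::Mat element = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5));
+    cv::Mat eroded, opened;
+    cv::erode(binary, eroded, element);
+    cv::dilate(eroded, opened, element); // same result as morphologyEx with MORPH_OPEN
+@endcode
+*/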
+
+/** @brief Performs advanced morphological transformations.
+
+The function cv::morphologyEx can perform advanced morphological transformations using erosion and dilation as
+basic operations.
+
+Any of the operations can be done in-place. In case of multi-channel images, each channel is
+processed independently.
+
+@param src Source image. The number of channels can be arbitrary. The depth should be one of
+CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
+@param dst Destination image of the same size and type as source image.
+@param op Type of a morphological operation, see #MorphTypes
+@param kernel Structuring element. It can be created using #getStructuringElement.
+@param anchor Anchor position within the kernel. Negative values mean that the anchor is at the
+kernel center.
+@param iterations Number of times erosion and dilation are applied.
+@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
+@param borderValue Border value in case of a constant border. The default value has a special
+meaning.
+@sa  dilate, erode, getStructuringElement
+@note The number of iterations is the number of times the erosion or dilation operation will be applied.
+For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to applying
+successively: erode -> erode -> dilate -> dilate (and not erode -> dilate -> erode -> dilate).
+ */
+CV_EXPORTS_W void morphologyEx( InputArray src, OutputArray dst,
+                                int op, InputArray kernel,
+                                Point anchor = Point(-1,-1), int iterations = 1,
+                                int borderType = BORDER_CONSTANT,
+                                const Scalar& borderValue = morphologyDefaultBorderValue() );
+
+//! @} imgproc_filter
+
+//! @addtogroup imgproc_transform
+//! @{
+
+/** @brief Resizes an image.
+
+The function resize resizes the image src down to or up to the specified size. Note that the
+initial dst type or size are not taken into account. Instead, the size and type are derived from
+the `src`,`dsize`,`fx`, and `fy`. If you want to resize src so that it fits the pre-created dst,
+you may call the function as follows:
+@code
+    // explicitly specify dsize=dst.size(); fx and fy will be computed from that.
+    resize(src, dst, dst.size(), 0, 0, interpolation);
+@endcode
+If you want to decimate the image by a factor of 2 in each direction, you can call the function this
+way:
+@code
+    // specify fx and fy and let the function compute the destination image size.
+    resize(src, dst, Size(), 0.5, 0.5, interpolation);
+@endcode
+To shrink an image, it will generally look best with #INTER_AREA interpolation, whereas to
+enlarge an image, it will generally look best with #INTER_CUBIC (slow) or #INTER_LINEAR
+(faster but still looks OK).
+
+@param src input image.
+@param dst output image; it has the size dsize (when it is non-zero) or the size computed from
+src.size(), fx, and fy; the type of dst is the same as of src.
+@param dsize output image size; if it equals zero (`None` in Python), it is computed as:
+ \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f]
+ Either dsize or both fx and fy must be non-zero.
+@param fx scale factor along the horizontal axis; when it equals 0, it is computed as
+\f[\texttt{(double)dsize.width/src.cols}\f]
+@param fy scale factor along the vertical axis; when it equals 0, it is computed as
+\f[\texttt{(double)dsize.height/src.rows}\f]
+@param interpolation interpolation method, see #InterpolationFlags
+
+@sa  warpAffine, warpPerspective, remap
+ */
+CV_EXPORTS_W void resize( InputArray src, OutputArray dst,
+                          Size dsize, double fx = 0, double fy = 0,
+                          int interpolation = INTER_LINEAR );
+
+/** @brief Applies an affine transformation to an image.
+
+The function warpAffine transforms the source image using the specified matrix:
+
+\f[\texttt{dst} (x,y) = \texttt{src} ( \texttt{M} _{11} x + \texttt{M} _{12} y + \texttt{M} _{13}, \texttt{M} _{21} x + \texttt{M} _{22} y + \texttt{M} _{23})\f]
+
+when the flag #WARP_INVERSE_MAP is set.
@param src input image.
+@param dst output image that has the size dsize and the same type as src .
+@param M \f$2\times 3\f$ transformation matrix.
+@param dsize size of the output image.
+@param flags combination of interpolation methods (see #InterpolationFlags) and the optional
+flag #WARP_INVERSE_MAP that means that M is the inverse transformation (
+\f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
+@param borderMode pixel extrapolation method (see #BorderTypes); when
+borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to
+the "outliers" in the source image are not modified by the function.
+@param borderValue value used in case of a constant border; by default, it is 0.
+
+@sa warpPerspective, resize, remap, getRectSubPix, transform
+ */
+CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst,
+                              InputArray M, Size dsize,
+                              int flags = INTER_LINEAR,
+                              int borderMode = BORDER_CONSTANT,
+                              const Scalar& borderValue = Scalar());
+
+/** @example samples/cpp/warpPerspective_demo.cpp
+An example program shows using cv::getPerspectiveTransform and cv::warpPerspective for image warping
+*/
+
+/** @brief Applies a perspective transformation to an image.
+
+The function warpPerspective transforms the source image using the specified matrix:
+
+\f[\texttt{dst} (x,y) = \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} ,
+     \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )\f]
+
+when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert
+and then put in the formula above instead of M. The function cannot operate in-place.
+
+@param src input image.
+@param dst output image that has the size dsize and the same type as src .
+@param M \f$3\times 3\f$ transformation matrix.
+@param dsize size of the output image.
+@param flags combination of interpolation methods (#INTER_LINEAR or #INTER_NEAREST) and the
+optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation (
+\f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
+@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE).
+@param borderValue value used in case of a constant border; by default, it equals 0.
+
+@sa warpAffine, resize, remap, getRectSubPix, perspectiveTransform
+ */
+CV_EXPORTS_W void warpPerspective( InputArray src, OutputArray dst,
+                                   InputArray M, Size dsize,
+                                   int flags = INTER_LINEAR,
+                                   int borderMode = BORDER_CONSTANT,
+                                   const Scalar& borderValue = Scalar());
+
+/** @brief Applies a generic geometrical transformation to an image.
+
+The function remap transforms the source image using the specified map:
+
+\f[\texttt{dst} (x,y) = \texttt{src} (map_x(x,y),map_y(x,y))\f]
+
+where values of pixels with non-integer coordinates are computed using one of the available
+interpolation methods. \f$map_x\f$ and \f$map_y\f$ can be encoded as separate floating-point maps
+in \f$map_1\f$ and \f$map_2\f$ respectively, or interleaved floating-point maps of \f$(x,y)\f$ in
+\f$map_1\f$, or fixed-point maps created by using #convertMaps. The reason you might want to
+convert from floating to fixed-point representations of a map is that they can yield much faster
+(\~2x) remapping operations. In the converted case, \f$map_1\f$ contains pairs (cvFloor(x),
+cvFloor(y)) and \f$map_2\f$ contains indices in a table of interpolation coefficients.
+
+This function cannot operate in-place.
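+
+For example, a minimal sketch that flips an image horizontally through explicit floating-point
+maps (the input file name is a placeholder):
+@code
+    Mat src = imread("image.png"); // hypothetical input
+    Mat mapX(src.size(), CV_32FC1), mapY(src.size(), CV_32FC1);
+    for (int y = 0; y < src.rows; y++)
+        for (int x = 0; x < src.cols; x++)
+        {
+            mapX.at<float>(y, x) = (float)(src.cols - 1 - x); // mirror along x
+            mapY.at<float>(y, x) = (float)y;                  // keep y unchanged
+        }
+    Mat dst;
+    remap(src, dst, mapX, mapY, INTER_LINEAR);
+@endcode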
@param src Source image.
+@param dst Destination image. It has the same size as map1 and the same type as src .
+@param map1 The first map of either (x,y) points or just x values having the type CV_16SC2 ,
+CV_32FC1, or CV_32FC2. See #convertMaps for details on converting a floating point
+representation to fixed-point for speed.
+@param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map
+if map1 is (x,y) points), respectively.
+@param interpolation Interpolation method (see #InterpolationFlags). The methods #INTER_AREA
+and #INTER_LINEAR_EXACT are not supported by this function.
+@param borderMode Pixel extrapolation method (see #BorderTypes). When
+borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image that
+correspond to the "outliers" in the source image are not modified by the function.
+@param borderValue Value used in case of a constant border. By default, it is 0.
+@note
+Due to current implementation limitations the sizes of input and output images should be less than 32767x32767.
+ */
+CV_EXPORTS_W void remap( InputArray src, OutputArray dst,
+                         InputArray map1, InputArray map2,
+                         int interpolation, int borderMode = BORDER_CONSTANT,
+                         const Scalar& borderValue = Scalar());
+
+/** @brief Converts image transformation maps from one representation to another.
+
+The function converts a pair of maps for remap from one representation to another. The following
+options ( (map1.type(), map2.type()) \f$\rightarrow\f$ (dstmap1.type(), dstmap2.type()) ) are
+supported:
+
+- \f$\texttt{(CV_32FC1, CV_32FC1)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. This is the
+most frequently used conversion operation, in which the original floating-point maps (see #remap)
+are converted to a more compact and much faster fixed-point representation. The first output array
+contains the rounded coordinates and the second array (created only when nninterpolation=false )
+contains indices in the interpolation tables.
+
+- \f$\texttt{(CV_32FC2)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. The same as above but
+the original maps are stored in one 2-channel matrix.
+
+- Reverse conversion. Obviously, the reconstructed floating-point maps will not be exactly the same
+as the originals.
+
+@param map1 The first input map of type CV_16SC2, CV_32FC1, or CV_32FC2 .
+@param map2 The second input map of type CV_16UC1, CV_32FC1, or none (empty matrix),
+respectively.
+@param dstmap1 The first output map that has the type dstmap1type and the same size as src .
+@param dstmap2 The second output map.
+@param dstmap1type Type of the first output map that should be CV_16SC2, CV_32FC1, or
+CV_32FC2 .
+@param nninterpolation Flag indicating whether the fixed-point maps are used for the
+nearest-neighbor or for a more complex interpolation.
+
+@sa remap, undistort, initUndistortRectifyMap
+ */
+CV_EXPORTS_W void convertMaps( InputArray map1, InputArray map2,
+                               OutputArray dstmap1, OutputArray dstmap2,
+                               int dstmap1type, bool nninterpolation = false );
+
+/** @brief Calculates an affine matrix of 2D rotation.
+ +The function calculates the following matrix: + +\f[\begin{bmatrix} \alpha & \beta & (1- \alpha ) \cdot \texttt{center.x} - \beta \cdot \texttt{center.y} \\ - \beta & \alpha & \beta \cdot \texttt{center.x} + (1- \alpha ) \cdot \texttt{center.y} \end{bmatrix}\f] + +where + +\f[\begin{array}{l} \alpha = \texttt{scale} \cdot \cos \texttt{angle} , \\ \beta = \texttt{scale} \cdot \sin \texttt{angle} \end{array}\f] + +The transformation maps the rotation center to itself. If this is not the target, adjust the shift. + +@param center Center of the rotation in the source image. +@param angle Rotation angle in degrees. Positive values mean counter-clockwise rotation (the +coordinate origin is assumed to be the top-left corner). +@param scale Isotropic scale factor. + +@sa getAffineTransform, warpAffine, transform + */ +CV_EXPORTS_W Mat getRotationMatrix2D(Point2f center, double angle, double scale); + +/** @sa getRotationMatrix2D */ +CV_EXPORTS Matx23d getRotationMatrix2D_(Point2f center, double angle, double scale); + +inline +Mat getRotationMatrix2D(Point2f center, double angle, double scale) +{ + return Mat(getRotationMatrix2D_(center, angle, scale), true); +} + +/** @brief Calculates an affine transform from three pairs of the corresponding points. + +The function calculates the \f$2 \times 3\f$ matrix of an affine transform so that: + +\f[\begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] + +where + +\f[dst(i)=(x'_i,y'_i), src(i)=(x_i, y_i), i=0,1,2\f] + +@param src Coordinates of triangle vertices in the source image. +@param dst Coordinates of the corresponding triangle vertices in the destination image. + +@sa warpAffine, transform + */ +CV_EXPORTS Mat getAffineTransform( const Point2f src[], const Point2f dst[] ); + +/** @brief Inverts an affine transformation. + +The function computes an inverse affine transformation represented by \f$2 \times 3\f$ matrix M: + +\f[\begin{bmatrix} a_{11} & a_{12} & b_1 \\ a_{21} & a_{22} & b_2 \end{bmatrix}\f] + +The result is also a \f$2 \times 3\f$ matrix of the same type as M. + +@param M Original affine transformation. +@param iM Output reverse affine transformation. + */ +CV_EXPORTS_W void invertAffineTransform( InputArray M, OutputArray iM ); + +/** @brief Calculates a perspective transform from four pairs of the corresponding points. + +The function calculates the \f$3 \times 3\f$ matrix of a perspective transform so that: + +\f[\begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f] + +where + +\f[dst(i)=(x'_i,y'_i), src(i)=(x_i, y_i), i=0,1,2,3\f] + +@param src Coordinates of quadrangle vertices in the source image. +@param dst Coordinates of the corresponding quadrangle vertices in the destination image. +@param solveMethod method passed to cv::solve (#DecompTypes) + +@sa findHomography, warpPerspective, perspectiveTransform + */ +CV_EXPORTS_W Mat getPerspectiveTransform(InputArray src, InputArray dst, int solveMethod = DECOMP_LU); + +/** @overload */ +CV_EXPORTS Mat getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod = DECOMP_LU); + + +CV_EXPORTS_W Mat getAffineTransform( InputArray src, InputArray dst ); + +/** @brief Retrieves a pixel rectangle from an image with sub-pixel accuracy. 
+ +The function getRectSubPix extracts pixels from src: + +\f[patch(x, y) = src(x + \texttt{center.x} - ( \texttt{dst.cols} -1)*0.5, y + \texttt{center.y} - ( \texttt{dst.rows} -1)*0.5)\f] + +where the values of the pixels at non-integer coordinates are retrieved using bilinear +interpolation. Every channel of multi-channel images is processed independently. Also +the image should be a single channel or three channel image. While the center of the +rectangle must be inside the image, parts of the rectangle may be outside. + +@param image Source image. +@param patchSize Size of the extracted patch. +@param center Floating point coordinates of the center of the extracted rectangle within the +source image. The center must be inside the image. +@param patch Extracted patch that has the size patchSize and the same number of channels as src . +@param patchType Depth of the extracted pixels. By default, they have the same depth as src . + +@sa warpAffine, warpPerspective + */ +CV_EXPORTS_W void getRectSubPix( InputArray image, Size patchSize, + Point2f center, OutputArray patch, int patchType = -1 ); + +/** @example samples/cpp/polar_transforms.cpp +An example using the cv::linearPolar and cv::logPolar operations +*/ + +/** @brief Remaps an image to semilog-polar coordinates space. + +@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags+WARP_POLAR_LOG); + +@internal +Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image d)"): +\f[\begin{array}{l} + dst( \rho , \phi ) = src(x,y) \\ + dst.size() \leftarrow src.size() +\end{array}\f] + +where +\f[\begin{array}{l} + I = (dx,dy) = (x - center.x,y - center.y) \\ + \rho = M \cdot log_e(\texttt{magnitude} (I)) ,\\ + \phi = Kangle \cdot \texttt{angle} (I) \\ +\end{array}\f] + +and +\f[\begin{array}{l} + M = src.cols / log_e(maxRadius) \\ + Kangle = src.rows / 2\Pi \\ +\end{array}\f] + +The function emulates the human "foveal" vision and can be used for fast scale and +rotation-invariant template matching, for object tracking and so forth. +@param src Source image +@param dst Destination image. It will have same size and type as src. +@param center The transformation center; where the output precision is maximal +@param M Magnitude scale parameter. It determines the radius of the bounding circle to transform too. +@param flags A combination of interpolation methods, see #InterpolationFlags + +@note +- The function can not operate in-place. +- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees. + +@sa cv::linearPolar +@endinternal +*/ +CV_EXPORTS_W void logPolar( InputArray src, OutputArray dst, + Point2f center, double M, int flags ); + +/** @brief Remaps an image to polar coordinates space. 
+
+@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags)
+
+@internal
+Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image c)"):
+\f[\begin{array}{l}
+  dst( \rho , \phi ) = src(x,y) \\
+  dst.size() \leftarrow src.size()
+\end{array}\f]
+
+where
+\f[\begin{array}{l}
+  I = (dx,dy) = (x - center.x,y - center.y) \\
+  \rho = Kmag \cdot \texttt{magnitude} (I) ,\\
+  \phi = Kangle \cdot \texttt{angle} (I)
+\end{array}\f]
+
+and
+\f[\begin{array}{l}
+  Kmag = src.cols / maxRadius \\
+  Kangle = src.rows / 2\Pi
+\end{array}\f]
+
+
+@param src Source image
+@param dst Destination image. It will have same size and type as src.
+@param center The transformation center.
+@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too.
+@param flags A combination of interpolation methods, see #InterpolationFlags
+
+@note
+- The function can not operate in-place.
+- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees.
+
+@sa cv::logPolar
+@endinternal
+*/
+CV_EXPORTS_W void linearPolar( InputArray src, OutputArray dst,
+                               Point2f center, double maxRadius, int flags );
+
+
+/** \brief Remaps an image to polar or semilog-polar coordinates space
+
+@anchor polar_remaps_reference_image
+![Polar remaps reference](pics/polar_remap_doc.png)
+
+Transform the source image using the following transformation:
+\f[
+dst(\rho , \phi ) = src(x,y)
+\f]
+
+where
+\f[
+\begin{array}{l}
+\vec{I} = (x - center.x, \;y - center.y) \\
+\phi = Kangle \cdot \texttt{angle} (\vec{I}) \\
+\rho = \left\{\begin{matrix}
+Klin \cdot \texttt{magnitude} (\vec{I}) & default \\
+Klog \cdot log_e(\texttt{magnitude} (\vec{I})) & if \; semilog \\
+\end{matrix}\right.
+\end{array}
+\f]
+
+and
+\f[
+\begin{array}{l}
+Kangle = dsize.height / 2\Pi \\
+Klin = dsize.width / maxRadius \\
+Klog = dsize.width / log_e(maxRadius) \\
+\end{array}
+\f]
+
+
+\par Linear vs semilog mapping
+
+Polar mapping can be linear or semi-log. Add one of #WarpPolarMode to `flags` to specify the polar mapping mode.
+
+Linear is the default mode.
+
+The semilog mapping emulates the human "foveal" vision that permits very high acuity on the line of sight (central vision)
+in contrast to peripheral vision where acuity is minor.
+
+\par Option on `dsize`:
+
+- if both values in `dsize` are `<= 0` (default),
+the destination image will have (almost) the same area as the source bounding circle:
+\f[\begin{array}{l}
+dsize.area \leftarrow (maxRadius^2 \cdot \Pi) \\
+dsize.width = \texttt{cvRound}(maxRadius) \\
+dsize.height = \texttt{cvRound}(maxRadius \cdot \Pi) \\
+\end{array}\f]
+
+
+- if only `dsize.height <= 0`,
+the destination image area will be proportional to the bounding circle area but scaled by `Kx * Kx`:
+\f[\begin{array}{l}
+dsize.height = \texttt{cvRound}(dsize.width \cdot \Pi) \\
+\end{array}
+\f]
+
+- if both values in `dsize` are `> 0`,
+the destination image will have the given size; therefore, the area of the bounding circle will be scaled to `dsize`.
+
+
+\par Reverse mapping
+
+You can get reverse mapping by adding #WARP_INVERSE_MAP to `flags`
+\snippet polar_transforms.cpp InverseMap
+
+In addition, to calculate the original coordinate from a polar-mapped coordinate \f$(\rho, \phi) \rightarrow (x, y)\f$:
+\snippet polar_transforms.cpp InverseCoordinate
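+
+For example, a minimal sketch that unwraps a full circle around the image center into a polar
+image (the input file name is a placeholder):
+@code
+    Mat src = imread("clock.png"); // hypothetical input
+    Point2f center(src.cols / 2.0f, src.rows / 2.0f);
+    double maxRadius = 0.5 * std::min(src.cols, src.rows);
+    Mat polar;
+    // Size() lets the function pick the destination size from maxRadius
+    warpPolar(src, polar, Size(), center, maxRadius,
+              INTER_LINEAR + WARP_POLAR_LINEAR);
+@endcode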
@param src Source image.
+@param dst Destination image. It will have the same type as src.
+@param dsize The destination image size (see description for valid options).
+@param center The transformation center.
+@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too.
+@param flags A combination of interpolation methods, #InterpolationFlags + #WarpPolarMode.
+            - Add #WARP_POLAR_LINEAR to select linear polar mapping (default)
+            - Add #WARP_POLAR_LOG to select semilog polar mapping
+            - Add #WARP_INVERSE_MAP for reverse mapping.
+@note
+- The function can not operate in-place.
+- To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees.
+- This function uses #remap. Due to current implementation limitations the sizes of input and output images should be less than 32767x32767.
+
+@sa cv::remap
+*/
+CV_EXPORTS_W void warpPolar(InputArray src, OutputArray dst, Size dsize,
+                            Point2f center, double maxRadius, int flags);
+
+
+//! @} imgproc_transform
+
+//! @addtogroup imgproc_misc
+//! @{
+
+/** @brief Calculates the integral of an image.
+
+The function calculates one or more integral images for the source image as follows:
+
+\f[\texttt{sum} (X,Y) =  \sum _{x<X,y<Y}  \texttt{image} (x,y)\f]
+ */
+
+/** @brief The function is used to detect translational shifts that occur between two images.
+
+The operation takes advantage of the Fourier shift theorem for detecting the translational shift in
+the frequency domain. It can be used for fast image registration as well as motion estimation. For
+more information please see <http://en.wikipedia.org/wiki/Phase_correlation>
+
+Calculates the cross-power spectrum of two supplied source arrays. The arrays are padded if needed
+with getOptimalDFTSize.
+
+The function performs the following equations:
+- First it applies a Hanning window (see <http://en.wikipedia.org/wiki/Hann_function>) to each
+image to remove possible edge effects. This window is cached until the array size changes to speed
+up processing time.
+- Next it computes the forward DFTs of each source array:
+\f[\mathbf{G}_a = \mathcal{F}\{src_1\}, \; \mathbf{G}_b = \mathcal{F}\{src_2\}\f]
+where \f$\mathcal{F}\f$ is the forward DFT.
+- It then computes the cross-power spectrum of each frequency domain array:
+\f[R = \frac{ \mathbf{G}_a \mathbf{G}_b^*}{|\mathbf{G}_a \mathbf{G}_b^*|}\f]
+- Next the cross-correlation is converted back into the time domain via the inverse DFT:
+\f[r = \mathcal{F}^{-1}\{R\}\f]
+- Finally, it computes the peak location and computes a 5x5 weighted centroid around the peak to
+achieve sub-pixel accuracy.
+\f[(\Delta x, \Delta y) = \texttt{weightedCentroid} \{\arg \max_{(x, y)}\{r\}\}\f]
+- If non-zero, the response parameter is computed as the sum of the elements of r within the 5x5
+centroid around the peak location. It is normalized to a maximum of 1 (meaning there is a single
+peak) and will be smaller when there are multiple peaks.
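+
+For example, a minimal self-contained sketch that recovers a known (5, 3) shift between two
+synthetic frames:
+@code
+    Mat frame0(128, 128, CV_32FC1, Scalar(0)), frame1;
+    circle(frame0, Point(40, 40), 10, Scalar(1), FILLED); // a simple bright blob
+    // frame1 is frame0 translated by (5, 3)
+    Mat M = (Mat_<double>(2, 3) << 1, 0, 5, 0, 1, 3);
+    warpAffine(frame0, frame1, M, frame0.size());
+    double response = 0.0;
+    Point2d shift = phaseCorrelate(frame0, frame1, noArray(), &response);
+@endcode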
+
+@param src1 Source floating point array (CV_32FC1 or CV_64FC1)
+@param src2 Source floating point array (CV_32FC1 or CV_64FC1)
+@param window Floating point array with windowing coefficients to reduce edge effects (optional).
+@param response Signal power within the 5x5 centroid around the peak, between 0 and 1 (optional).
+@returns detected phase shift (sub-pixel) between the two arrays.
+
+@sa dft, getOptimalDFTSize, idft, mulSpectrums createHanningWindow
+ */
+CV_EXPORTS_W Point2d phaseCorrelate(InputArray src1, InputArray src2,
+                                    InputArray window = noArray(), CV_OUT double* response = 0);
+
+/** @brief This function computes Hanning window coefficients in two dimensions.
+
+See (http://en.wikipedia.org/wiki/Hann_function) and (http://en.wikipedia.org/wiki/Window_function)
+for more information.
+
+An example is shown below:
+@code
+    // create hanning window of size 100x100 and type CV_32F
+    Mat hann;
+    createHanningWindow(hann, Size(100, 100), CV_32F);
+@endcode
+@param dst Destination array to place Hann coefficients in
+@param winSize The window size specifications (both width and height must be > 1)
+@param type Created array type
+ */
+CV_EXPORTS_W void createHanningWindow(OutputArray dst, Size winSize, int type);
+
+/** @brief Performs the per-element division of the first Fourier spectrum by the second Fourier spectrum.
+
+The function cv::divSpectrums performs the per-element division of the first array by the second array.
+The arrays are CCS-packed or complex matrices that are results of a real or complex Fourier transform.
+
+@param a first input array.
+@param b second input array of the same size and type as a .
+@param c output array of the same size and type as a .
+@param flags operation flags; currently, the only supported flag is cv::DFT_ROWS, which indicates that
+each row of a and b is an independent 1D Fourier spectrum. If you do not want to use this flag, then simply add a `0` as value.
+@param conjB optional flag that conjugates the second input array before the multiplication (true)
+or not (false).
+*/
+CV_EXPORTS_W void divSpectrums(InputArray a, InputArray b, OutputArray c,
+                               int flags, bool conjB = false);
+
+//! @} imgproc_motion
+
+//! @addtogroup imgproc_misc
+//! @{
+
+/** @brief Applies a fixed-level threshold to each array element.
+
+The function applies fixed-level thresholding to a multiple-channel array. The function is typically
+used to get a bi-level (binary) image out of a grayscale image ( #compare could be also used for
+this purpose) or for removing noise, that is, filtering out pixels with too small or too large
+values. There are several types of thresholding supported by the function. They are determined by
+type parameter.
+
+Also, the special values #THRESH_OTSU or #THRESH_TRIANGLE may be combined with one of the
+above values. In these cases, the function determines the optimal threshold value using Otsu's
+or the Triangle algorithm and uses it instead of the specified thresh.
+
+@note Currently, the Otsu's and Triangle methods are implemented only for 8-bit single-channel images.
+
+@param src input array (multiple-channel, 8-bit or 32-bit floating point).
+@param dst output array of the same size and type and the same number of channels as src.
+@param thresh threshold value.
+@param maxval maximum value to use with the #THRESH_BINARY and #THRESH_BINARY_INV thresholding
+types.
+@param type thresholding type (see #ThresholdTypes).
+@return the computed threshold value if Otsu's or the Triangle method is used.
+
+@sa adaptiveThreshold, findContours, compare, min, max
+ */
+CV_EXPORTS_W double threshold( InputArray src, OutputArray dst,
+                               double thresh, double maxval, int type );
+
+
+/** @brief Applies an adaptive threshold to an array.
+
+The function transforms a grayscale image to a binary image according to the formulae:
+-   **THRESH_BINARY**
+    \f[dst(x,y) =  \fork{\texttt{maxValue}}{if \(src(x,y) > T(x,y)\)}{0}{otherwise}\f]
+-   **THRESH_BINARY_INV**
+    \f[dst(x,y) =  \fork{0}{if \(src(x,y) > T(x,y)\)}{\texttt{maxValue}}{otherwise}\f]
+where \f$T(x,y)\f$ is a threshold calculated individually for each pixel (see adaptiveMethod parameter).
+
+The function can process the image in-place.
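+
+For example, a minimal sketch that binarizes a grayscale image with a Gaussian-weighted local
+threshold (the input file name is a placeholder):
+@code
+    Mat gray = imread("document.png", IMREAD_GRAYSCALE); // hypothetical input
+    Mat binary;
+    adaptiveThreshold(gray, binary, 255, ADAPTIVE_THRESH_GAUSSIAN_C,
+                      THRESH_BINARY, 11, 2); // 11x11 neighborhood, C = 2
+@endcode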
+
+@param src Source 8-bit single-channel image.
+@param dst Destination image of the same size and the same type as src.
+@param maxValue Non-zero value assigned to the pixels for which the condition is satisfied
+@param adaptiveMethod Adaptive thresholding algorithm to use, see #AdaptiveThresholdTypes.
+The combination #BORDER_REPLICATE | #BORDER_ISOLATED is used to process boundaries.
+@param thresholdType Thresholding type that must be either #THRESH_BINARY or #THRESH_BINARY_INV,
+see #ThresholdTypes.
+@param blockSize Size of a pixel neighborhood that is used to calculate a threshold value for the
+pixel: 3, 5, 7, and so on.
+@param C Constant subtracted from the mean or weighted mean (see the details below). Normally, it
+is positive but may be zero or negative as well.
+
+@sa threshold, blur, GaussianBlur
+ */
+CV_EXPORTS_W void adaptiveThreshold( InputArray src, OutputArray dst,
+                                     double maxValue, int adaptiveMethod,
+                                     int thresholdType, int blockSize, double C );
+
+//! @} imgproc_misc
+
+//! @addtogroup imgproc_filter
+//! @{
+
+/** @example samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp
+An example using pyrDown and pyrUp functions
+*/
+
+/** @brief Blurs an image and downsamples it.
+
+By default, size of the output image is computed as `Size((src.cols+1)/2, (src.rows+1)/2)`, but in
+any case, the following conditions should be satisfied:
+
+\f[\begin{array}{l} | \texttt{dstsize.width} *2-src.cols| \leq 2 \\ | \texttt{dstsize.height} *2-src.rows| \leq 2 \end{array}\f]
+
+The function performs the downsampling step of the Gaussian pyramid construction. First, it
+convolves the source image with the kernel:
+
+\f[\frac{1}{256} \begin{bmatrix} 1 & 4 & 6 & 4 & 1 \\ 4 & 16 & 24 & 16 & 4 \\ 6 & 24 & 36 & 24 & 6 \\ 4 & 16 & 24 & 16 & 4 \\ 1 & 4 & 6 & 4 & 1 \end{bmatrix}\f]
+
+Then, it downsamples the image by rejecting even rows and columns.
+
+@param src input image.
+@param dst output image; it has the specified size and the same type as src.
+@param dstsize size of the output image.
+@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported)
+ */
+CV_EXPORTS_W void pyrDown( InputArray src, OutputArray dst,
+                           const Size& dstsize = Size(), int borderType = BORDER_DEFAULT );
+
+/** @brief Upsamples an image and then blurs it.
+
+By default, size of the output image is computed as `Size(src.cols*2, src.rows*2)`, but in any
+case, the following conditions should be satisfied:
+
+\f[\begin{array}{l} | \texttt{dstsize.width} -src.cols*2| \leq ( \texttt{dstsize.width} \mod 2) \\ | \texttt{dstsize.height} -src.rows*2| \leq ( \texttt{dstsize.height} \mod 2) \end{array}\f]
+
+The function performs the upsampling step of the Gaussian pyramid construction, though it can
+actually be used to construct the Laplacian pyramid. First, it upsamples the source image by
+injecting even zero rows and columns and then convolves the result with the same kernel as in
+pyrDown multiplied by 4.
+
+@param src input image.
+@param dst output image. It has the specified size and the same type as src .
+@param dstsize size of the output image.
+@param borderType Pixel extrapolation method, see #BorderTypes (only #BORDER_DEFAULT is supported)
+ */
+CV_EXPORTS_W void pyrUp( InputArray src, OutputArray dst,
+                         const Size& dstsize = Size(), int borderType = BORDER_DEFAULT );
+
+/** @brief Constructs the Gaussian pyramid for an image.
+
+The function constructs a vector of images and builds the Gaussian pyramid by recursively applying
+pyrDown to the previously built pyramid layers, starting from `dst[0]==src`.
+
+@param src Source image. Check pyrDown for the list of supported types.
+@param dst Destination vector of maxlevel+1 images of the same type as src. dst[0] will be the +same as src. dst[1] is the next pyramid layer, a smoothed and down-sized src, and so on. +@param maxlevel 0-based index of the last (the smallest) pyramid layer. It must be non-negative. +@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported) + */ +CV_EXPORTS void buildPyramid( InputArray src, OutputArrayOfArrays dst, + int maxlevel, int borderType = BORDER_DEFAULT ); + +//! @} imgproc_filter + +//! @addtogroup imgproc_hist +//! @{ + +/** @example samples/cpp/demhist.cpp +An example for creating histograms of an image +*/ + +/** @brief Calculates a histogram of a set of arrays. + +The function cv::calcHist calculates the histogram of one or more arrays. The elements of a tuple used +to increment a histogram bin are taken from the corresponding input arrays at the same location. The +sample below shows how to compute a 2D Hue-Saturation histogram for a color image. : +@include snippets/imgproc_calcHist.cpp + +@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same +size. Each of them can have an arbitrary number of channels. +@param nimages Number of source images. +@param channels List of the dims channels used to compute the histogram. The first array channels +are numerated from 0 to images[0].channels()-1 , the second array channels are counted from +images[0].channels() to images[0].channels() + images[1].channels()-1, and so on. +@param mask Optional mask. If the matrix is not empty, it must be an 8-bit array of the same size +as images[i] . The non-zero mask elements mark the array elements counted in the histogram. +@param hist Output histogram, which is a dense or sparse dims -dimensional array. +@param dims Histogram dimensionality that must be positive and not greater than CV_MAX_DIMS +(equal to 32 in the current OpenCV version). +@param histSize Array of histogram sizes in each dimension. +@param ranges Array of the dims arrays of the histogram bin boundaries in each dimension. When the +histogram is uniform ( uniform =true), then for each dimension i it is enough to specify the lower +(inclusive) boundary \f$L_0\f$ of the 0-th histogram bin and the upper (exclusive) boundary +\f$U_{\texttt{histSize}[i]-1}\f$ for the last histogram bin histSize[i]-1 . That is, in case of a +uniform histogram each of ranges[i] is an array of 2 elements. When the histogram is not uniform ( +uniform=false ), then each of ranges[i] contains histSize[i]+1 elements: +\f$L_0, U_0=L_1, U_1=L_2, ..., U_{\texttt{histSize[i]}-2}=L_{\texttt{histSize[i]}-1}, U_{\texttt{histSize[i]}-1}\f$ +. The array elements, that are not between \f$L_0\f$ and \f$U_{\texttt{histSize[i]}-1}\f$ , are not +counted in the histogram. +@param uniform Flag indicating whether the histogram is uniform or not (see above). +@param accumulate Accumulation flag. If it is set, the histogram is not cleared in the beginning +when it is allocated. This feature enables you to compute a single histogram from several sets of +arrays, or to update the histogram in time. 
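+
+For example, a minimal sketch of a 256-bin grayscale histogram using the C-style arrays of this
+overload (the input file name is a placeholder):
+@code
+    Mat gray = imread("image.png", IMREAD_GRAYSCALE); // hypothetical input
+    int channels[] = {0};
+    int histSize[] = {256};
+    float range[] = {0, 256};
+    const float* ranges[] = {range};
+    Mat hist;
+    calcHist(&gray, 1, channels, Mat(), hist, 1, histSize, ranges);
+@endcode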
+*/
+CV_EXPORTS void calcHist( const Mat* images, int nimages,
+                          const int* channels, InputArray mask,
+                          OutputArray hist, int dims, const int* histSize,
+                          const float** ranges, bool uniform = true, bool accumulate = false );
+
+/** @overload
+
+this variant uses %SparseMat for output
+*/
+CV_EXPORTS void calcHist( const Mat* images, int nimages,
+                          const int* channels, InputArray mask,
+                          SparseMat& hist, int dims,
+                          const int* histSize, const float** ranges,
+                          bool uniform = true, bool accumulate = false );
+
+/** @overload */
+CV_EXPORTS_W void calcHist( InputArrayOfArrays images,
+                            const std::vector<int>& channels,
+                            InputArray mask, OutputArray hist,
+                            const std::vector<int>& histSize,
+                            const std::vector<float>& ranges,
+                            bool accumulate = false );
+
+/** @brief Calculates the back projection of a histogram.
+
+The function cv::calcBackProject calculates the back project of the histogram. That is, similarly to
+#calcHist , at each location (x, y) the function collects the values from the selected channels
+in the input images and finds the corresponding histogram bin. But instead of incrementing it, the
+function reads the bin value, scales it by scale , and stores it in backProject(x,y) . In terms of
+statistics, the function computes the probability of each element value with respect to the empirical
+probability distribution represented by the histogram. See how, for example, you can find and track
+a bright-colored object in a scene:
+
+- Before tracking, show the object to the camera so that it covers almost the whole frame.
+Calculate a hue histogram. The histogram may have strong maximums, corresponding to the dominant
+colors in the object.
+
+- When tracking, calculate a back projection of a hue plane of each input video frame using that
+pre-computed histogram. Threshold the back projection to suppress weak colors. It may also make
+sense to suppress pixels with non-sufficient color saturation and too dark or too bright pixels.
+
+- Find connected components in the resulting picture and choose, for example, the largest
+component.
+
+This is an approximate algorithm of the CamShift color object tracker.
+
+@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same
+size. Each of them can have an arbitrary number of channels.
+@param nimages Number of source images.
+@param channels The list of channels used to compute the back projection. The number of channels
+must match the histogram dimensionality. The first array channels are numerated from 0 to
+images[0].channels()-1 , the second array channels are counted from images[0].channels() to
+images[0].channels() + images[1].channels()-1, and so on.
+@param hist Input histogram that can be dense or sparse.
+@param backProject Destination back projection array that is a single-channel array of the same
+size and depth as images[0] .
+@param ranges Array of arrays of the histogram bin boundaries in each dimension. See #calcHist .
+@param scale Optional scale factor for the output back projection.
+@param uniform Flag indicating whether the histogram is uniform or not (see above).
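+
+For example, a minimal self-contained sketch of hue back projection, computing the histogram and
+the back projection from the same synthetic image:
+@code
+    Mat bgr(240, 320, CV_8UC3, Scalar(40, 40, 200)), hsv;
+    cvtColor(bgr, hsv, COLOR_BGR2HSV);
+    int channels[] = {0};          // hue plane only
+    int histSize[] = {30};
+    float hueRange[] = {0, 180};
+    const float* ranges[] = {hueRange};
+    Mat hist;
+    calcHist(&hsv, 1, channels, Mat(), hist, 1, histSize, ranges);
+    Mat backProj;
+    calcBackProject(&hsv, 1, channels, hist, backProj, ranges);
+@endcode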
+
+@sa calcHist, compareHist
+ */
+CV_EXPORTS void calcBackProject( const Mat* images, int nimages,
+                                 const int* channels, InputArray hist,
+                                 OutputArray backProject, const float** ranges,
+                                 double scale = 1, bool uniform = true );
+
+/** @overload */
+CV_EXPORTS void calcBackProject( const Mat* images, int nimages,
+                                 const int* channels, const SparseMat& hist,
+                                 OutputArray backProject, const float** ranges,
+                                 double scale = 1, bool uniform = true );
+
+/** @overload */
+CV_EXPORTS_W void calcBackProject( InputArrayOfArrays images, const std::vector<int>& channels,
+                                   InputArray hist, OutputArray dst,
+                                   const std::vector<float>& ranges,
+                                   double scale );
+
+/** @brief Compares two histograms.
+
+The function cv::compareHist compares two dense or two sparse histograms using the specified method.
+
+The function returns \f$d(H_1, H_2)\f$ .
+
+While the function works well with 1-, 2-, 3-dimensional dense histograms, it may not be suitable
+for high-dimensional sparse histograms. In such histograms, because of aliasing and sampling
+problems, the coordinates of non-zero histogram bins can slightly shift. To compare such histograms
+or more general sparse configurations of weighted points, consider using the #EMD function.
+
+@param H1 First compared histogram.
+@param H2 Second compared histogram of the same size as H1 .
+@param method Comparison method, see #HistCompMethods
+ */
+CV_EXPORTS_W double compareHist( InputArray H1, InputArray H2, int method );
+
+/** @overload */
+CV_EXPORTS double compareHist( const SparseMat& H1, const SparseMat& H2, int method );
+
+/** @brief Equalizes the histogram of a grayscale image.
+
+The function equalizes the histogram of the input image using the following algorithm:
+
+- Calculate the histogram \f$H\f$ for src .
+- Normalize the histogram so that the sum of histogram bins is 255.
+- Compute the integral of the histogram:
+\f[H'_i =  \sum _{0  \le j < i} H(j)\f]
+- Transform the image using \f$H'\f$ as a look-up table: \f$\texttt{dst}(x,y) = H'(\texttt{src}(x,y))\f$
+
+The algorithm normalizes the brightness and increases the contrast of the image.
+
+@param src Source 8-bit single channel image.
+@param dst Destination image of the same size and type as src .
+ */
+CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst );
+
+/** @brief Creates a smart pointer to a cv::CLAHE class and initializes it.
+
+@param clipLimit Threshold for contrast limiting.
+@param tileGridSize Size of grid for histogram equalization. Input image will be divided into
+equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column.
+ */
+CV_EXPORTS_W Ptr<CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
+/** @brief Computes the "minimal work" distance between two weighted point configurations.
+
+The function computes the earth mover distance and/or a lower boundary of the distance between the
+two weighted point configurations. One of the applications described in @cite RubnerSept98,
+@cite Rubner2000 is multi-dimensional histogram comparison for image retrieval. EMD is a transportation
+problem that is solved using some modification of a simplex algorithm, thus the complexity is
+exponential in the worst case, though, on average it is much faster. In the case of a real metric
+the lower boundary can be calculated even faster (using linear-time algorithm) and it can be used
+to determine roughly whether the two signatures are far enough so that they cannot relate to the
+same object.
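+
+For example, a minimal sketch comparing two tiny 1D signatures, each row storing a weight
+followed by one coordinate as described in the parameters below:
+@code
+    // signature 1: two points at coordinates 0 and 1, weight 0.5 each
+    float a[] = { 0.5f, 0.0f,   0.5f, 1.0f };
+    // signature 2: one point at coordinate 0.5, weight 1
+    float b[] = { 1.0f, 0.5f };
+    Mat sig1(2, 2, CV_32F, a);
+    Mat sig2(1, 2, CV_32F, b);
+    float dist = EMD(sig1, sig2, DIST_L2);
+@endcode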
+
+@param signature1 First signature, a \f$\texttt{size1}\times \texttt{dims}+1\f$ floating-point matrix.
+Each row stores the point weight followed by the point coordinates. The matrix is allowed to have
+a single column (weights only) if the user-defined cost matrix is used. The weights must be
+non-negative and have at least one non-zero value.
+@param signature2 Second signature of the same format as signature1 , though the number of rows
+may be different. The total weights may be different. In this case an extra "dummy" point is added
+to either signature1 or signature2. The weights must be non-negative and have at least one non-zero
+value.
+@param distType Used metric. See #DistanceTypes.
+@param cost User-defined \f$\texttt{size1}\times \texttt{size2}\f$ cost matrix. Also, if a cost matrix
+is used, lower boundary lowerBound cannot be calculated because it needs a metric function.
+@param lowerBound Optional input/output parameter: lower boundary of a distance between the two
+signatures that is a distance between mass centers. The lower boundary may not be calculated if
+the user-defined cost matrix is used, the total weights of point configurations are not equal, or
+if the signatures consist of weights only (the signature matrices have a single column). You
+**must** initialize \*lowerBound . If the calculated distance between mass centers is greater or
+equal to \*lowerBound (it means that the signatures are far enough), the function does not
+calculate EMD. In any case \*lowerBound is set to the calculated distance between mass centers on
+return. Thus, if you want to calculate both distance between mass centers and EMD, \*lowerBound
+should be set to 0.
+@param flow Resultant \f$\texttt{size1} \times \texttt{size2}\f$ flow matrix: \f$\texttt{flow}_{i,j}\f$ is
+a flow from \f$i\f$ -th point of signature1 to \f$j\f$ -th point of signature2 .
+ */
+CV_EXPORTS float EMD( InputArray signature1, InputArray signature2,
+                      int distType, InputArray cost=noArray(),
+                      float* lowerBound = 0, OutputArray flow = noArray() );
+
+CV_EXPORTS_AS(EMD) float wrapperEMD( InputArray signature1, InputArray signature2,
+                                     int distType, InputArray cost=noArray(),
+                                     CV_IN_OUT Ptr<float> lowerBound = Ptr<float>(), OutputArray flow = noArray() );
+
+//! @} imgproc_hist
+
+//! @addtogroup imgproc_segmentation
+//! @{
+
+/** @example samples/cpp/watershed.cpp
+An example using the watershed algorithm
+*/
+
+/** @brief Performs a marker-based image segmentation using the watershed algorithm.
+
+The function implements one of the variants of watershed, non-parametric marker-based segmentation
+algorithm, described in @cite Meyer92 .
+
+Before passing the image to the function, you have to roughly outline the desired regions in the
+image markers with positive (\>0) indices. So, every region is represented as one or more connected
+components with the pixel values 1, 2, 3, and so on. Such markers can be retrieved from a binary
+mask using #findContours and #drawContours (see the watershed.cpp demo). The markers are "seeds" of
+the future image regions. All the other pixels in markers , whose relation to the outlined regions
+is not known and should be defined by the algorithm, should be set to 0's. In the function output,
+each pixel in markers is set to a value of the "seed" components or to -1 at boundaries between the
+regions.
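+
+For example, a minimal sketch with two hand-placed seed regions (the input file name and seed
+positions are placeholders; real code usually derives the markers from contours):
+@code
+    Mat img = imread("coins.png"); // hypothetical 8-bit 3-channel input
+    Mat markers = Mat::zeros(img.size(), CV_32SC1);
+    markers(Rect(10, 10, 5, 5)).setTo(1);   // seed for region 1 (background)
+    markers(Rect(100, 100, 5, 5)).setTo(2); // seed for region 2 (object)
+    watershed(img, markers);
+    // markers now holds region labels, with -1 at watershed boundaries
+@endcode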
+
+@note Any two neighbor connected components are not necessarily separated by a watershed boundary
+(-1's pixels); for example, they can touch each other in the initial marker image passed to the
+function.
+
+@param image Input 8-bit 3-channel image.
+@param markers Input/output 32-bit single-channel image (map) of markers. It should have the same
+size as image .
+
+@sa findContours
+ */
+CV_EXPORTS_W void watershed( InputArray image, InputOutputArray markers );
+
+//! @} imgproc_segmentation
+
+//! @addtogroup imgproc_filter
+//! @{
+
+/** @brief Performs initial step of meanshift segmentation of an image.
+
+The function implements the filtering stage of meanshift segmentation, that is, the output of the
+function is the filtered "posterized" image with color gradients and fine-grain texture flattened.
+At every pixel (X,Y) of the input image (or down-sized input image, see below) the function executes
+meanshift iterations, that is, the pixel (X,Y) neighborhood in the joint space-color hyperspace is
+considered:
+
+\f[(x,y): X- \texttt{sp} \le x  \le X+ \texttt{sp} , Y- \texttt{sp} \le y  \le Y+ \texttt{sp} , ||(R,G,B)-(r,g,b)||   \le \texttt{sr}\f]
+
+where (R,G,B) and (r,g,b) are the vectors of color components at (X,Y) and (x,y), respectively
+(though, the algorithm does not depend on the color space used, so any 3-component color space can
+be used instead). Over the neighborhood the average spatial value (X',Y') and average color vector
+(R',G',B') are found and they act as the neighborhood center on the next iteration:
+
+\f[(X,Y)~(X',Y'), (R,G,B)~(R',G',B').\f]
+
+After the iterations are over, the color components of the initial pixel (that is, the pixel from where
+the iterations started) are set to the final value (average color at the last iteration):
+
+\f[I(X,Y) <- (R*,G*,B*)\f]
+
+When maxLevel \> 0, the Gaussian pyramid of maxLevel+1 levels is built, and the above procedure is
+run on the smallest layer first. After that, the results are propagated to the larger layer and the
+iterations are run again only on those pixels where the layer colors differ by more than sr from the
+lower-resolution layer of the pyramid. That makes boundaries of color regions sharper. Note that the
+results will be actually different from the ones obtained by running the meanshift procedure on the
+whole original image (i.e. when maxLevel==0).
+
+@param src The source 8-bit, 3-channel image.
+@param dst The destination image of the same format and the same size as the source.
+@param sp The spatial window radius.
+@param sr The color window radius.
+@param maxLevel Maximum level of the pyramid for the segmentation.
+@param termcrit Termination criteria: when to stop meanshift iterations.
+ */
+CV_EXPORTS_W void pyrMeanShiftFiltering( InputArray src, OutputArray dst,
+                                         double sp, double sr, int maxLevel = 1,
+                                         TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5,1) );
+
+//! @}
+
+//! @addtogroup imgproc_segmentation
+//! @{
+
+/** @example samples/cpp/grabcut.cpp
+An example using the GrabCut algorithm
+![Sample Screenshot](grabcut_output1.jpg)
+*/
+
+/** @brief Runs the GrabCut algorithm.
+
+The function implements the [GrabCut image segmentation algorithm](http://en.wikipedia.org/wiki/GrabCut).
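+
+For example, a minimal sketch that segments the object inside a user-supplied rectangle (the
+input file name and rectangle are placeholders):
+@code
+    Mat img = imread("photo.png"); // hypothetical 8-bit 3-channel input
+    Rect rect(50, 50, 200, 200);   // rough bounding box of the object
+    Mat mask, bgdModel, fgdModel;
+    grabCut(img, mask, rect, bgdModel, fgdModel, 5, GC_INIT_WITH_RECT);
+    // keep pixels classified as definite or probable foreground
+    Mat fg = (mask == GC_FGD) | (mask == GC_PR_FGD);
+@endcode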
@param img Input 8-bit 3-channel image.
+@param mask Input/output 8-bit single-channel mask. The mask is initialized by the function when
+mode is set to #GC_INIT_WITH_RECT. Its elements may have one of the #GrabCutClasses.
+@param rect ROI containing a segmented object. The pixels outside of the ROI are marked as
+"obvious background". The parameter is only used when mode==#GC_INIT_WITH_RECT .
+@param bgdModel Temporary array for the background model. Do not modify it while you are
+processing the same image.
+@param fgdModel Temporary array for the foreground model. Do not modify it while you are
+processing the same image.
+@param iterCount Number of iterations the algorithm should make before returning the result. Note
+that the result can be refined with further calls with mode==#GC_INIT_WITH_MASK or
+mode==GC_EVAL .
+@param mode Operation mode that could be one of the #GrabCutModes
+ */
+CV_EXPORTS_W void grabCut( InputArray img, InputOutputArray mask, Rect rect,
+                           InputOutputArray bgdModel, InputOutputArray fgdModel,
+                           int iterCount, int mode = GC_EVAL );
+
+//! @} imgproc_segmentation
+
+//! @addtogroup imgproc_misc
+//! @{
+
+/** @example samples/cpp/distrans.cpp
+An example on using the distance transform
+*/
+
+/** @brief Calculates the distance to the closest zero pixel for each pixel of the source image.
+
+The function cv::distanceTransform calculates the approximate or precise distance from every binary
+image pixel to the nearest zero pixel. For zero image pixels, the distance will obviously be zero.
+
+When maskSize == #DIST_MASK_PRECISE and distanceType == #DIST_L2 , the function runs the
+algorithm described in @cite Felzenszwalb04 . This algorithm is parallelized with the TBB library.
+
+In other cases, the algorithm @cite Borgefors86 is used. This means that for a pixel the function
+finds the shortest path to the nearest zero pixel consisting of basic shifts: horizontal, vertical,
+diagonal, or knight's move (the latter is available for a \f$5\times 5\f$ mask). The overall
+distance is calculated as a sum of these basic distances. Since the distance function should be
+symmetric, all of the horizontal and vertical shifts must have the same cost (denoted as `a`), all
+the diagonal shifts must have the same cost (denoted as `b`), and all knight's moves must have the
+same cost (denoted as `c`). For the #DIST_C and #DIST_L1 types, the distance is calculated
+precisely, whereas for #DIST_L2 (Euclidean distance) the distance can be calculated only with a
+relative error (a \f$5\times 5\f$ mask gives more accurate results). For `a`,`b`, and `c`, OpenCV
+uses the values suggested in the original paper:
+- DIST_L1: `a = 1, b = 2`
+- DIST_L2:
+    - `3 x 3`: `a=0.955, b=1.3693`
+    - `5 x 5`: `a=1, b=1.4, c=2.1969`
+- DIST_C: `a = 1, b = 1`
+
+Typically, for a fast, coarse distance estimation #DIST_L2, a \f$3\times 3\f$ mask is used. For a
+more accurate distance estimation #DIST_L2, a \f$5\times 5\f$ mask or the precise algorithm is used.
+Note that both the precise and the approximate algorithms are linear on the number of pixels.
+
+This variant of the function does not only compute the minimum distance for each pixel \f$(x, y)\f$
+but also identifies the nearest connected component consisting of zero pixels
+(labelType==#DIST_LABEL_CCOMP) or the nearest zero pixel (labelType==#DIST_LABEL_PIXEL). Index of the
+component/pixel is stored in `labels(x, y)`. When labelType==#DIST_LABEL_CCOMP, the function
+automatically finds connected components of zero pixels in the input image and marks them with
+distinct labels. When labelType==#DIST_LABEL_PIXEL, the function scans through the input image and
+marks all the zero pixels with distinct labels.
+
+In this mode, the complexity is still linear. That is, the function provides a very fast way to
+compute the Voronoi diagram for a binary image. Currently, the second variant can use only the
+approximate distance transform algorithm, i.e. maskSize=#DIST_MASK_PRECISE is not supported
+yet.
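+
+For example, a minimal self-contained sketch computing the Euclidean distance field of a binary
+image with a single zero pixel:
+@code
+    Mat binary(200, 200, CV_8UC1, Scalar(255));
+    binary.at<uchar>(100, 100) = 0; // one zero pixel acting as the "obstacle"
+    Mat dist, labels;
+    distanceTransform(binary, dist, labels, DIST_L2, DIST_MASK_5, DIST_LABEL_CCOMP);
+    // dist is CV_32F; labels holds the discrete Voronoi diagram
+@endcode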
@param src 8-bit, single-channel (binary) source image.
+@param dst Output image with calculated distances. It is an 8-bit or 32-bit floating-point,
+single-channel image of the same size as src.
+@param labels Output 2D array of labels (the discrete Voronoi diagram). It has the type
+CV_32SC1 and the same size as src.
+@param distanceType Type of distance, see #DistanceTypes
+@param maskSize Size of the distance transform mask, see #DistanceTransformMasks.
+#DIST_MASK_PRECISE is not supported by this variant. In case of the #DIST_L1 or #DIST_C distance type,
+the parameter is forced to 3 because a \f$3\times 3\f$ mask gives the same result as \f$5\times
+5\f$ or any larger aperture.
+@param labelType Type of the label array to build, see #DistanceTransformLabelTypes.
+ */
+CV_EXPORTS_AS(distanceTransformWithLabels) void distanceTransform( InputArray src, OutputArray dst,
+                                     OutputArray labels, int distanceType, int maskSize,
+                                     int labelType = DIST_LABEL_CCOMP );
+
+/** @overload
+@param src 8-bit, single-channel (binary) source image.
+@param dst Output image with calculated distances. It is an 8-bit or 32-bit floating-point,
+single-channel image of the same size as src .
+@param distanceType Type of distance, see #DistanceTypes
+@param maskSize Size of the distance transform mask, see #DistanceTransformMasks. In case of the
+#DIST_L1 or #DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives
+the same result as \f$5\times 5\f$ or any larger aperture.
+@param dstType Type of output image. It can be CV_8U or CV_32F. Type CV_8U can be used only for
+the first variant of the function and distanceType == #DIST_L1.
+*/
+CV_EXPORTS_W void distanceTransform( InputArray src, OutputArray dst,
+                                     int distanceType, int maskSize, int dstType=CV_32F);
+
+/** @brief Fills a connected component with the given color.
+
+The function cv::floodFill fills a connected component starting from the seed point with the specified
+color. The connectivity is determined by the color/brightness closeness of the neighbor pixels.
The
+pixel at \f$(x,y)\f$ is considered to belong to the repainted domain if:
+
+- in case of a grayscale image and floating range
+\f[\texttt{src} (x',y')- \texttt{loDiff} \leq \texttt{src} (x,y)  \leq \texttt{src} (x',y')+ \texttt{upDiff}\f]
+
+
+- in case of a grayscale image and fixed range
+\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)- \texttt{loDiff} \leq \texttt{src} (x,y)  \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)+ \texttt{upDiff}\f]
+
+
+- in case of a color image and floating range
+\f[\texttt{src} (x',y')_r- \texttt{loDiff} _r \leq \texttt{src} (x,y)_r \leq \texttt{src} (x',y')_r+ \texttt{upDiff} _r,\f]
+\f[\texttt{src} (x',y')_g- \texttt{loDiff} _g \leq \texttt{src} (x,y)_g \leq \texttt{src} (x',y')_g+ \texttt{upDiff} _g\f]
+and
+\f[\texttt{src} (x',y')_b- \texttt{loDiff} _b \leq \texttt{src} (x,y)_b \leq \texttt{src} (x',y')_b+ \texttt{upDiff} _b\f]
+
+
+- in case of a color image and fixed range
+\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_r- \texttt{loDiff} _r \leq \texttt{src} (x,y)_r \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_r+ \texttt{upDiff} _r,\f]
+\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_g- \texttt{loDiff} _g \leq \texttt{src} (x,y)_g \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_g+ \texttt{upDiff} _g\f]
+and
+\f[\texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_b- \texttt{loDiff} _b \leq \texttt{src} (x,y)_b \leq \texttt{src} ( \texttt{seedPoint} .x, \texttt{seedPoint} .y)_b+ \texttt{upDiff} _b\f]
+
+
+where \f$src(x',y')\f$ is the value of one of the pixel neighbors that is already known to belong to the
+component. That is, to be added to the connected component, the color/brightness of the pixel should
+be close enough to:
+- Color/brightness of one of its neighbors that already belong to the connected component in case
+of a floating range.
+- Color/brightness of the seed point in case of a fixed range.
+
+Use these functions to either mark a connected component with the specified color in-place, or build
+a mask and then extract the contour, or copy the region to another image, and so on.
+
+@param image Input/output 1- or 3-channel, 8-bit, or floating-point image. It is modified by the
+function unless the #FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See
+the details below.
+@param mask Operation mask that should be a single-channel 8-bit image, 2 pixels wider and 2 pixels
+taller than image. If an empty Mat is passed it will be created automatically. Since this is both an
+input and output parameter, you must take responsibility for initializing it.
+Flood-filling cannot go across non-zero pixels in the input mask. For example,
+an edge detector output can be used as a mask to stop filling at edges. On output, pixels in the
+mask corresponding to filled pixels in the image are set to 1 or to the specified value in flags
+as described below. Additionally, the function fills the border of the mask with ones to simplify
+internal processing. It is therefore possible to use the same mask in multiple calls to the function
+to make sure the filled areas do not overlap.
+@param seedPoint Starting point.
+@param newVal New value of the repainted domain pixels.
+@param loDiff Maximal lower brightness/color difference between the currently observed pixel and
+one of its neighbors belonging to the component, or a seed pixel being added to the component.
+
+@param upDiff Maximal upper brightness/color difference between the currently observed pixel and
+one of its neighbors belonging to the component, or a seed pixel being added to the component.
+@param rect Optional output parameter set by the function to the minimum bounding rectangle of the
+repainted domain.
+@param flags Operation flags. The first 8 bits contain a connectivity value. The default value of
+4 means that only the four nearest neighbor pixels (those that share an edge) are considered. A
+connectivity value of 8 means that the eight nearest neighbor pixels (those that share a corner)
+will be considered. The next 8 bits (8-16) contain a value between 1 and 255 with which to fill
+the mask (the default value is 1). For example, 4 | ( 255 \<\< 8 ) will consider 4 nearest
+neighbors and fill the mask with a value of 255. The following additional options occupy higher
+bits and therefore may be further combined with the connectivity and mask fill values using
+bit-wise or (|), see #FloodFillFlags.
+
+@note Since the mask is larger than the filled image, a pixel \f$(x, y)\f$ in image corresponds to the
+pixel \f$(x+1, y+1)\f$ in the mask .
+
+@sa findContours
+ */
+CV_EXPORTS_W int floodFill( InputOutputArray image, InputOutputArray mask,
+                            Point seedPoint, Scalar newVal, CV_OUT Rect* rect=0,
+                            Scalar loDiff = Scalar(), Scalar upDiff = Scalar(),
+                            int flags = 4 );
+
+/** @example samples/cpp/ffilldemo.cpp
+An example using the FloodFill technique
+*/
+
+/** @overload
+
+variant without `mask` parameter
+*/
+CV_EXPORTS int floodFill( InputOutputArray image,
+                          Point seedPoint, Scalar newVal, CV_OUT Rect* rect = 0,
+                          Scalar loDiff = Scalar(), Scalar upDiff = Scalar(),
+                          int flags = 4 );
+
+//! Performs linear blending of two images:
+//! \f[ \texttt{dst}(i,j) = \texttt{weights1}(i,j)*\texttt{src1}(i,j) + \texttt{weights2}(i,j)*\texttt{src2}(i,j) \f]
+//! @param src1 It has a type of CV_8UC(n) or CV_32FC(n), where n is a positive integer.
+//! @param src2 It has the same type and size as src1.
+//! @param weights1 It has a type of CV_32FC1 and the same size as src1.
+//! @param weights2 It has a type of CV_32FC1 and the same size as src1.
+//! @param dst It is created if it does not have the same size and type as src1.
+CV_EXPORTS_W void blendLinear(InputArray src1, InputArray src2, InputArray weights1, InputArray weights2, OutputArray dst);
+
+//! @} imgproc_misc
+
+//! @addtogroup imgproc_color_conversions
+//! @{
+
+/** @brief Converts an image from one color space to another.
+
+The function converts an input image from one color space to another. In case of a transformation
+to-from RGB color space, the order of the channels should be specified explicitly (RGB or BGR). Note
+that the default color format in OpenCV is often referred to as RGB but it is actually BGR (the
+bytes are reversed). So the first byte in a standard (24-bit) color image will be an 8-bit Blue
+component, the second byte will be Green, and the third byte will be Red. The fourth, fifth, and
+sixth bytes would then be the second pixel (Blue, then Green, then Red), and so on.
+
+The conventional ranges for R, G, and B channel values are:
+- 0 to 255 for CV_8U images
+- 0 to 65535 for CV_16U images
+- 0 to 1 for CV_32F images
+
+In case of linear transformations, the range does not matter. But in case of a non-linear
+transformation, an input RGB image should be normalized to the proper value range to get the correct
+results, for example, for RGB \f$\rightarrow\f$ L\*u\*v\* transformation. For example, if you have a
+32-bit floating-point image directly converted from an 8-bit image without any scaling, then it will
+have the 0..255 value range instead of 0..1 assumed by the function. So, before calling #cvtColor ,
+you first need to scale the image down:
+@code
+    img *= 1./255;
+    cvtColor(img, img, COLOR_BGR2Luv);
+@endcode
+If you use #cvtColor with 8-bit images, the conversion will lose some information. For many
+applications, this will not be noticeable but it is recommended to use 32-bit images in applications
+that need the full range of colors or that convert an image before an operation and then convert
+back.
+
+If conversion adds the alpha channel, its value will be set to the maximum of corresponding channel
+range: 255 for CV_8U, 65535 for CV_16U, 1 for CV_32F.
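+
+For example, a minimal sketch of the most common conversion, BGR to grayscale (the input file
+name is a placeholder):
+@code
+    Mat img = imread("image.png"); // loaded as BGR by default
+    Mat gray;
+    cvtColor(img, gray, COLOR_BGR2GRAY);
+@endcode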
@param src input image: 8-bit unsigned, 16-bit unsigned ( CV_16UC... ), or single-precision
+floating-point.
+@param dst output image of the same size and depth as src.
+@param code color space conversion code (see #ColorConversionCodes).
+@param dstCn number of channels in the destination image; if the parameter is 0, the number of the
+channels is derived automatically from src and code.
+
+@see @ref imgproc_color_conversions
+ */
+CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0 );
+
+/** @brief Converts an image from one color space to another where the source image is
+stored in two planes.
+
+This function only supports YUV420 to RGB conversion as of now.
+
+@param src1: 8-bit image (#CV_8U) of the Y plane.
+@param src2: image containing interleaved U/V plane.
+@param dst: output image.
+@param code: Specifies the type of conversion. It can take any of the following values:
+- #COLOR_YUV2BGR_NV12
+- #COLOR_YUV2RGB_NV12
+- #COLOR_YUV2BGRA_NV12
+- #COLOR_YUV2RGBA_NV12
+- #COLOR_YUV2BGR_NV21
+- #COLOR_YUV2RGB_NV21
+- #COLOR_YUV2BGRA_NV21
+- #COLOR_YUV2RGBA_NV21
+*/
+CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code );
+
+/** @brief main function for all demosaicing processes
+
+@param src input image: 8-bit unsigned or 16-bit unsigned.
+@param dst output image of the same size and depth as src.
+@param code Color space conversion code (see the description below).
+@param dstCn number of channels in the destination image; if the parameter is 0, the number of the
+channels is derived automatically from src and code.
+
+The function can do the following transformations:
+
+- Demosaicing using bilinear interpolation
+
+    #COLOR_BayerBG2BGR , #COLOR_BayerGB2BGR , #COLOR_BayerRG2BGR , #COLOR_BayerGR2BGR
+
+    #COLOR_BayerBG2GRAY , #COLOR_BayerGB2GRAY , #COLOR_BayerRG2GRAY , #COLOR_BayerGR2GRAY
+
+- Demosaicing using Variable Number of Gradients.
+
+    #COLOR_BayerBG2BGR_VNG , #COLOR_BayerGB2BGR_VNG , #COLOR_BayerRG2BGR_VNG , #COLOR_BayerGR2BGR_VNG
+
+- Edge-Aware Demosaicing.
+
+    #COLOR_BayerBG2BGR_EA , #COLOR_BayerGB2BGR_EA , #COLOR_BayerRG2BGR_EA , #COLOR_BayerGR2BGR_EA
+
+- Demosaicing with alpha channel
+
+    #COLOR_BayerBG2BGRA , #COLOR_BayerGB2BGRA , #COLOR_BayerRG2BGRA , #COLOR_BayerGR2BGRA
+
+@sa cvtColor
+*/
+CV_EXPORTS_W void demosaicing(InputArray src, OutputArray dst, int code, int dstCn = 0);
+
+//! @} imgproc_color_conversions
+
+//! @addtogroup imgproc_shape
+//! @{
+
+/** @brief Calculates all of the moments up to the third order of a polygon or rasterized shape.
+
+The function computes moments, up to the 3rd order, of a vector shape or a rasterized shape. The
+results are returned in the structure cv::Moments.
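+
+For example, a minimal sketch computing the centroid of a polygon given as a point vector (in
+practice the points often come from #findContours):
+@code
+    std::vector<Point> contour = { Point(10, 10), Point(100, 10),
+                                   Point(100, 80), Point(10, 80) };
+    Moments m = moments(contour);
+    // centroid from the raw moments; valid only if m.m00 != 0
+    Point2f centroid(float(m.m10 / m.m00), float(m.m01 / m.m00));
+@endcode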
+
+@param array Raster image (single-channel, 8-bit or floating-point 2D array) or an array (
+\f$1 \times N\f$ or \f$N \times 1\f$ ) of 2D points (Point or Point2f ).
+@param binaryImage If it is true, all non-zero image pixels are treated as 1's. The parameter is
+used for images only.
+@returns moments.
+
+@note Only applicable to contour moments calculations from Python bindings: the numpy
+type for the input array should be either np.int32 or np.float32.
+
+@sa contourArea, arcLength
+ */
+CV_EXPORTS_W Moments moments( InputArray array, bool binaryImage = false );
+
+/** @brief Calculates seven Hu invariants.
+
+The function calculates seven Hu invariants (introduced in @cite Hu62; see also
+<http://en.wikipedia.org/wiki/Image_moment>) defined as:
+
+\f[\begin{array}{l} hu[0]= \eta _{20}+ \eta _{02} \\ hu[1]=( \eta _{20}- \eta _{02})^{2}+4 \eta _{11}^{2} \\ hu[2]=( \eta _{30}-3 \eta _{12})^{2}+ (3 \eta _{21}- \eta _{03})^{2} \\ hu[3]=( \eta _{30}+ \eta _{12})^{2}+ ( \eta _{21}+ \eta _{03})^{2} \\ hu[4]=( \eta _{30}-3 \eta _{12})( \eta _{30}+ \eta _{12})[( \eta _{30}+ \eta _{12})^{2}-3( \eta _{21}+ \eta _{03})^{2}]+(3 \eta _{21}- \eta _{03})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}] \\ hu[5]=( \eta _{20}- \eta _{02})[( \eta _{30}+ \eta _{12})^{2}- ( \eta _{21}+ \eta _{03})^{2}]+4 \eta _{11}( \eta _{30}+ \eta _{12})( \eta _{21}+ \eta _{03}) \\ hu[6]=(3 \eta _{21}- \eta _{03})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}]-( \eta _{30}-3 \eta _{12})( \eta _{21}+ \eta _{03})[3( \eta _{30}+ \eta _{12})^{2}-( \eta _{21}+ \eta _{03})^{2}] \\ \end{array}\f]
+
+where \f$\eta_{ji}\f$ stands for \f$\texttt{Moments::nu}_{ji}\f$ .
+
+These values are proven to be invariant to the image scale, rotation, and reflection, except for
+the seventh one, whose sign changes under reflection. This invariance is proved with the assumption
+of infinite image resolution. In case of raster images, the computed Hu invariants for the original
+and transformed images differ slightly.
+
+@param moments Input moments computed with #moments.
+@param hu Output Hu invariants.
+
+@sa matchShapes
+ */
+CV_EXPORTS void HuMoments( const Moments& moments, double hu[7] );
+
+/** @overload */
+CV_EXPORTS_W void HuMoments( const Moments& m, OutputArray hu );
+
+//! @} imgproc_shape
+
+//! @addtogroup imgproc_object
+//! @{
+
+//! type of the template matching operation
+enum TemplateMatchModes {
+    TM_SQDIFF        = 0, /*!< \f[R(x,y)= \sum _{x',y'} (T(x',y')-I(x+x',y+y'))^2\f]
+                               with mask:
+                               \f[R(x,y)= \sum _{x',y'} \left( (T(x',y')-I(x+x',y+y')) \cdot
+                               M(x',y') \right)^2\f] */
+    TM_SQDIFF_NORMED = 1, /*!< \f[R(x,y)= \frac{\sum_{x',y'} (T(x',y')-I(x+x',y+y'))^2}{\sqrt{\sum_{
+                               x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}}\f]
+                               with mask:
+                               \f[R(x,y)= \frac{\sum _{x',y'} \left( (T(x',y')-I(x+x',y+y')) \cdot
+                               M(x',y') \right)^2}{\sqrt{\sum_{x',y'} \left( T(x',y') \cdot
+                               M(x',y') \right)^2 \cdot \sum_{x',y'} \left( I(x+x',y+y') \cdot
+                               M(x',y') \right)^2}}\f] */
+    TM_CCORR         = 2, /*!< \f[R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y'))\f]
+                               with mask:
+                               \f[R(x,y)= \sum _{x',y'} (T(x',y') \cdot I(x+x',y+y') \cdot M(x',y')
+                               ^2)\f] */
+    TM_CCORR_NORMED  = 3, /*!< \f[R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I(x+x',y+y'))}{\sqrt{
+                               \sum_{x',y'}T(x',y')^2 \cdot \sum_{x',y'} I(x+x',y+y')^2}}\f]
+                               with mask:
+                               \f[R(x,y)= \frac{\sum_{x',y'} (T(x',y') \cdot I(x+x',y+y') \cdot
+                               M(x',y')^2)}{\sqrt{\sum_{x',y'} \left( T(x',y') \cdot M(x',y')
+                               \right)^2 \cdot \sum_{x',y'} \left( I(x+x',y+y') \cdot M(x',y')
+                               \right)^2}}\f] */
+    TM_CCOEFF        = 4, /*!< \f[R(x,y)= \sum _{x',y'} (T'(x',y') \cdot I'(x+x',y+y'))\f]
+                               where
+                               \f[\begin{array}{l} T'(x',y')=T(x',y') - 1/(w \cdot h) \cdot \sum _{
+                               x'',y''} T(x'',y'') \\ I'(x+x',y+y')=I(x+x',y+y') - 1/(w \cdot h)
+                               \cdot \sum _{x'',y''} I(x+x'',y+y'') \end{array}\f]
+                               with mask:
+                               \f[\begin{array}{l} T'(x',y')=M(x',y') \cdot \left( T(x',y') -
+                               \frac{1}{\sum _{x'',y''} M(x'',y'')} \cdot \sum _{x'',y''}
+                               (T(x'',y'') \cdot M(x'',y'')) \right) \\ I'(x+x',y+y')=M(x',y')
+                               \cdot \left( I(x+x',y+y') - \frac{1}{\sum _{x'',y''} M(x'',y'')}
+                               \cdot \sum _{x'',y''} (I(x+x'',y+y'') \cdot M(x'',y'')) \right)
+                               \end{array} \f] */
+    TM_CCOEFF_NORMED = 5  /*!< \f[R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{
+                               \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2}
+                               }\f] */
+};
+
+/** @example samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp
+An example using Template Matching algorithm
+*/
+
+/** @brief Compares a template against overlapped image regions.
+
+The function slides through image, compares the overlapped patches of size \f$w \times h\f$ against
+templ using the specified method and stores the comparison results in result. #TemplateMatchModes
+describes the formulae for the available comparison methods ( \f$I\f$ denotes image, \f$T\f$
+template, \f$R\f$ result, \f$M\f$ the optional mask ). The summation is done over template and/or
+the image patch: \f$x' = 0...w-1, y' = 0...h-1\f$
+
+After the function finishes the comparison, the best matches can be found as global minima (when
+#TM_SQDIFF was used) or maxima (when #TM_CCORR or #TM_CCOEFF was used) using the
+#minMaxLoc function. In case of a color image, template summation in the numerator and each sum in
+the denominator is done over all of the channels and separate mean values are used for each channel.
+That is, the function can take a color template and a color image. The result will still be a
+single-channel image, which is easier to analyze.
+
+@param image Image where the search is running. It must be 8-bit or 32-bit floating-point.
+@param templ Searched template. It must not be greater than the source image and must have the same
+data type.
+@param result Map of comparison results. It must be single-channel 32-bit floating-point. If image
+is \f$W \times H\f$ and templ is \f$w \times h\f$, then result is \f$(W-w+1) \times (H-h+1)\f$.
+@param method Parameter specifying the comparison method, see #TemplateMatchModes
+@param mask Optional mask. It must have the same size as templ. It must either have the same number
+            of channels as template or only one channel, which is then used for all template and
+            image channels. If the data type is #CV_8U, the mask is interpreted as a binary mask,
+            meaning only elements where mask is nonzero are used and are kept unchanged independent
+            of the actual mask value (weight equals 1). For data type #CV_32F, the mask values are
+            used as weights. The exact formulas are documented in #TemplateMatchModes.
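+
+A minimal usage sketch (hypothetical 8-bit images `img` and `tmpl`):
+@code
+    Mat result;
+    matchTemplate(img, tmpl, result, TM_CCOEFF_NORMED);
+    double maxVal; Point maxLoc;
+    minMaxLoc(result, 0, &maxVal, 0, &maxLoc);       // best match for TM_CCOEFF_NORMED
+    rectangle(img, Rect(maxLoc, tmpl.size()), Scalar::all(255), 2);
+@endcode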
+ */
+CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ,
+                                 OutputArray result, int method, InputArray mask = noArray() );
+
+//! @}
+
+//! @addtogroup imgproc_shape
+//! @{
+
+/** @example samples/cpp/connected_components.cpp
+This program demonstrates connected components and use of the trackbar
+*/
+
+/** @brief Computes the connected components labeled image of a boolean image
+
+The image is labeled with 4- or 8-way connectivity. The function returns N, the total number of
+labels in [0, N-1], where 0 represents the background label. ltype specifies the output label image
+type, an important consideration based on the total number of labels or alternatively the total
+number of pixels in the source image. ccltype specifies the connected components labeling algorithm
+to use, currently Bolelli (Spaghetti) @cite Bolelli2019, Grana (BBDT) @cite Grana2010 and Wu's
+(SAUF) @cite Wu2009 algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes for
+details. Note that the SAUF algorithm forces a row-major ordering of labels, while Spaghetti and
+BBDT do not. This function uses a parallel version of the algorithms if at least one allowed
+parallel framework is enabled and if the rows of the image are at least twice the number returned
+by #getNumberOfCPUs.
+
+@param image the 8-bit single-channel image to be labeled
+@param labels destination labeled image
+@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
+@param ltype output image label type. Currently CV_32S and CV_16U are supported.
+@param ccltype connected components algorithm type (see the #ConnectedComponentsAlgorithmsTypes).
+*/
+CV_EXPORTS_AS(connectedComponentsWithAlgorithm) int connectedComponents(InputArray image, OutputArray labels,
+                                                                        int connectivity, int ltype, int ccltype);
+
+
+/** @overload
+
+@param image the 8-bit single-channel image to be labeled
+@param labels destination labeled image
+@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
+@param ltype output image label type. Currently CV_32S and CV_16U are supported.
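+
+A minimal usage sketch (hypothetical binary input `bw`):
+@code
+    Mat labels;
+    int n = connectedComponents(bw, labels);   // labels take values in [0, n-1]
+@endcode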
+*/
+CV_EXPORTS_W int connectedComponents(InputArray image, OutputArray labels,
+                                     int connectivity = 8, int ltype = CV_32S);
+
+
+/** @brief Computes the connected components labeled image of a boolean image and also produces a statistics output for each label
+
+The image is labeled with 4- or 8-way connectivity. The function returns N, the total number of
+labels in [0, N-1], where 0 represents the background label. ltype specifies the output label image
+type, an important consideration based on the total number of labels or alternatively the total
+number of pixels in the source image. ccltype specifies the connected components labeling algorithm
+to use, currently Bolelli (Spaghetti) @cite Bolelli2019, Grana (BBDT) @cite Grana2010 and Wu's
+(SAUF) @cite Wu2009 algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes for
+details. Note that the SAUF algorithm forces a row-major ordering of labels, while Spaghetti and
+BBDT do not. This function uses a parallel version of the algorithms (statistics included) if at
+least one allowed parallel framework is enabled and if the rows of the image are at least twice the
+number returned by #getNumberOfCPUs.
+
+@param image the 8-bit single-channel image to be labeled
+@param labels destination labeled image
+@param stats statistics output for each label, including the background label.
+Statistics are accessed via stats(label, COLUMN) where COLUMN is one of
+#ConnectedComponentsTypes, selecting the statistic. The data type is CV_32S.
+@param centroids centroid output for each label, including the background label. Centroids are
+accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type is CV_64F.
+@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
+@param ltype output image label type. Currently CV_32S and CV_16U are supported.
+@param ccltype connected components algorithm type (see #ConnectedComponentsAlgorithmsTypes).
+*/
+CV_EXPORTS_AS(connectedComponentsWithStatsWithAlgorithm) int connectedComponentsWithStats(InputArray image, OutputArray labels,
+                                                                                          OutputArray stats, OutputArray centroids,
+                                                                                          int connectivity, int ltype, int ccltype);
+
+/** @overload
+@param image the 8-bit single-channel image to be labeled
+@param labels destination labeled image
+@param stats statistics output for each label, including the background label.
+Statistics are accessed via stats(label, COLUMN) where COLUMN is one of
+#ConnectedComponentsTypes, selecting the statistic. The data type is CV_32S.
+@param centroids centroid output for each label, including the background label. Centroids are
+accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type is CV_64F.
+@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
+@param ltype output image label type. Currently CV_32S and CV_16U are supported.
+*/
+CV_EXPORTS_W int connectedComponentsWithStats(InputArray image, OutputArray labels,
+                                              OutputArray stats, OutputArray centroids,
+                                              int connectivity = 8, int ltype = CV_32S);
+
+
+/** @brief Finds contours in a binary image.
+
+The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . The contours
+are a useful tool for shape analysis and object detection and recognition. See squares.cpp in the
+OpenCV sample directory.
+@note Since OpenCV 3.2, the source image is not modified by this function.
+
+@param image Source, an 8-bit single-channel image. Non-zero pixels are treated as 1's. Zero
+pixels remain 0's, so the image is treated as binary. You can use #compare, #inRange, #threshold,
+#adaptiveThreshold, #Canny, and others to create a binary image out of a grayscale or color one.
+If mode equals #RETR_CCOMP or #RETR_FLOODFILL, the input can also be a 32-bit integer image of labels (CV_32SC1).
+@param contours Detected contours. Each contour is stored as a vector of points (e.g.
+std::vector<std::vector<cv::Point> >).
+@param hierarchy Optional output vector (e.g. std::vector<cv::Vec4i>), containing information about the image topology. It has
+as many elements as the number of contours. For each i-th contour contours[i], the elements
+hierarchy[i][0], hierarchy[i][1], hierarchy[i][2], and hierarchy[i][3] are set to 0-based indices
+in contours of the next and previous contours at the same hierarchical level, the first child
+contour and the parent contour, respectively. If for the contour i there are no next, previous,
+parent, or nested contours, the corresponding elements of hierarchy[i] will be negative.
+@note In Python, hierarchy is nested inside a top-level array. Use hierarchy[0][i] to access hierarchical elements of the i-th contour.
+@param mode Contour retrieval mode, see #RetrievalModes
+@param method Contour approximation method, see #ContourApproximationModes
+@param offset Optional offset by which every contour point is shifted. This is useful if the
+contours are extracted from the image ROI and then they should be analyzed in the whole image
+context.
+ */
+CV_EXPORTS_W void findContours( InputArray image, OutputArrayOfArrays contours,
+                                OutputArray hierarchy, int mode,
+                                int method, Point offset = Point());
+
+/** @overload */
+CV_EXPORTS void findContours( InputArray image, OutputArrayOfArrays contours,
+                              int mode, int method, Point offset = Point());
+
+/** @example samples/cpp/squares.cpp
+A program using pyramid scaling, Canny, contours and contour simplification to find
+squares in a list of images (pic1-6.png). Returns sequence of squares detected on the image.
+*/
+
+/** @example samples/tapi/squares.cpp
+A program using pyramid scaling, Canny, contours and contour simplification to find
+squares in the input image.
+*/
+
+/** @brief Approximates a polygonal curve(s) with the specified precision.
+
+The function cv::approxPolyDP approximates a curve or a polygon with another curve/polygon with
+fewer vertices so that the distance between them is less than or equal to the specified precision.
+It uses the Douglas-Peucker algorithm <http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm>
+
+@param curve Input vector of 2D points stored in std::vector or Mat
+@param approxCurve Result of the approximation. The type should match the type of the input curve.
+@param epsilon Parameter specifying the approximation accuracy. This is the maximum distance
+between the original curve and its approximation.
+@param closed If true, the approximated curve is closed (its first and last vertices are
+connected). Otherwise, it is not closed.
+ */
+CV_EXPORTS_W void approxPolyDP( InputArray curve,
+                                OutputArray approxCurve,
+                                double epsilon, bool closed );
+
+/** @brief Calculates a contour perimeter or a curve length.
+
+The function computes a curve length or a closed contour perimeter.
+
+@param curve Input vector of 2D points, stored in std::vector or Mat.
+@param closed Flag indicating whether the curve is closed or not.
+ */
+CV_EXPORTS_W double arcLength( InputArray curve, bool closed );
+
+/** @brief Calculates the up-right bounding rectangle of a point set or non-zero pixels of gray-scale image.
+
+The function calculates and returns the minimal up-right bounding rectangle for the specified point set or
+non-zero pixels of gray-scale image.
+
+@param array Input gray-scale image or 2D point set, stored in std::vector or Mat.
+ */
+CV_EXPORTS_W Rect boundingRect( InputArray array );
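+
+/* A minimal sketch combining #findContours and #boundingRect (hypothetical binary
+image `bw` and color canvas `vis`):
+@code
+    std::vector<std::vector<Point> > contours;
+    findContours(bw, contours, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
+    for (const auto& c : contours)
+        rectangle(vis, boundingRect(c), Scalar(0, 255, 0));   // box each blob
+@endcode
+*/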
+
+/** @brief Calculates a contour area.
+
+The function computes a contour area. Similarly to moments, the area is computed using the Green
+formula. Thus, the returned area and the number of non-zero pixels, if you draw the contour using
+#drawContours or #fillPoly, can be different. Also, the function will most certainly give wrong
+results for contours with self-intersections.
+
+Example:
+@code
+    vector<Point2f> contour;
+    contour.push_back(Point2f(0, 0));
+    contour.push_back(Point2f(10, 0));
+    contour.push_back(Point2f(10, 10));
+    contour.push_back(Point2f(5, 4));
+
+    double area0 = contourArea(contour);
+    vector<Point2f> approx;
+    approxPolyDP(contour, approx, 5, true);
+    double area1 = contourArea(approx);
+
+    cout << "area0 =" << area0 << endl <<
+            "area1 =" << area1 << endl <<
+            "approx poly vertices" << approx.size() << endl;
+@endcode
+@param contour Input vector of 2D points (contour vertices), stored in std::vector or Mat.
+@param oriented Oriented area flag. If it is true, the function returns a signed area value,
+depending on the contour orientation (clockwise or counter-clockwise). Using this feature you can
+determine orientation of a contour by taking the sign of an area. By default, the parameter is
+false, which means that the absolute value is returned.
+ */
+CV_EXPORTS_W double contourArea( InputArray contour, bool oriented = false );
+
+/** @brief Finds a rotated rectangle of the minimum area enclosing the input 2D point set.
+
+The function calculates and returns the minimum-area bounding rectangle (possibly rotated) for a
+specified point set. Developers should keep in mind that the returned RotatedRect can contain negative
+indices when data is close to the containing Mat element boundary.
+
+@param points Input vector of 2D points, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect minAreaRect( InputArray points );
+
+/** @brief Finds the four vertices of a rotated rect. Useful to draw the rotated rectangle.
+
+The function finds the four vertices of a rotated rectangle. This function is useful to draw the
+rectangle. In C++, instead of using this function, you can directly use the RotatedRect::points
+method. Please visit the @ref tutorial_bounding_rotated_ellipses "tutorial on Creating Bounding
+rotated boxes and ellipses for contours" for more information.
+
+@param box The input rotated rectangle. It may be the output of #minAreaRect.
+@param points The output array of four vertices of rectangles.
+ */
+CV_EXPORTS_W void boxPoints(RotatedRect box, OutputArray points);
+
+/** @brief Finds a circle of the minimum area enclosing a 2D point set.
+
+The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm.
+
+@param points Input vector of 2D points, stored in std::vector\<\> or Mat
+@param center Output center of the circle.
+@param radius Output radius of the circle.
+ */
+CV_EXPORTS_W void minEnclosingCircle( InputArray points,
+                                      CV_OUT Point2f& center, CV_OUT float& radius );
+
+/** @example samples/cpp/minarea.cpp
+*/
+
+/** @brief Finds a triangle of minimum area enclosing a 2D point set and returns its area.
+
+The function finds a triangle of minimum area enclosing the given set of 2D points and returns its
+area. The output for a given 2D point set is shown in the image below. 2D points are depicted in
+*red* and the enclosing triangle in *yellow*.
+
+![Sample output of the minimum enclosing triangle function](pics/minenclosingtriangle.png)
+
+The implementation of the algorithm is based on O'Rourke's @cite ORourke86 and Klee and Laskowski's
+@cite KleeLaskowski85 papers. O'Rourke provides a \f$\Theta(n)\f$ algorithm for finding the minimal
+enclosing triangle of a 2D convex polygon with n vertices.
+Since the #minEnclosingTriangle function takes a 2D point set as input, an additional preprocessing
+step of computing the convex hull of the 2D point set is required. The complexity of the #convexHull
+function is \f$O(n \log n)\f$ which is higher than \f$\Theta(n)\f$. Thus the overall complexity of
+the function is \f$O(n \log n)\f$.
+
+@param points Input vector of 2D points with depth CV_32S or CV_32F, stored in std::vector\<\> or Mat
+@param triangle Output vector of three 2D points defining the vertices of the triangle. The depth
+of the OutputArray must be CV_32F.
+ */
+CV_EXPORTS_W double minEnclosingTriangle( InputArray points, CV_OUT OutputArray triangle );
+
+/** @brief Compares two shapes.
+
+The function compares two shapes. All three implemented methods use the Hu invariants (see #HuMoments).
+
+@param contour1 First contour or grayscale image.
+@param contour2 Second contour or grayscale image.
+@param method Comparison method, see #ShapeMatchModes
+@param parameter Method-specific parameter (not supported now).
+ */
+CV_EXPORTS_W double matchShapes( InputArray contour1, InputArray contour2,
+                                 int method, double parameter );
+
+/** @example samples/cpp/convexhull.cpp
+An example using the convexHull functionality
+*/
+
+/** @brief Finds the convex hull of a point set.
+
+The function cv::convexHull finds the convex hull of a 2D point set using Sklansky's algorithm @cite Sklansky82
+that has *O(N log N)* complexity in the current implementation.
+
+@param points Input 2D point set, stored in std::vector or Mat.
+@param hull Output convex hull. It is either an integer vector of indices or vector of points. In
+the first case, the hull elements are 0-based indices of the convex hull points in the original
+array (since the set of convex hull points is a subset of the original point set). In the second
+case, hull elements are the convex hull points themselves.
+@param clockwise Orientation flag. If it is true, the output convex hull is oriented clockwise.
+Otherwise, it is oriented counter-clockwise. The assumed coordinate system has its X axis pointing
+to the right, and its Y axis pointing upwards.
+@param returnPoints Operation flag. In case of a matrix, when the flag is true, the function
+returns convex hull points. Otherwise, it returns indices of the convex hull points. When the
+output array is std::vector, the flag is ignored, and the output depends on the type of the
+vector: std::vector\<int\> implies returnPoints=false, std::vector\<Point\> implies
+returnPoints=true.
+
+@note `points` and `hull` should be different arrays, in-place processing isn't supported.
+
+Check @ref tutorial_hull "the corresponding tutorial" for more details.
+
+useful links:
+
+https://www.learnopencv.com/convex-hull-using-opencv-in-python-and-c/
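+
+A minimal usage sketch (hypothetical point set):
+@code
+    std::vector<Point> pts = { Point(0, 0), Point(10, 0), Point(10, 10), Point(5, 4) };
+    std::vector<int> hullIdx;
+    convexHull(pts, hullIdx);      // std::vector<int> output -> indices of hull points
+@endcode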
+ */
+CV_EXPORTS_W void convexHull( InputArray points, OutputArray hull,
+                              bool clockwise = false, bool returnPoints = true );
+
+/** @brief Finds the convexity defects of a contour.
+
+The figure below displays convexity defects of a hand contour:
+
+![image](pics/defects.png)
+
+@param contour Input contour.
+@param convexhull Convex hull obtained using convexHull that should contain indices of the contour
+points that make the hull.
+@param convexityDefects The output vector of convexity defects. In C++ and the new Python/Java
+interface each convexity defect is represented as 4-element integer vector (a.k.a. #Vec4i):
+(start_index, end_index, farthest_pt_index, fixpt_depth), where indices are 0-based indices
+in the original contour of the convexity defect beginning, end and the farthest point, and
+fixpt_depth is fixed-point approximation (with 8 fractional bits) of the distance between the
+farthest contour point and the hull. That is, the floating-point value of the depth is
+fixpt_depth/256.0.
+ */
+CV_EXPORTS_W void convexityDefects( InputArray contour, InputArray convexhull, OutputArray convexityDefects );
+
+/** @brief Tests a contour convexity.
+
+The function tests whether the input contour is convex or not. The contour must be simple, that is,
+without self-intersections. Otherwise, the function output is undefined.
+
+@param contour Input vector of 2D points, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W bool isContourConvex( InputArray contour );
+
+/** @example samples/cpp/intersectExample.cpp
+Examples of how intersectConvexConvex works
+*/
+
+/** @brief Finds intersection of two convex polygons
+
+@param p1 First polygon
+@param p2 Second polygon
+@param p12 Output polygon describing the intersecting area
+@param handleNested When true, an intersection is found if one of the polygons is fully enclosed in the other.
+When false, no intersection is found in that case. If the polygons share a side or the vertex of one polygon lies on an edge
+of the other, they are not considered nested and an intersection will be found regardless of the value of handleNested.
+
+@returns Absolute value of the area of the intersecting polygon
+
+@note intersectConvexConvex doesn't confirm that both polygons are convex and will return invalid results if they aren't.
+ */
+CV_EXPORTS_W float intersectConvexConvex( InputArray p1, InputArray p2,
+                                          OutputArray p12, bool handleNested = true );
+
+/** @example samples/cpp/fitellipse.cpp
+An example using the fitEllipse technique
+*/
+
+/** @brief Fits an ellipse around a set of 2D points.
+
+The function calculates the ellipse that best fits (in a least-squares sense) a set of 2D points.
+It returns the rotated rectangle in which the ellipse is inscribed. The first algorithm described by @cite Fitzgibbon95
+is used. Developers should keep in mind that it is possible that the returned
+ellipse/rotatedRect data contains negative indices, due to the data points being close to the
+border of the containing Mat element.
+
+@param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipse( InputArray points );
+
+/** @brief Fits an ellipse around a set of 2D points.
+
+ The function calculates the ellipse that fits a set of 2D points.
+ It returns the rotated rectangle in which the ellipse is inscribed.
+ The Approximate Mean Square (AMS) proposed by @cite Taubin1991 is used.
+
+ For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$,
+ which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$.
+ However, to specify an ellipse, all that is needed is five numbers: the major and minor axes lengths \f$ (a,b) \f$,
+ the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines,
+ quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits.
+ If the fit is found to be a parabolic or hyperbolic function then the standard #fitEllipse method is used.
+ The AMS method restricts the fit to parabolic, hyperbolic and elliptical curves
+ by imposing the condition that \f$ A^T ( D_x^T D_x  +   D_y^T D_y) A = 1 \f$ where
+ the matrices \f$ D_x \f$ and \f$ D_y \f$ are the partial derivatives of the design matrix \f$ D \f$ with
+ respect to x and y. The matrices are formed row by row applying the following to
+ each of the points in the set:
+ \f{align*}{
+ D(i,:)&=\left\{x_i^2, x_i y_i, y_i^2, x_i, y_i, 1\right\} &
+ D_x(i,:)&=\left\{2 x_i,y_i,0,1,0,0\right\} &
+ D_y(i,:)&=\left\{0,x_i,2 y_i,0,1,0\right\}
+ \f}
+ The AMS method minimizes the cost function
+ \f{equation*}{
+ \epsilon ^2=\frac{ A^T D^T D A }{ A^T (D_x^T D_x + D_y^T D_y) A }
+ \f}
+
+ The minimum cost is found by solving the generalized eigenvalue problem.
+
+ \f{equation*}{
+ D^T D A = \lambda  \left( D_x^T D_x + D_y^T D_y\right) A
+ \f}
+
+ @param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipseAMS( InputArray points );
+
+
+/** @brief Fits an ellipse around a set of 2D points.
+
+ The function calculates the ellipse that fits a set of 2D points.
+ It returns the rotated rectangle in which the ellipse is inscribed.
+ The Direct least square (Direct) method by @cite Fitzgibbon1999 is used.
+
+ For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$,
+ which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$.
+ However, to specify an ellipse, all that is needed is five numbers: the major and minor axes lengths \f$ (a,b) \f$,
+ the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines,
+ quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits.
+ The Direct method confines the fit to ellipses by ensuring that \f$ 4 A_{xx} A_{yy}- A_{xy}^2 > 0 \f$.
+ The condition imposed is that \f$ 4 A_{xx} A_{yy}- A_{xy}^2=1 \f$ which satisfies the inequality
+ and, as the coefficients can be arbitrarily scaled, is not overly restrictive.
+
+ \f{equation*}{
+ \epsilon ^2= A^T D^T D A \quad \text{with} \quad A^T C A =1 \quad \text{and} \quad C=\left(\begin{matrix}
+ 0 & 0  & 2  & 0  & 0  &  0  \\
+ 0 & -1  & 0  & 0  & 0  &  0 \\
+ 2 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0
+ \end{matrix} \right)
+ \f}
+
+ The minimum cost is found by solving the generalized eigenvalue problem.
+
+ \f{equation*}{
+ D^T D A = \lambda  \left( C\right) A
+ \f}
+
+ The system produces only one positive eigenvalue \f$ \lambda\f$ which is chosen as the solution
+ with its eigenvector \f$\mathbf{u}\f$. These are used to find the coefficients
+
+ \f{equation*}{
+ A = \sqrt{\frac{1}{\mathbf{u}^T C \mathbf{u}}}  \mathbf{u}
+ \f}
+ The scaling factor guarantees that \f$A^T C A =1\f$.
+
+ @param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipseDirect( InputArray points );
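+
+/* A minimal usage sketch for the fitEllipse family (hypothetical sampled points):
+@code
+    std::vector<Point2f> pts;
+    for (int deg = 0; deg < 360; deg += 10)
+    {
+        double a = deg * CV_PI / 180.0;
+        pts.push_back(Point2f(float(100 + 80 * std::cos(a)), float(100 + 40 * std::sin(a))));
+    }
+    RotatedRect lsq = fitEllipse(pts);          // least-squares fit
+    RotatedRect ams = fitEllipseAMS(pts);       // Approximate Mean Square
+    RotatedRect dir = fitEllipseDirect(pts);    // Direct least-squares
+@endcode
+*/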
+
+/** @brief Fits a line to a 2D or 3D point set.
+
+The function fitLine fits a line to a 2D or 3D point set by minimizing \f$\sum_i \rho(r_i)\f$ where
+\f$r_i\f$ is the distance between the \f$i^{th}\f$ point and the line, and \f$\rho(r)\f$ is a
+distance function, one of the following:
+- DIST_L2
+\f[\rho (r) = r^2/2 \quad \text{(the simplest and the fastest least-squares method)}\f]
+- DIST_L1
+\f[\rho (r) = r\f]
+- DIST_L12
+\f[\rho (r) = 2 \cdot ( \sqrt{1 + \frac{r^2}{2}} - 1)\f]
+- DIST_FAIR
+\f[\rho \left (r \right ) = C^2 \cdot \left ( \frac{r}{C} - \log{\left(1 + \frac{r}{C}\right)} \right ) \quad \text{where} \quad C=1.3998\f]
+- DIST_WELSCH
+\f[\rho \left (r \right ) = \frac{C^2}{2} \cdot \left ( 1 - \exp{\left(-\left(\frac{r}{C}\right)^2\right)} \right ) \quad \text{where} \quad C=2.9846\f]
+- DIST_HUBER
+\f[\rho (r) = \fork{r^2/2}{if \(r < C\)}{C \cdot (r-C/2)}{otherwise} \quad \text{where} \quad C=1.345\f]
+
+The algorithm is based on the M-estimator ( <http://en.wikipedia.org/wiki/M-estimator> ) technique
+that iteratively fits the line using the weighted least-squares algorithm. After each iteration the
+weights \f$w_i\f$ are adjusted to be inversely proportional to \f$\rho(r_i)\f$ .
+
+@param points Input vector of 2D or 3D points, stored in std::vector\<\> or Mat.
+@param line Output line parameters. In case of 2D fitting, it should be a vector of 4 elements
+(like Vec4f) - (vx, vy, x0, y0), where (vx, vy) is a normalized vector collinear to the line and
+(x0, y0) is a point on the line. In case of 3D fitting, it should be a vector of 6 elements (like
+Vec6f) - (vx, vy, vz, x0, y0, z0), where (vx, vy, vz) is a normalized vector collinear to the line
+and (x0, y0, z0) is a point on the line.
+@param distType Distance used by the M-estimator, see #DistanceTypes
+@param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
+is chosen.
+@param reps Sufficient accuracy for the radius (distance between the coordinate origin and the line).
+@param aeps Sufficient accuracy for the angle. 0.01 would be a good default value for reps and aeps.
+ */
+CV_EXPORTS_W void fitLine( InputArray points, OutputArray line, int distType,
+                           double param, double reps, double aeps );
+
+/** @brief Performs a point-in-contour test.
+
+The function determines whether the point is inside a contour, outside, or lies on an edge (or
+coincides with a vertex). It returns a positive (inside), negative (outside), or zero (on an edge)
+value, respectively. When measureDist=false, the return value is +1, -1, and 0, respectively.
+Otherwise, the return value is a signed distance between the point and the nearest contour edge.
+
+See below a sample output of the function where each image pixel is tested against the contour:
+
+![sample output](pics/pointpolygon.png)
+
+@param contour Input contour.
+@param pt Point tested against the contour.
+@param measureDist If true, the function estimates the signed distance from the point to the
+nearest contour edge. Otherwise, the function only checks if the point is inside a contour or not.
+ */
+CV_EXPORTS_W double pointPolygonTest( InputArray contour, Point2f pt, bool measureDist );
+
+/** @brief Finds out if there is any intersection between two rotated rectangles.
+
+If there is, the vertices of the intersecting region are returned as well.
+
+Below are some examples of intersection configurations. The hatched pattern indicates the
+intersecting region and the red vertices are returned by the function.
+
+![intersection examples](pics/intersection.png)
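+
+A minimal usage sketch (hypothetical rectangles):
+@code
+    RotatedRect r1(Point2f(50, 50), Size2f(40, 40), 0.f);
+    RotatedRect r2(Point2f(60, 60), Size2f(40, 40), 30.f);
+    std::vector<Point2f> region;
+    int kind = rotatedRectangleIntersection(r1, r2, region);   // one of #RectanglesIntersectTypes
+@endcode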
+
+@param rect1 First rectangle
+@param rect2 Second rectangle
+@param intersectingRegion The output array of the vertices of the intersecting region. It returns
+at most 8 vertices. Stored as std::vector\<cv::Point2f\> or cv::Mat as Mx1 of type CV_32FC2.
+@returns One of #RectanglesIntersectTypes
+ */
+CV_EXPORTS_W int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, OutputArray intersectingRegion );
+
+/** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it.
+*/
+CV_EXPORTS_W Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
+
+/** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it.
+*/
+CV_EXPORTS_W Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
+
+//! @} imgproc_shape
+
+//! @addtogroup imgproc_colormap
+//! @{
+
+//! GNU Octave/MATLAB equivalent colormaps
+enum ColormapTypes
+{
+    COLORMAP_AUTUMN = 0, //!< ![autumn](pics/colormaps/colorscale_autumn.jpg)
+    COLORMAP_BONE = 1, //!< ![bone](pics/colormaps/colorscale_bone.jpg)
+    COLORMAP_JET = 2, //!< ![jet](pics/colormaps/colorscale_jet.jpg)
+    COLORMAP_WINTER = 3, //!< ![winter](pics/colormaps/colorscale_winter.jpg)
+    COLORMAP_RAINBOW = 4, //!< ![rainbow](pics/colormaps/colorscale_rainbow.jpg)
+    COLORMAP_OCEAN = 5, //!< ![ocean](pics/colormaps/colorscale_ocean.jpg)
+    COLORMAP_SUMMER = 6, //!< ![summer](pics/colormaps/colorscale_summer.jpg)
+    COLORMAP_SPRING = 7, //!< ![spring](pics/colormaps/colorscale_spring.jpg)
+    COLORMAP_COOL = 8, //!< ![cool](pics/colormaps/colorscale_cool.jpg)
+    COLORMAP_HSV = 9, //!< ![HSV](pics/colormaps/colorscale_hsv.jpg)
+    COLORMAP_PINK = 10, //!< ![pink](pics/colormaps/colorscale_pink.jpg)
+    COLORMAP_HOT = 11, //!< ![hot](pics/colormaps/colorscale_hot.jpg)
+    COLORMAP_PARULA = 12, //!< ![parula](pics/colormaps/colorscale_parula.jpg)
+    COLORMAP_MAGMA = 13, //!< ![magma](pics/colormaps/colorscale_magma.jpg)
+    COLORMAP_INFERNO = 14, //!< ![inferno](pics/colormaps/colorscale_inferno.jpg)
+    COLORMAP_PLASMA = 15, //!< ![plasma](pics/colormaps/colorscale_plasma.jpg)
+    COLORMAP_VIRIDIS = 16, //!< ![viridis](pics/colormaps/colorscale_viridis.jpg)
+    COLORMAP_CIVIDIS = 17, //!< ![cividis](pics/colormaps/colorscale_cividis.jpg)
+    COLORMAP_TWILIGHT = 18, //!< ![twilight](pics/colormaps/colorscale_twilight.jpg)
+    COLORMAP_TWILIGHT_SHIFTED = 19, //!< ![twilight shifted](pics/colormaps/colorscale_twilight_shifted.jpg)
+    COLORMAP_TURBO = 20, //!< ![turbo](pics/colormaps/colorscale_turbo.jpg)
+    COLORMAP_DEEPGREEN = 21 //!< ![deepgreen](pics/colormaps/colorscale_deepgreen.jpg)
+};
+
+/** @example samples/cpp/falsecolor.cpp
+An example using applyColorMap function
+*/
+
+/** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image.
+
+@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
+@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
+@param colormap The colormap to apply, see #ColormapTypes
+*/
+CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
+
+/** @brief Applies a user colormap on a given image.
+
+@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
+@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
+@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256
+*/
+CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor);
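+
+/* A minimal applyColorMap sketch (hypothetical gradient strip as input):
+@code
+    Mat gray(256, 1, CV_8UC1), colored;
+    for (int i = 0; i < 256; i++) gray.at<uchar>(i) = (uchar)i;   // 0..255 ramp
+    applyColorMap(gray, colored, COLORMAP_JET);
+@endcode
+*/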
+
+//! @} imgproc_colormap
+
+//! @addtogroup imgproc_draw
+//! @{
+
+
+/** OpenCV color channel order is BGR[A] */
+#define CV_RGB(r, g, b)  cv::Scalar((b), (g), (r), 0)
+
+/** @brief Draws a line segment connecting two points.
+
+The function line draws the line segment between pt1 and pt2 points in the image. The line is
+clipped by the image boundaries. For non-antialiased lines with integer coordinates, the 8-connected
+or 4-connected Bresenham algorithm is used. Thick lines are drawn with rounded endings. Antialiased
+lines are drawn using Gaussian filtering.
+
+@param img Image.
+@param pt1 First point of the line segment.
+@param pt2 Second point of the line segment.
+@param color Line color.
+@param thickness Line thickness.
+@param lineType Type of the line. See #LineTypes.
+@param shift Number of fractional bits in the point coordinates.
+ */
+CV_EXPORTS_W void line(InputOutputArray img, Point pt1, Point pt2, const Scalar& color,
+                       int thickness = 1, int lineType = LINE_8, int shift = 0);
+
+/** @brief Draws an arrow segment pointing from the first point to the second one.
+
+The function cv::arrowedLine draws an arrow between pt1 and pt2 points in the image. See also #line.
+
+@param img Image.
+@param pt1 The point the arrow starts from.
+@param pt2 The point the arrow points to.
+@param color Line color.
+@param thickness Line thickness.
+@param line_type Type of the line. See #LineTypes
+@param shift Number of fractional bits in the point coordinates.
+@param tipLength The length of the arrow tip in relation to the arrow length
+ */
+CV_EXPORTS_W void arrowedLine(InputOutputArray img, Point pt1, Point pt2, const Scalar& color,
+                              int thickness=1, int line_type=8, int shift=0, double tipLength=0.1);
+
+/** @brief Draws a simple, thick, or filled up-right rectangle.
+
+The function cv::rectangle draws a rectangle outline or a filled rectangle whose two opposite corners
+are pt1 and pt2.
+
+@param img Image.
+@param pt1 Vertex of the rectangle.
+@param pt2 Vertex of the rectangle opposite to pt1 .
+@param color Rectangle color or brightness (grayscale image).
+@param thickness Thickness of lines that make up the rectangle. Negative values, like #FILLED,
+mean that the function has to draw a filled rectangle.
+@param lineType Type of the line. See #LineTypes
+@param shift Number of fractional bits in the point coordinates.
+ */
+CV_EXPORTS_W void rectangle(InputOutputArray img, Point pt1, Point pt2,
+                            const Scalar& color, int thickness = 1,
+                            int lineType = LINE_8, int shift = 0);
+
+/** @overload
+
+use `rec` parameter as alternative specification of the drawn rectangle: `r.tl() and
+r.br()-Point(1,1)` are opposite corners
+*/
+CV_EXPORTS_W void rectangle(InputOutputArray img, Rect rec,
+                            const Scalar& color, int thickness = 1,
+                            int lineType = LINE_8, int shift = 0);
+
+/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_2.cpp
+An example using drawing functions
+*/
+
+/** @brief Draws a circle.
+
+The function cv::circle draws a simple or filled circle with a given center and radius.
+@param img Image where the circle is drawn.
+@param center Center of the circle.
+@param radius Radius of the circle.
+@param color Circle color.
+@param thickness Thickness of the circle outline, if positive. Negative values, like #FILLED,
+mean that a filled circle is to be drawn.
+@param lineType Type of the circle boundary. See #LineTypes
+@param shift Number of fractional bits in the coordinates of the center and in the radius value.
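+
+A minimal usage sketch (hypothetical canvas):
+@code
+    Mat canvas(400, 400, CV_8UC3, Scalar::all(0));
+    // filled red circle; note the BGR channel order (see CV_RGB)
+    circle(canvas, Point(200, 200), 50, Scalar(0, 0, 255), FILLED, LINE_AA);
+@endcode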
+ */
+CV_EXPORTS_W void circle(InputOutputArray img, Point center, int radius,
+                       const Scalar& color, int thickness = 1,
+                       int lineType = LINE_8, int shift = 0);
+
+/** @brief Draws a simple or thick elliptic arc or fills an ellipse sector.
+
+The function cv::ellipse with more parameters draws an ellipse outline, a filled ellipse, an elliptic
+arc, or a filled ellipse sector. The drawing code uses a general parametric form.
+A piecewise-linear curve is used to approximate the elliptic arc
+boundary. If you need more control of the ellipse rendering, you can retrieve the curve using
+#ellipse2Poly and then render it with #polylines or fill it with #fillPoly. If you use the first
+variant of the function and want to draw the whole ellipse, not an arc, pass `startAngle=0` and
+`endAngle=360`. If `startAngle` is greater than `endAngle`, they are swapped. The figure below explains
+the meaning of the parameters to draw the blue arc.
+
+![Parameters of Elliptic Arc](pics/ellipse.svg)
+
+@param img Image.
+@param center Center of the ellipse.
+@param axes Half of the size of the ellipse main axes.
+@param angle Ellipse rotation angle in degrees.
+@param startAngle Starting angle of the elliptic arc in degrees.
+@param endAngle Ending angle of the elliptic arc in degrees.
+@param color Ellipse color.
+@param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that
+a filled ellipse sector is to be drawn.
+@param lineType Type of the ellipse boundary. See #LineTypes
+@param shift Number of fractional bits in the coordinates of the center and values of axes.
+ */
+CV_EXPORTS_W void ellipse(InputOutputArray img, Point center, Size axes,
+                        double angle, double startAngle, double endAngle,
+                        const Scalar& color, int thickness = 1,
+                        int lineType = LINE_8, int shift = 0);
+
+/** @overload
+@param img Image.
+@param box Alternative ellipse representation via RotatedRect. This means that the function draws
+an ellipse inscribed in the rotated rectangle.
+@param color Ellipse color.
+@param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that
+a filled ellipse sector is to be drawn.
+@param lineType Type of the ellipse boundary. See #LineTypes
+*/
+CV_EXPORTS_W void ellipse(InputOutputArray img, const RotatedRect& box, const Scalar& color,
+                          int thickness = 1, int lineType = LINE_8);
+
+/* ----------------------------------------------------------------------------------------- */
+/* ADDING A SET OF PREDEFINED MARKERS WHICH COULD BE USED TO HIGHLIGHT POSITIONS IN AN IMAGE */
+/* ----------------------------------------------------------------------------------------- */
+
+/** @brief Draws a marker on a predefined position in an image.
+
+The function cv::drawMarker draws a marker on a given position in the image. Currently, several
+marker types are supported, see #MarkerTypes for more information.
+
+@param img Image.
+@param position The point where the crosshair is positioned.
+@param color Line color.
+@param markerType The specific type of marker you want to use, see #MarkerTypes
+@param thickness Line thickness.
+@param line_type Type of the line. See #LineTypes
+@param markerSize The length of the marker axis [default = 20 pixels]
+ */
+CV_EXPORTS_W void drawMarker(InputOutputArray img, Point position, const Scalar& color,
+                             int markerType = MARKER_CROSS, int markerSize=20, int thickness=1,
+                             int line_type=8);
+
+/* ----------------------------------------------------------------------------------------- */
+/* END OF MARKER SECTION */
+/* ----------------------------------------------------------------------------------------- */
+
+/** @brief Fills a convex polygon.
+
+The function cv::fillConvexPoly draws a filled convex polygon. This function is much faster than the
+function #fillPoly . It can fill not only convex polygons but any monotonic polygon without
+self-intersections, that is, a polygon whose contour intersects every horizontal line (scan line)
+at most twice (though its top-most and/or bottom edge could be horizontal).
+
+@param img Image.
+@param points Polygon vertices.
+@param color Polygon color.
+@param lineType Type of the polygon boundaries. See #LineTypes
+@param shift Number of fractional bits in the vertex coordinates.
+ */
+CV_EXPORTS_W void fillConvexPoly(InputOutputArray img, InputArray points,
+                                 const Scalar& color, int lineType = LINE_8,
+                                 int shift = 0);
+
+/** @overload */
+CV_EXPORTS void fillConvexPoly(InputOutputArray img, const Point* pts, int npts,
+                               const Scalar& color, int lineType = LINE_8,
+                               int shift = 0);
+
+/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_1.cpp
+An example using drawing functions
+Check @ref tutorial_random_generator_and_text "the corresponding tutorial" for more details
+*/
+
+/** @brief Fills the area bounded by one or more polygons.
+
+The function cv::fillPoly fills an area bounded by several polygonal contours. The function can fill
+complex areas, for example, areas with holes, contours with self-intersections (some of their
+parts), and so forth.
+
+@param img Image.
+@param pts Array of polygons where each polygon is represented as an array of points.
+@param color Polygon color.
+@param lineType Type of the polygon boundaries. See #LineTypes
+@param shift Number of fractional bits in the vertex coordinates.
+@param offset Optional offset of all points of the contours.
+ */
+CV_EXPORTS_W void fillPoly(InputOutputArray img, InputArrayOfArrays pts,
+                           const Scalar& color, int lineType = LINE_8, int shift = 0,
+                           Point offset = Point() );
+
+/** @overload */
+CV_EXPORTS void fillPoly(InputOutputArray img, const Point** pts,
+                         const int* npts, int ncontours,
+                         const Scalar& color, int lineType = LINE_8, int shift = 0,
+                         Point offset = Point() );
+
+/** @brief Draws several polygonal curves.
+
+@param img Image.
+@param pts Array of polygonal curves.
+@param isClosed Flag indicating whether the drawn polylines are closed or not. If they are closed,
+the function draws a line from the last vertex of each curve to its first vertex.
+@param color Polyline color.
+@param thickness Thickness of the polyline edges.
+@param lineType Type of the line segments. See #LineTypes
+@param shift Number of fractional bits in the vertex coordinates.
+
+The function cv::polylines draws one or more polygonal curves.
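+
+A minimal usage sketch (hypothetical triangle on an 8-bit color image `img`):
+@code
+    std::vector<std::vector<Point> > tri = { { Point(50, 10), Point(10, 90), Point(90, 90) } };
+    polylines(img, tri, true, Scalar(0, 255, 0), 2, LINE_AA);
+@endcode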
+ */
+CV_EXPORTS_W void polylines(InputOutputArray img, InputArrayOfArrays pts,
+                            bool isClosed, const Scalar& color,
+                            int thickness = 1, int lineType = LINE_8, int shift = 0 );
+
+/** @overload */
+CV_EXPORTS void polylines(InputOutputArray img, const Point* const* pts, const int* npts,
+                          int ncontours, bool isClosed, const Scalar& color,
+                          int thickness = 1, int lineType = LINE_8, int shift = 0 );
+
+/** @example samples/cpp/contours2.cpp
+An example program illustrating the use of cv::findContours and cv::drawContours
+\image html WindowsQtContoursOutput.png "Screenshot of the program"
+*/
+
+/** @example samples/cpp/segment_objects.cpp
+An example using drawContours to clean up a background segmentation result
+*/
+
+/** @brief Draws contour outlines or filled contours.
+
+The function draws contour outlines in the image if \f$\texttt{thickness} \ge 0\f$ or fills the area
+bounded by the contours if \f$\texttt{thickness}<0\f$ . The example below shows how to retrieve
+connected components from the binary image and label them:
+@include snippets/imgproc_drawContours.cpp
+
+@param image Destination image.
+@param contours All the input contours. Each contour is stored as a point vector.
+@param contourIdx Parameter indicating a contour to draw. If it is negative, all the contours are drawn.
+@param color Color of the contours.
+@param thickness Thickness of lines the contours are drawn with. If it is negative (for example,
+thickness=#FILLED ), the contour interiors are drawn.
+@param lineType Line connectivity. See #LineTypes
+@param hierarchy Optional information about hierarchy. It is only needed if you want to draw only
+some of the contours (see maxLevel ).
+@param maxLevel Maximal level for drawn contours. If it is 0, only the specified contour is drawn.
+If it is 1, the function draws the contour(s) and all the nested contours. If it is 2, the function
+draws the contours, all the nested contours, all the nested-to-nested contours, and so on. This
+parameter is only taken into account when there is hierarchy available.
+@param offset Optional contour shift parameter. Shift all the drawn contours by the specified
+\f$\texttt{offset}=(dx,dy)\f$ .
+@note When thickness=#FILLED, the function is designed to handle connected components with holes correctly
+even when no hierarchy data is provided. This is done by analyzing all the outlines together
+using the even-odd rule. This may give incorrect results if you have a joint collection of separately retrieved
+contours. In order to solve this problem, you need to call #drawContours separately for each sub-group
+of contours, or iterate over the collection using contourIdx parameter.
+ */
+CV_EXPORTS_W void drawContours( InputOutputArray image, InputArrayOfArrays contours,
+                                int contourIdx, const Scalar& color,
+                                int thickness = 1, int lineType = LINE_8,
+                                InputArray hierarchy = noArray(),
+                                int maxLevel = INT_MAX, Point offset = Point() );
+
+/** @brief Clips the line against the image rectangle.
+
+The function cv::clipLine calculates a part of the line segment that is entirely within the specified
+rectangle. It returns false if the line segment is completely outside the rectangle. Otherwise,
+it returns true.
+@param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height).
+@param pt1 First line point.
+@param pt2 Second line point.
+ */
+CV_EXPORTS bool clipLine(Size imgSize, CV_IN_OUT Point& pt1, CV_IN_OUT Point& pt2);
+
+/** @overload
+@param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height).
+@param pt1 First line point.
+@param pt2 Second line point.
+*/
+CV_EXPORTS bool clipLine(Size2l imgSize, CV_IN_OUT Point2l& pt1, CV_IN_OUT Point2l& pt2);
+
+/** @overload
+@param imgRect Image rectangle.
+@param pt1 First line point.
+@param pt2 Second line point.
+*/
+CV_EXPORTS_W bool clipLine(Rect imgRect, CV_OUT CV_IN_OUT Point& pt1, CV_OUT CV_IN_OUT Point& pt2);
+
+/** @brief Approximates an elliptic arc with a polyline.
+
+The function ellipse2Poly computes the vertices of a polyline that approximates the specified
+elliptic arc. It is used by #ellipse. If `arcStart` is greater than `arcEnd`, they are swapped.
+
+@param center Center of the arc.
+@param axes Half of the size of the ellipse main axes. See #ellipse for details.
+@param angle Rotation angle of the ellipse in degrees. See #ellipse for details.
+@param arcStart Starting angle of the elliptic arc in degrees.
+@param arcEnd Ending angle of the elliptic arc in degrees.
+@param delta Angle between the subsequent polyline vertices. It defines the approximation
+accuracy.
+@param pts Output vector of polyline vertices.
+ */
+CV_EXPORTS_W void ellipse2Poly( Point center, Size axes, int angle,
+                                int arcStart, int arcEnd, int delta,
+                                CV_OUT std::vector<Point>& pts );
+
+/** @overload
+@param center Center of the arc.
+@param axes Half of the size of the ellipse main axes. See #ellipse for details.
+@param angle Rotation angle of the ellipse in degrees. See #ellipse for details.
+@param arcStart Starting angle of the elliptic arc in degrees.
+@param arcEnd Ending angle of the elliptic arc in degrees.
+@param delta Angle between the subsequent polyline vertices. It defines the approximation accuracy.
+@param pts Output vector of polyline vertices.
+*/
+CV_EXPORTS void ellipse2Poly(Point2d center, Size2d axes, int angle,
+                             int arcStart, int arcEnd, int delta,
+                             CV_OUT std::vector<Point2d>& pts);
+
+/** @brief Draws a text string.
+
+The function cv::putText renders the specified text string in the image. Symbols that cannot be rendered
+using the specified font are replaced by question marks. See #getTextSize for a text rendering code
+example.
+
+@param img Image.
+@param text Text string to be drawn.
+@param org Bottom-left corner of the text string in the image.
+@param fontFace Font type, see #HersheyFonts.
+@param fontScale Font scale factor that is multiplied by the font-specific base size.
+@param color Text color.
+@param thickness Thickness of the lines used to draw the text.
+@param lineType Line type. See #LineTypes
+@param bottomLeftOrigin When true, the image data origin is at the bottom-left corner. Otherwise,
+it is at the top-left corner.
+ */
+CV_EXPORTS_W void putText( InputOutputArray img, const String& text, Point org,
+                         int fontFace, double fontScale, Scalar color,
+                         int thickness = 1, int lineType = LINE_8,
+                         bool bottomLeftOrigin = false );
+
+/** @brief Calculates the width and height of a text string.
+
+The function cv::getTextSize calculates and returns the size of a box that contains the specified text.
+That is, the following code renders some text, the tight box surrounding it, and the baseline:
+@code
+    String text = "Funny text inside the box";
+    int fontFace = FONT_HERSHEY_SCRIPT_SIMPLEX;
+    double fontScale = 2;
+    int thickness = 3;
+
+    Mat img(600, 800, CV_8UC3, Scalar::all(0));
+
+    int baseline=0;
+    Size textSize = getTextSize(text, fontFace,
+                                fontScale, thickness, &baseline);
+    baseline += thickness;
+
+    // center the text
+    Point textOrg((img.cols - textSize.width)/2,
+                  (img.rows + textSize.height)/2);
+
+    // draw the box
+    rectangle(img, textOrg + Point(0, baseline),
+              textOrg + Point(textSize.width, -textSize.height),
+              Scalar(0,0,255));
+    // ... and the baseline first
+    line(img, textOrg + Point(0, thickness),
+         textOrg + Point(textSize.width, thickness),
+         Scalar(0, 0, 255));
+
+    // then put the text itself
+    putText(img, text, textOrg, fontFace, fontScale,
+            Scalar::all(255), thickness, 8);
+@endcode
+
+@param text Input text string.
+@param fontFace Font to use, see #HersheyFonts.
+@param fontScale Font scale factor that is multiplied by the font-specific base size.
+@param thickness Thickness of lines used to render the text. See #putText for details.
+@param[out] baseLine y-coordinate of the baseline relative to the bottom-most text
+point.
+@return The size of a box that contains the specified text.
+
+@see putText
+ */
+CV_EXPORTS_W Size getTextSize(const String& text, int fontFace,
+                              double fontScale, int thickness,
+                              CV_OUT int* baseLine);
+
+
+/** @brief Calculates the font-specific size to use to achieve a given height in pixels.
+
+@param fontFace Font to use, see cv::HersheyFonts.
+@param pixelHeight Pixel height to compute the fontScale for
+@param thickness Thickness of lines used to render the text. See putText for details.
+@return The fontSize to use for cv::putText
+
+@see cv::putText
+*/
+CV_EXPORTS_W double getFontScaleFromHeight(const int fontFace,
+                                           const int pixelHeight,
+                                           const int thickness = 1);
+
+/** @brief Class for iterating over all pixels on a raster line segment.
+
+The class LineIterator is used to get each pixel of a raster line connecting
+two specified points.
+It can be treated as a versatile implementation of the Bresenham algorithm
+where you can stop at each pixel and do some extra processing, for
+example, grab pixel values along the line or draw a line with an effect
+(for example, with XOR operation).
+
+The number of pixels along the line is stored in LineIterator::count.
+The method LineIterator::pos returns the current position in the image:
+
+@code{.cpp}
+// grabs pixels along the line (pt1, pt2)
+// from 8-bit 3-channel image to the buffer
+LineIterator it(img, pt1, pt2, 8);
+LineIterator it2 = it;
+vector<Vec3b> buf(it.count);
+
+for(int i = 0; i < it.count; i++, ++it)
+    buf[i] = *(const Vec3b*)*it;
+
+// alternative way of iterating through the line
+for(int i = 0; i < it2.count; i++, ++it2)
+{
+    Vec3b val = img.at<Vec3b>(it2.pos());
+    CV_Assert(buf[i] == val);
+}
+@endcode
+*/
+class CV_EXPORTS LineIterator
+{
+public:
+    /** @brief Initializes an iterator object for the given line and image.
+
+    The returned iterator can be used to traverse all pixels on a line that
+    connects the given two points.
+    The line will be clipped on the image boundaries.
+
+    @param img Underlying image.
+    @param pt1 First endpoint of the line.
+    @param pt2 The other endpoint of the line.
+    @param connectivity Pixel connectivity of the iterator. Valid values are 4 (iterator can move
+    up, down, left and right) and 8 (iterator can also move diagonally).
+ @param leftToRight If true, the line is traversed from the leftmost endpoint to the rightmost + endpoint. Otherwise, the line is traversed from \p pt1 to \p pt2. + */ + LineIterator( const Mat& img, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(&img, Rect(0, 0, img.cols, img.rows), pt1, pt2, connectivity, leftToRight); + ptmode = false; + } + LineIterator( Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, Rect(std::min(pt1.x, pt2.x), + std::min(pt1.y, pt2.y), + std::max(pt1.x, pt2.x) - std::min(pt1.x, pt2.x) + 1, + std::max(pt1.y, pt2.y) - std::min(pt1.y, pt2.y) + 1), + pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + LineIterator( Size boundingAreaSize, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, Rect(0, 0, boundingAreaSize.width, boundingAreaSize.height), + pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + LineIterator( Rect boundingAreaRect, Point pt1, Point pt2, + int connectivity = 8, bool leftToRight = false ) + { + init(0, boundingAreaRect, pt1, pt2, connectivity, leftToRight); + ptmode = true; + } + void init(const Mat* img, Rect boundingAreaRect, Point pt1, Point pt2, int connectivity, bool leftToRight); + + /** @brief Returns pointer to the current pixel. + */ + uchar* operator *(); + + /** @brief Moves iterator to the next pixel on the line. + + This is the prefix version (++it). + */ + LineIterator& operator ++(); + + /** @brief Moves iterator to the next pixel on the line. + + This is the postfix version (it++). + */ + LineIterator operator ++(int); + + /** @brief Returns coordinates of the current pixel. + */ + Point pos() const; + + uchar* ptr; + const uchar* ptr0; + int step, elemSize; + int err, count; + int minusDelta, plusDelta; + int minusStep, plusStep; + int minusShift, plusShift; + Point p; + bool ptmode; +}; + +//! @cond IGNORED + +// === LineIterator implementation === + +inline +uchar* LineIterator::operator *() +{ + return ptmode ? 0 : ptr; +} + +inline +LineIterator& LineIterator::operator ++() +{ + int mask = err < 0 ? -1 : 0; + err += minusDelta + (plusDelta & mask); + if(!ptmode) + { + ptr += minusStep + (plusStep & mask); + } + else + { + p.x += minusShift + (plusShift & mask); + p.y += minusStep + (plusStep & mask); + } + return *this; +} + +inline +LineIterator LineIterator::operator ++(int) +{ + LineIterator it = *this; + ++(*this); + return it; +} + +inline +Point LineIterator::pos() const +{ + if(!ptmode) + { + size_t offset = (size_t)(ptr - ptr0); + int y = (int)(offset/step); + int x = (int)((offset - (size_t)y*step)/elemSize); + return Point(x, y); + } + return p; +} + +//! @endcond + +//! @} imgproc_draw + +//! @} imgproc + +} // cv + + +#include "./imgproc/segmentation.hpp" + + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/bindings.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/bindings.hpp new file mode 100644 index 0000000..c69527a --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/bindings.hpp @@ -0,0 +1,34 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+
+#ifndef OPENCV_IMGPROC_BINDINGS_HPP
+#define OPENCV_IMGPROC_BINDINGS_HPP
+
+// This file contains special overloads for OpenCV bindings
+// No need to use these functions in C++ code.
+
+namespace cv {
+
+/** @brief Finds lines in a binary image using the standard Hough transform and gets the accumulator.
+ *
+ * @note This function is for bindings use only. Use original function in C++ code
+ *
+ * @sa HoughLines
+ */
+CV_WRAP static inline
+void HoughLinesWithAccumulator(
+        InputArray image, OutputArray lines,
+        double rho, double theta, int threshold,
+        double srn = 0, double stn = 0,
+        double min_theta = 0, double max_theta = CV_PI
+)
+{
+    std::vector<Vec3f> lines_acc;
+    HoughLines(image, lines_acc, rho, theta, threshold, srn, stn, min_theta, max_theta);
+    Mat(lines_acc).copyTo(lines);
+}
+
+}  // namespace
+
+#endif  // OPENCV_IMGPROC_BINDINGS_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/detail/gcgraph.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/detail/gcgraph.hpp
new file mode 100644
index 0000000..f17c6e7
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/detail/gcgraph.hpp
@@ -0,0 +1,395 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP
+#define OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP
+
+//! @cond IGNORED
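+
+/*
+ For context: the HoughLinesWithAccumulator wrapper in bindings.hpp above is
+ typically exercised as below (a sketch; `src` is an illustrative synthetic
+ 8-bit input, and each returned row carries rho, theta and the accumulator votes):
+
+ @code{.cpp}
+ cv::Mat src(480, 640, CV_8UC1, cv::Scalar(0));
+ cv::line(src, cv::Point(50, 50), cv::Point(400, 300), cv::Scalar(255), 2);
+
+ cv::Mat edges, lines;
+ cv::Canny(src, edges, 50, 150);
+ cv::HoughLinesWithAccumulator(edges, lines, 1.0, CV_PI / 180.0, 100);
+ // lines is Nx1 CV_32FC3: (rho, theta, votes)
+ @endcode
+*/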
+
+namespace cv { namespace detail {
+template <class TWeight> class GCGraph
+{
+public:
+    GCGraph();
+    GCGraph( unsigned int vtxCount, unsigned int edgeCount );
+    ~GCGraph();
+    void create( unsigned int vtxCount, unsigned int edgeCount );
+    int addVtx();
+    void addEdges( int i, int j, TWeight w, TWeight revw );
+    void addTermWeights( int i, TWeight sourceW, TWeight sinkW );
+    TWeight maxFlow();
+    bool inSourceSegment( int i );
+private:
+    class Vtx
+    {
+    public:
+        Vtx *next; // initialized and used in maxFlow() only
+        int parent;
+        int first;
+        int ts;
+        int dist;
+        TWeight weight;
+        uchar t;
+    };
+    class Edge
+    {
+    public:
+        int dst;
+        int next;
+        TWeight weight;
+    };
+
+    std::vector<Vtx> vtcs;
+    std::vector<Edge> edges;
+    TWeight flow;
+};
+
+template <class TWeight>
+GCGraph<TWeight>::GCGraph()
+{
+    flow = 0;
+}
+template <class TWeight>
+GCGraph<TWeight>::GCGraph( unsigned int vtxCount, unsigned int edgeCount )
+{
+    create( vtxCount, edgeCount );
+}
+template <class TWeight>
+GCGraph<TWeight>::~GCGraph()
+{
+}
+template <class TWeight>
+void GCGraph<TWeight>::create( unsigned int vtxCount, unsigned int edgeCount )
+{
+    vtcs.reserve( vtxCount );
+    edges.reserve( edgeCount + 2 );
+    flow = 0;
+}
+
+template <class TWeight>
+int GCGraph<TWeight>::addVtx()
+{
+    Vtx v;
+    memset( &v, 0, sizeof(Vtx));
+    vtcs.push_back(v);
+    return (int)vtcs.size() - 1;
+}
+
+template <class TWeight>
+void GCGraph<TWeight>::addEdges( int i, int j, TWeight w, TWeight revw )
+{
+    CV_Assert( i>=0 && i<(int)vtcs.size() );
+    CV_Assert( j>=0 && j<(int)vtcs.size() );
+    CV_Assert( w>=0 && revw>=0 );
+    CV_Assert( i != j );
+
+    if( !edges.size() )
+        edges.resize( 2 );
+
+    Edge fromI, toI;
+    fromI.dst = j;
+    fromI.next = vtcs[i].first;
+    fromI.weight = w;
+    vtcs[i].first = (int)edges.size();
+    edges.push_back( fromI );
+
+    toI.dst = i;
+    toI.next = vtcs[j].first;
+    toI.weight = revw;
+    vtcs[j].first = (int)edges.size();
+    edges.push_back( toI );
+}
+
+template <class TWeight>
+void GCGraph<TWeight>::addTermWeights( int i, TWeight sourceW, TWeight sinkW )
+{
+    CV_Assert( i>=0 && i<(int)vtcs.size() );
+
+    TWeight dw = vtcs[i].weight;
+    if( dw > 0 )
+        sourceW += dw;
+    else
+        sinkW -= dw;
+    flow += (sourceW < sinkW) ?
sourceW : sinkW; + vtcs[i].weight = sourceW - sinkW; +} + +template +TWeight GCGraph::maxFlow() +{ + CV_Assert(!vtcs.empty()); + CV_Assert(!edges.empty()); + const int TERMINAL = -1, ORPHAN = -2; + Vtx stub, *nilNode = &stub, *first = nilNode, *last = nilNode; + int curr_ts = 0; + stub.next = nilNode; + Vtx *vtxPtr = &vtcs[0]; + Edge *edgePtr = &edges[0]; + + std::vector orphans; + + // initialize the active queue and the graph vertices + for( int i = 0; i < (int)vtcs.size(); i++ ) + { + Vtx* v = vtxPtr + i; + v->ts = 0; + if( v->weight != 0 ) + { + last = last->next = v; + v->dist = 1; + v->parent = TERMINAL; + v->t = v->weight < 0; + } + else + v->parent = 0; + } + first = first->next; + last->next = nilNode; + nilNode->next = 0; + + // run the search-path -> augment-graph -> restore-trees loop + for(;;) + { + Vtx* v, *u; + int e0 = -1, ei = 0, ej = 0; + TWeight minWeight, weight; + uchar vt; + + // grow S & T search trees, find an edge connecting them + while( first != nilNode ) + { + v = first; + if( v->parent ) + { + vt = v->t; + for( ei = v->first; ei != 0; ei = edgePtr[ei].next ) + { + if( edgePtr[ei^vt].weight == 0 ) + continue; + u = vtxPtr+edgePtr[ei].dst; + if( !u->parent ) + { + u->t = vt; + u->parent = ei ^ 1; + u->ts = v->ts; + u->dist = v->dist + 1; + if( !u->next ) + { + u->next = nilNode; + last = last->next = u; + } + continue; + } + + if( u->t != vt ) + { + e0 = ei ^ vt; + break; + } + + if( u->dist > v->dist+1 && u->ts <= v->ts ) + { + // reassign the parent + u->parent = ei ^ 1; + u->ts = v->ts; + u->dist = v->dist + 1; + } + } + if( e0 > 0 ) + break; + } + // exclude the vertex from the active list + first = first->next; + v->next = 0; + } + + if( e0 <= 0 ) + break; + + // find the minimum edge weight along the path + minWeight = edgePtr[e0].weight; + CV_Assert( minWeight > 0 ); + // k = 1: source tree, k = 0: destination tree + for( int k = 1; k >= 0; k-- ) + { + for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) + { + if( (ei = v->parent) < 0 ) + break; + weight = edgePtr[ei^k].weight; + minWeight = MIN(minWeight, weight); + CV_Assert( minWeight > 0 ); + } + weight = fabs(v->weight); + minWeight = MIN(minWeight, weight); + CV_Assert( minWeight > 0 ); + } + + // modify weights of the edges along the path and collect orphans + edgePtr[e0].weight -= minWeight; + edgePtr[e0^1].weight += minWeight; + flow += minWeight; + + // k = 1: source tree, k = 0: destination tree + for( int k = 1; k >= 0; k-- ) + { + for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) + { + if( (ei = v->parent) < 0 ) + break; + edgePtr[ei^(k^1)].weight += minWeight; + if( (edgePtr[ei^k].weight -= minWeight) == 0 ) + { + orphans.push_back(v); + v->parent = ORPHAN; + } + } + + v->weight = v->weight + minWeight*(1-k*2); + if( v->weight == 0 ) + { + orphans.push_back(v); + v->parent = ORPHAN; + } + } + + // restore the search trees by finding new parents for the orphans + curr_ts++; + while( !orphans.empty() ) + { + Vtx* v2 = orphans.back(); + orphans.pop_back(); + + int d, minDist = INT_MAX; + e0 = 0; + vt = v2->t; + + for( ei = v2->first; ei != 0; ei = edgePtr[ei].next ) + { + if( edgePtr[ei^(vt^1)].weight == 0 ) + continue; + u = vtxPtr+edgePtr[ei].dst; + if( u->t != vt || u->parent == 0 ) + continue; + // compute the distance to the tree root + for( d = 0;; ) + { + if( u->ts == curr_ts ) + { + d += u->dist; + break; + } + ej = u->parent; + d++; + if( ej < 0 ) + { + if( ej == ORPHAN ) + d = INT_MAX-1; + else + { + u->ts = curr_ts; + u->dist = 1; + } + break; + } + u = 
vtxPtr+edgePtr[ej].dst;
+                }
+
+                // update the distance
+                if( ++d < INT_MAX )
+                {
+                    if( d < minDist )
+                    {
+                        minDist = d;
+                        e0 = ei;
+                    }
+                    for( u = vtxPtr+edgePtr[ei].dst; u->ts != curr_ts; u = vtxPtr+edgePtr[u->parent].dst )
+                    {
+                        u->ts = curr_ts;
+                        u->dist = --d;
+                    }
+                }
+            }
+
+            if( (v2->parent = e0) > 0 )
+            {
+                v2->ts = curr_ts;
+                v2->dist = minDist;
+                continue;
+            }
+
+            /* no parent is found */
+            v2->ts = 0;
+            for( ei = v2->first; ei != 0; ei = edgePtr[ei].next )
+            {
+                u = vtxPtr+edgePtr[ei].dst;
+                ej = u->parent;
+                if( u->t != vt || !ej )
+                    continue;
+                if( edgePtr[ei^(vt^1)].weight && !u->next )
+                {
+                    u->next = nilNode;
+                    last = last->next = u;
+                }
+                if( ej > 0 && vtxPtr+edgePtr[ej].dst == v2 )
+                {
+                    orphans.push_back(u);
+                    u->parent = ORPHAN;
+                }
+            }
+        }
+    }
+    return flow;
+}
+
+template <class TWeight>
+bool GCGraph<TWeight>::inSourceSegment( int i )
+{
+    CV_Assert( i>=0 && i<(int)vtcs.size() );
+    return vtcs[i].t == 0;
+}
+
+}} // namespace detail, cv
+
+
+//! @endcond
+
+#endif // OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/hal/hal.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/hal/hal.hpp
new file mode 100644
index 0000000..f129012
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/hal/hal.hpp
@@ -0,0 +1,246 @@
+#ifndef CV_IMGPROC_HAL_HPP
+#define CV_IMGPROC_HAL_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/hal/interface.h"
+
+namespace cv { namespace hal {
+
+//! @addtogroup imgproc_hal_functions
+//! @{
+
+//---------------------------
+//! @cond IGNORED
+
+struct CV_EXPORTS Filter2D
+{
+    CV_DEPRECATED static Ptr<hal::Filter2D> create(uchar * , size_t , int ,
+                                                   int , int ,
+                                                   int , int ,
+                                                   int , int ,
+                                                   int , double ,
+                                                   int , int ,
+                                                   bool , bool );
+    virtual void apply(uchar * , size_t ,
+                       uchar * , size_t ,
+                       int , int ,
+                       int , int ,
+                       int , int ) = 0;
+    virtual ~Filter2D() {}
+};
+
+struct CV_EXPORTS SepFilter2D
+{
+    CV_DEPRECATED static Ptr<hal::SepFilter2D> create(int , int , int ,
+                                                      uchar * , int ,
+                                                      uchar * , int ,
+                                                      int , int ,
+                                                      double , int );
+    virtual void apply(uchar * , size_t ,
+                       uchar * , size_t ,
+                       int , int ,
+                       int , int ,
+                       int , int ) = 0;
+    virtual ~SepFilter2D() {}
+};
+
+
+struct CV_EXPORTS Morph
+{
+    CV_DEPRECATED static Ptr<hal::Morph> create(int , int , int , int , int ,
+                                                int , uchar * , size_t ,
+                                                int , int ,
+                                                int , int ,
+                                                int , const double *,
+                                                int , bool , bool );
+    virtual void apply(uchar * , size_t , uchar * , size_t , int , int ,
+                       int , int , int , int ,
+                       int , int , int , int ) = 0;
+    virtual ~Morph() {}
+};
+
+//!
@endcond +//--------------------------- + +CV_EXPORTS void filter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, + bool isSubmatrix); + +CV_EXPORTS void sepFilter2D(int stype, int dtype, int ktype, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernelx_data, int kernelx_len, + uchar * kernely_data, int kernely_len, + int anchor_x, int anchor_y, + double delta, int borderType); + +CV_EXPORTS void morph(int op, int src_type, int dst_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int roi_width, int roi_height, int roi_x, int roi_y, + int roi_width2, int roi_height2, int roi_x2, int roi_y2, + int kernel_type, uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, int anchor_x, int anchor_y, + int borderType, const double borderValue[4], + int iterations, bool isSubmatrix); + + +CV_EXPORTS void resize(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + double inv_scale_x, double inv_scale_y, int interpolation); + +CV_EXPORTS void warpAffine(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[6], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void warpPerspective(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[9], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, int dcn, bool swapBlue); + +CV_EXPORTS void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int greenBits); + +CV_EXPORTS void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int greenBits); + +CV_EXPORTS void cvtBGRtoGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); + +CV_EXPORTS void cvtGraytoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn); + +CV_EXPORTS void cvtBGR5x5toGray(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); + +CV_EXPORTS void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int greenBits); +CV_EXPORTS void cvtBGRtoYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isCbCr); + +CV_EXPORTS void cvtYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t 
dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isCbCr); + +CV_EXPORTS void cvtBGRtoXYZ(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue); + +CV_EXPORTS void cvtXYZtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue); + +CV_EXPORTS void cvtBGRtoHSV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV); + +CV_EXPORTS void cvtHSVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV); + +CV_EXPORTS void cvtBGRtoLab(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int scn, bool swapBlue, bool isLab, bool srgb); + +CV_EXPORTS void cvtLabtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int depth, int dcn, bool swapBlue, bool isLab, bool srgb); + +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +//! Separate Y and UV planes +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); + +//! Separate Y and UV planes +CV_EXPORTS void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, + uchar * y_data, uchar * uv_data, size_t dst_step, + int width, int height, + int scn, bool swapBlue, int uIdx); + +CV_EXPORTS void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height, + int dcn, bool swapBlue, int uIdx, int ycn); + +CV_EXPORTS void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +CV_EXPORTS void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int height); + +CV_EXPORTS void integral(int depth, int sdepth, int sqdepth, + const uchar* src, size_t srcstep, + uchar* sum, size_t sumstep, + uchar* sqsum, size_t sqsumstep, + uchar* tilted, size_t tstep, + int width, int height, int cn); + +//! 
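+
+/*
+ A sketch of driving one of the HAL entry points above directly from cv::Mat
+ buffers (normally cv::resize dispatches here on its own, so this is
+ illustrative only; sizes and scale factors are arbitrary):
+
+ @code{.cpp}
+ cv::Mat src(240, 320, CV_8UC3), dst(480, 640, CV_8UC3);
+ cv::hal::resize(src.type(),
+                 src.data, src.step, src.cols, src.rows,
+                 dst.data, dst.step, dst.cols, dst.rows,
+                 2.0, 2.0,                      // inv_scale_x, inv_scale_y
+                 CV_HAL_INTER_LINEAR);
+ @endcode
+*/
+//!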
@} + +}} + +#endif // CV_IMGPROC_HAL_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/hal/interface.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/hal/interface.h new file mode 100644 index 0000000..f8dbcfe --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/hal/interface.h @@ -0,0 +1,46 @@ +#ifndef OPENCV_IMGPROC_HAL_INTERFACE_H +#define OPENCV_IMGPROC_HAL_INTERFACE_H + +//! @addtogroup imgproc_hal_interface +//! @{ + +//! @name Interpolation modes +//! @sa cv::InterpolationFlags +//! @{ +#define CV_HAL_INTER_NEAREST 0 +#define CV_HAL_INTER_LINEAR 1 +#define CV_HAL_INTER_CUBIC 2 +#define CV_HAL_INTER_AREA 3 +#define CV_HAL_INTER_LANCZOS4 4 +//! @} + +//! @name Morphology operations +//! @sa cv::MorphTypes +//! @{ +#define CV_HAL_MORPH_ERODE 0 +#define CV_HAL_MORPH_DILATE 1 +//! @} + +//! @name Threshold types +//! @sa cv::ThresholdTypes +//! @{ +#define CV_HAL_THRESH_BINARY 0 +#define CV_HAL_THRESH_BINARY_INV 1 +#define CV_HAL_THRESH_TRUNC 2 +#define CV_HAL_THRESH_TOZERO 3 +#define CV_HAL_THRESH_TOZERO_INV 4 +#define CV_HAL_THRESH_MASK 7 +#define CV_HAL_THRESH_OTSU 8 +#define CV_HAL_THRESH_TRIANGLE 16 +//! @} + +//! @name Adaptive threshold algorithm +//! @sa cv::AdaptiveThresholdTypes +//! @{ +#define CV_HAL_ADAPTIVE_THRESH_MEAN_C 0 +#define CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C 1 +//! @} + +//! @} + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/imgproc.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/imgproc.hpp new file mode 100644 index 0000000..4175bd0 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/imgproc.hpp @@ -0,0 +1,48 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/imgproc.hpp" diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/imgproc_c.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/imgproc_c.h new file mode 100644 index 0000000..86dc119 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/imgproc_c.h @@ -0,0 +1,1177 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_IMGPROC_IMGPROC_C_H +#define OPENCV_IMGPROC_IMGPROC_C_H + +#include "opencv2/imgproc/types_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup imgproc_c +@{ +*/ + +/*********************** Background statistics accumulation *****************************/ + +/** @brief Adds image to accumulator +@see cv::accumulate +*/ +CVAPI(void) cvAcc( const CvArr* image, CvArr* sum, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Adds squared image to accumulator +@see cv::accumulateSquare +*/ +CVAPI(void) cvSquareAcc( const CvArr* image, CvArr* sqsum, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Adds a product of two images to accumulator +@see cv::accumulateProduct +*/ +CVAPI(void) cvMultiplyAcc( const CvArr* image1, const CvArr* image2, CvArr* acc, + const CvArr* mask CV_DEFAULT(NULL) ); + +/** @brief Adds image to accumulator with weights: acc = acc*(1-alpha) + image*alpha +@see cv::accumulateWeighted +*/ +CVAPI(void) cvRunningAvg( const CvArr* image, CvArr* acc, double alpha, + const CvArr* mask CV_DEFAULT(NULL) ); + +/****************************************************************************************\ +* Image Processing * +\****************************************************************************************/ + +/** Copies source 2D array inside of the larger destination array and + makes a border of the specified type (IPL_BORDER_*) around the copied area. */ +CVAPI(void) cvCopyMakeBorder( const CvArr* src, CvArr* dst, CvPoint offset, + int bordertype, CvScalar value CV_DEFAULT(cvScalarAll(0))); + +/** @brief Smooths the image in one of several ways. + +@param src The source image +@param dst The destination image +@param smoothtype Type of the smoothing, see SmoothMethod_c +@param size1 The first parameter of the smoothing operation, the aperture width. Must be a +positive odd number (1, 3, 5, ...) +@param size2 The second parameter of the smoothing operation, the aperture height. Ignored by +CV_MEDIAN and CV_BILATERAL methods. In the case of simple scaled/non-scaled and Gaussian blur if +size2 is zero, it is set to size1. Otherwise it must be a positive odd number. +@param sigma1 In the case of a Gaussian parameter this parameter may specify Gaussian \f$\sigma\f$ +(standard deviation). If it is zero, it is calculated from the kernel size: +\f[\sigma = 0.3 (n/2 - 1) + 0.8 \quad \text{where} \quad n= \begin{array}{l l} \mbox{\texttt{size1} for horizontal kernel} \\ \mbox{\texttt{size2} for vertical kernel} \end{array}\f] +Using standard sigma for small kernels ( \f$3\times 3\f$ to \f$7\times 7\f$ ) gives better speed. If +sigma1 is not zero, while size1 and size2 are zeros, the kernel size is calculated from the +sigma (to provide accurate enough operation). +@param sigma2 additional parameter for bilateral filtering + +@see cv::GaussianBlur, cv::blur, cv::medianBlur, cv::bilateralFilter. + */ +CVAPI(void) cvSmooth( const CvArr* src, CvArr* dst, + int smoothtype CV_DEFAULT(CV_GAUSSIAN), + int size1 CV_DEFAULT(3), + int size2 CV_DEFAULT(0), + double sigma1 CV_DEFAULT(0), + double sigma2 CV_DEFAULT(0)); + +/** @brief Convolves an image with the kernel. + +@param src input image. +@param dst output image of the same size and the same number of channels as src. +@param kernel convolution kernel (or rather a correlation kernel), a single-channel floating point +matrix; if you want to apply different kernels to different channels, split the image into +separate color planes using split and process them individually. 
+@param anchor anchor of the kernel that indicates the relative position of a filtered point within
+the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor
+is at the kernel center.
+
+@see cv::filter2D
+ */
+CVAPI(void)  cvFilter2D( const CvArr* src, CvArr* dst, const CvMat* kernel,
+                         CvPoint anchor CV_DEFAULT(cvPoint(-1,-1)));
+
+/** @brief Finds integral image: SUM(X,Y) = sum(x<X,y<Y)I(x,y)
+@see cv::integral
+*/
+CVAPI(void) cvIntegral( const CvArr* image, CvArr* sum,
+                        CvArr* sqsum CV_DEFAULT(NULL),
+                        CvArr* tilted_sum CV_DEFAULT(NULL));
+
+/** @brief Divides one histogram by another.
+
+The function calcProbDensity calculates the object probability density from two histograms as:
+
+\f[\texttt{dist\_hist} (I)= \forkthree{0}{if \(\texttt{hist1}(I)=0\)}{\texttt{scale}}{if \(\texttt{hist1}(I) \ne 0\) and \(\texttt{hist2}(I) > \texttt{hist1}(I)\)}{\frac{\texttt{hist2}(I) \cdot \texttt{scale}}{\texttt{hist1}(I)}}{if \(\texttt{hist1}(I) \ne 0\) and \(\texttt{hist2}(I) \le \texttt{hist1}(I)\)}\f]
+
+@param hist1 First histogram (the divisor).
+@param hist2 Second histogram.
+@param dst_hist Destination histogram.
+@param scale Scale factor for the destination histogram.
+ */
+CVAPI(void)  cvCalcProbDensity( const CvHistogram* hist1, const CvHistogram* hist2,
+                                CvHistogram* dst_hist, double scale CV_DEFAULT(255) );
+
+/** @brief equalizes histogram of 8-bit single-channel image
+@see cv::equalizeHist
+*/
+CVAPI(void)  cvEqualizeHist( const CvArr* src, CvArr* dst );
+
+
+/** @brief Applies distance transform to binary image
+@see cv::distanceTransform
+*/
+CVAPI(void)  cvDistTransform( const CvArr* src, CvArr* dst,
+                              int distance_type CV_DEFAULT(CV_DIST_L2),
+                              int mask_size CV_DEFAULT(3),
+                              const float* mask CV_DEFAULT(NULL),
+                              CvArr* labels CV_DEFAULT(NULL),
+                              int labelType CV_DEFAULT(CV_DIST_LABEL_CCOMP));
+
+
+/** @brief Applies fixed-level threshold to grayscale image.
+
+   This is a basic operation applied before retrieving contours
+@see cv::threshold
+*/
+CVAPI(double)  cvThreshold( const CvArr*  src, CvArr*  dst,
+                            double  threshold, double  max_value,
+                            int threshold_type );
+
+/** @brief Applies adaptive threshold to grayscale image.
+
+   The two parameters for methods CV_ADAPTIVE_THRESH_MEAN_C and
+   CV_ADAPTIVE_THRESH_GAUSSIAN_C are:
+   neighborhood size (3, 5, 7 etc.),
+   and a constant subtracted from mean (...,-3,-2,-1,0,1,2,3,...)
+@see cv::adaptiveThreshold
+*/
+CVAPI(void)  cvAdaptiveThreshold( const CvArr* src, CvArr* dst, double max_value,
+                                  int adaptive_method CV_DEFAULT(CV_ADAPTIVE_THRESH_MEAN_C),
+                                  int threshold_type CV_DEFAULT(CV_THRESH_BINARY),
+                                  int block_size CV_DEFAULT(3),
+                                  double param1 CV_DEFAULT(5));
+
+/** @brief Fills the connected component until the color difference gets large enough
+@see cv::floodFill
+*/
+CVAPI(void)  cvFloodFill( CvArr* image, CvPoint seed_point,
+                          CvScalar new_val, CvScalar lo_diff CV_DEFAULT(cvScalarAll(0)),
+                          CvScalar up_diff CV_DEFAULT(cvScalarAll(0)),
+                          CvConnectedComp* comp CV_DEFAULT(NULL),
+                          int flags CV_DEFAULT(4),
+                          CvArr* mask CV_DEFAULT(NULL));
+
+/****************************************************************************************\
+*                                  Feature detection                                     *
+\****************************************************************************************/
+
+/** @brief Runs canny edge detector
+@see cv::Canny
+*/
+CVAPI(void) cvCanny( const CvArr* image, CvArr* edges, double threshold1,
+                     double threshold2, int aperture_size CV_DEFAULT(3) );
+
+/** @brief Calculates constraint image for corner detection
+
+   Dx^2 * Dyy + Dxx * Dy^2 - 2 * Dx * Dy * Dxy.
+ Applying threshold to the result gives coordinates of corners +@see cv::preCornerDetect +*/ +CVAPI(void) cvPreCornerDetect( const CvArr* image, CvArr* corners, + int aperture_size CV_DEFAULT(3) ); + +/** @brief Calculates eigen values and vectors of 2x2 + gradient covariation matrix at every image pixel +@see cv::cornerEigenValsAndVecs +*/ +CVAPI(void) cvCornerEigenValsAndVecs( const CvArr* image, CvArr* eigenvv, + int block_size, int aperture_size CV_DEFAULT(3) ); + +/** @brief Calculates minimal eigenvalue for 2x2 gradient covariation matrix at + every image pixel +@see cv::cornerMinEigenVal +*/ +CVAPI(void) cvCornerMinEigenVal( const CvArr* image, CvArr* eigenval, + int block_size, int aperture_size CV_DEFAULT(3) ); + +/** @brief Harris corner detector: + + Calculates det(M) - k*(trace(M)^2), where M is 2x2 gradient covariation matrix for each pixel +@see cv::cornerHarris +*/ +CVAPI(void) cvCornerHarris( const CvArr* image, CvArr* harris_response, + int block_size, int aperture_size CV_DEFAULT(3), + double k CV_DEFAULT(0.04) ); + +/** @brief Adjust corner position using some sort of gradient search +@see cv::cornerSubPix +*/ +CVAPI(void) cvFindCornerSubPix( const CvArr* image, CvPoint2D32f* corners, + int count, CvSize win, CvSize zero_zone, + CvTermCriteria criteria ); + +/** @brief Finds a sparse set of points within the selected region + that seem to be easy to track +@see cv::goodFeaturesToTrack +*/ +CVAPI(void) cvGoodFeaturesToTrack( const CvArr* image, CvArr* eig_image, + CvArr* temp_image, CvPoint2D32f* corners, + int* corner_count, double quality_level, + double min_distance, + const CvArr* mask CV_DEFAULT(NULL), + int block_size CV_DEFAULT(3), + int use_harris CV_DEFAULT(0), + double k CV_DEFAULT(0.04) ); + +/** @brief Finds lines on binary image using one of several methods. + + line_storage is either memory storage or 1 x _max number of lines_ CvMat, its + number of columns is changed by the function. + method is one of CV_HOUGH_*; + rho, theta and threshold are used for each of those methods; + param1 ~ line length, param2 ~ line gap - for probabilistic, + param1 ~ srn, param2 ~ stn - for multi-scale +@see cv::HoughLines +*/ +CVAPI(CvSeq*) cvHoughLines2( CvArr* image, void* line_storage, int method, + double rho, double theta, int threshold, + double param1 CV_DEFAULT(0), double param2 CV_DEFAULT(0), + double min_theta CV_DEFAULT(0), double max_theta CV_DEFAULT(CV_PI)); + +/** @brief Finds circles in the image +@see cv::HoughCircles +*/ +CVAPI(CvSeq*) cvHoughCircles( CvArr* image, void* circle_storage, + int method, double dp, double min_dist, + double param1 CV_DEFAULT(100), + double param2 CV_DEFAULT(100), + int min_radius CV_DEFAULT(0), + int max_radius CV_DEFAULT(0)); + +/** @brief Fits a line into set of 2d or 3d points in a robust way (M-estimator technique) +@see cv::fitLine +*/ +CVAPI(void) cvFitLine( const CvArr* points, int dist_type, double param, + double reps, double aeps, float* line ); + +/****************************************************************************************\ +* Drawing * +\****************************************************************************************/ + +/****************************************************************************************\ +* Drawing functions work with images/matrices of arbitrary type. * +* For color images the channel order is BGR[A] * +* Antialiasing is supported only for 8-bit image now. 
*
+* All the functions include parameter color that means rgb value (that may be             *
+* constructed with CV_RGB macro) for color images and brightness                          *
+* for grayscale images.                                                                   *
+* If a drawn figure is partially or completely outside of the image, it is clipped.       *
+\****************************************************************************************/
+
+#define CV_FILLED -1
+
+#define CV_AA 16
+
+/** @brief Draws 4-connected, 8-connected or antialiased line segment connecting two points
+@see cv::line
+*/
+CVAPI(void)  cvLine( CvArr* img, CvPoint pt1, CvPoint pt2,
+                     CvScalar color, int thickness CV_DEFAULT(1),
+                     int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) );
+
+/** @brief Draws a rectangle given two opposite corners of the rectangle (pt1 & pt2)
+
+   if thickness<0 (e.g. thickness == CV_FILLED), the filled box is drawn
+@see cv::rectangle
+*/
+CVAPI(void)  cvRectangle( CvArr* img, CvPoint pt1, CvPoint pt2,
+                          CvScalar color, int thickness CV_DEFAULT(1),
+                          int line_type CV_DEFAULT(8),
+                          int shift CV_DEFAULT(0));
+
+/** @brief Draws a rectangle specified by a CvRect structure
+@see cv::rectangle
+*/
+CVAPI(void)  cvRectangleR( CvArr* img, CvRect r,
+                           CvScalar color, int thickness CV_DEFAULT(1),
+                           int line_type CV_DEFAULT(8),
+                           int shift CV_DEFAULT(0));
+
+
+/** @brief Draws a circle with specified center and radius.
+
+   Thickness works in the same way as with cvRectangle
+@see cv::circle
+*/
+CVAPI(void)  cvCircle( CvArr* img, CvPoint center, int radius,
+                       CvScalar color, int thickness CV_DEFAULT(1),
+                       int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0));
+
+/** @brief Draws ellipse outline, filled ellipse, elliptic arc or filled elliptic sector
+
+   depending on _thickness_, _start_angle_ and _end_angle_ parameters. The resultant figure
+   is rotated by _angle_. All the angles are in degrees
+@see cv::ellipse
+*/
+CVAPI(void)  cvEllipse( CvArr* img, CvPoint center, CvSize axes,
+                        double angle, double start_angle, double end_angle,
+                        CvScalar color, int thickness CV_DEFAULT(1),
+                        int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0));
+
+CV_INLINE  void  cvEllipseBox( CvArr* img, CvBox2D box, CvScalar color,
+                               int thickness CV_DEFAULT(1),
+                               int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) )
+{
+    CvSize axes = cvSize(
+        cvRound(box.size.width*0.5),
+        cvRound(box.size.height*0.5)
+    );
+
+    cvEllipse( img, cvPointFrom32f( box.center ), axes, box.angle,
+               0, 360, color, thickness, line_type, shift );
+}
+
+/** @brief Fills convex or monotonous polygon.
+@see cv::fillConvexPoly
+*/
+CVAPI(void)  cvFillConvexPoly( CvArr* img, const CvPoint* pts, int npts, CvScalar color,
+                               int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0));
+
+/** @brief Fills an area bounded by one or more arbitrary polygons
+@see cv::fillPoly
+*/
+CVAPI(void)  cvFillPoly( CvArr* img, CvPoint** pts, const int* npts,
+                         int contours, CvScalar color,
+                         int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) );
+
+/** @brief Draws one or more polygonal curves
+@see cv::polylines
+*/
+CVAPI(void)  cvPolyLine( CvArr* img, CvPoint** pts, const int* npts, int contours,
+                         int is_closed, CvScalar color, int thickness CV_DEFAULT(1),
+                         int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) );
+
+#define cvDrawRect cvRectangle
+#define cvDrawLine cvLine
+#define cvDrawCircle cvCircle
+#define cvDrawEllipse cvEllipse
+#define cvDrawPolyLine cvPolyLine
+
+/** @brief Clips the line segment connecting *pt1 and *pt2
+   by the rectangular window
+
+   (0<=x<img_size.width, 0<=y<img_size.height).
+@see cv::clipLine
+*/
+CVAPI(int) cvClipLine( CvSize img_size, CvPoint* pt1, CvPoint* pt2 );
+
+/** @brief Initializes line iterator.
+
+Initially, line_iterator->ptr will point to pt1 (or pt2, see left_to_right description) location in
+the image.
Returns the number of pixels on the line between the ending points. +@see cv::LineIterator +*/ +CVAPI(int) cvInitLineIterator( const CvArr* image, CvPoint pt1, CvPoint pt2, + CvLineIterator* line_iterator, + int connectivity CV_DEFAULT(8), + int left_to_right CV_DEFAULT(0)); + +#define CV_NEXT_LINE_POINT( line_iterator ) \ +{ \ + int _line_iterator_mask = (line_iterator).err < 0 ? -1 : 0; \ + (line_iterator).err += (line_iterator).minus_delta + \ + ((line_iterator).plus_delta & _line_iterator_mask); \ + (line_iterator).ptr += (line_iterator).minus_step + \ + ((line_iterator).plus_step & _line_iterator_mask); \ +} + + +#define CV_FONT_HERSHEY_SIMPLEX 0 +#define CV_FONT_HERSHEY_PLAIN 1 +#define CV_FONT_HERSHEY_DUPLEX 2 +#define CV_FONT_HERSHEY_COMPLEX 3 +#define CV_FONT_HERSHEY_TRIPLEX 4 +#define CV_FONT_HERSHEY_COMPLEX_SMALL 5 +#define CV_FONT_HERSHEY_SCRIPT_SIMPLEX 6 +#define CV_FONT_HERSHEY_SCRIPT_COMPLEX 7 + +#define CV_FONT_ITALIC 16 + +#define CV_FONT_VECTOR0 CV_FONT_HERSHEY_SIMPLEX + + +/** Font structure */ +typedef struct CvFont +{ + const char* nameFont; //Qt:nameFont + CvScalar color; //Qt:ColorFont -> cvScalar(blue_component, green_component, red_component[, alpha_component]) + int font_face; //Qt: bool italic /** =CV_FONT_* */ + const int* ascii; //!< font data and metrics + const int* greek; + const int* cyrillic; + float hscale, vscale; + float shear; //!< slope coefficient: 0 - normal, >0 - italic + int thickness; //!< Qt: weight /** letters thickness */ + float dx; //!< horizontal interval between letters + int line_type; //!< Qt: PointSize +} +CvFont; + +/** @brief Initializes font structure (OpenCV 1.x API). + +The function initializes the font structure that can be passed to text rendering functions. + +@param font Pointer to the font structure initialized by the function +@param font_face Font name identifier. See cv::HersheyFonts and corresponding old CV_* identifiers. +@param hscale Horizontal scale. If equal to 1.0f , the characters have the original width +depending on the font type. If equal to 0.5f , the characters are of half the original width. +@param vscale Vertical scale. If equal to 1.0f , the characters have the original height depending +on the font type. If equal to 0.5f , the characters are of half the original height. +@param shear Approximate tangent of the character slope relative to the vertical line. A zero +value means a non-italic font, 1.0f means about a 45 degree slope, etc. +@param thickness Thickness of the text strokes +@param line_type Type of the strokes, see line description + +@sa cvPutText + */ +CVAPI(void) cvInitFont( CvFont* font, int font_face, + double hscale, double vscale, + double shear CV_DEFAULT(0), + int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8)); + +CV_INLINE CvFont cvFont( double scale, int thickness CV_DEFAULT(1) ) +{ + CvFont font; + cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, scale, scale, 0, thickness, CV_AA ); + return font; +} + +/** @brief Renders text stroke with specified font and color at specified location. 
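+
+    For example, a minimal call sequence (an illustrative sketch; `img` is any
+    8-bit CvArr* image):
+@code{.cpp}
+    CvFont font;
+    cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, 1.0, 1.0, 0, 1, CV_AA );
+    cvPutText( img, "label", cvPoint(10, 40), &font, CV_RGB(255, 0, 0) );
+@endcode
+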
+ CvFont should be initialized with cvInitFont +@see cvInitFont, cvGetTextSize, cvFont, cv::putText +*/ +CVAPI(void) cvPutText( CvArr* img, const char* text, CvPoint org, + const CvFont* font, CvScalar color ); + +/** @brief Calculates bounding box of text stroke (useful for alignment) +@see cv::getTextSize +*/ +CVAPI(void) cvGetTextSize( const char* text_string, const CvFont* font, + CvSize* text_size, int* baseline ); + +/** @brief Unpacks color value + +if arrtype is CV_8UC?, _color_ is treated as packed color value, otherwise the first channels +(depending on arrtype) of destination scalar are set to the same value = _color_ +*/ +CVAPI(CvScalar) cvColorToScalar( double packed_color, int arrtype ); + +/** @brief Returns the polygon points which make up the given ellipse. + +The ellipse is define by the box of size 'axes' rotated 'angle' around the 'center'. A partial +sweep of the ellipse arc can be done by specifying arc_start and arc_end to be something other than +0 and 360, respectively. The input array 'pts' must be large enough to hold the result. The total +number of points stored into 'pts' is returned by this function. +@see cv::ellipse2Poly +*/ +CVAPI(int) cvEllipse2Poly( CvPoint center, CvSize axes, + int angle, int arc_start, int arc_end, CvPoint * pts, int delta ); + +/** @brief Draws contour outlines or filled interiors on the image +@see cv::drawContours +*/ +CVAPI(void) cvDrawContours( CvArr *img, CvSeq* contour, + CvScalar external_color, CvScalar hole_color, + int max_level, int thickness CV_DEFAULT(1), + int line_type CV_DEFAULT(8), + CvPoint offset CV_DEFAULT(cvPoint(0,0))); + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/segmentation.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/segmentation.hpp new file mode 100644 index 0000000..c40d501 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/segmentation.hpp @@ -0,0 +1,141 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_IMGPROC_SEGMENTATION_HPP +#define OPENCV_IMGPROC_SEGMENTATION_HPP + +#include "opencv2/imgproc.hpp" + +namespace cv { + +namespace segmentation { + +//! @addtogroup imgproc_segmentation +//! @{ + + +/** @brief Intelligent Scissors image segmentation + * + * This class is used to find the path (contour) between two points + * which can be used for image segmentation. + * + * Usage example: + * @snippet snippets/imgproc_segmentation.cpp usage_example_intelligent_scissors + * + * Reference: "Intelligent Scissors for Image Composition" + * algorithm designed by Eric N. Mortensen and William A. Barrett, Brigham Young University + * @cite Mortensen95intelligentscissors + */ +class CV_EXPORTS_W_SIMPLE IntelligentScissorsMB +{ +public: + CV_WRAP + IntelligentScissorsMB(); + + /** @brief Specify weights of feature functions + * + * Consider keeping weights normalized (sum of weights equals to 1.0) + * Discrete dynamic programming (DP) goal is minimization of costs between pixels. 
+     *
+     * @param weight_non_edge Specify cost of non-edge pixels (default: 0.43f)
+     * @param weight_gradient_direction Specify cost of gradient direction function (default: 0.43f)
+     * @param weight_gradient_magnitude Specify cost of gradient magnitude function (default: 0.14f)
+     */
+    CV_WRAP
+    IntelligentScissorsMB& setWeights(float weight_non_edge, float weight_gradient_direction, float weight_gradient_magnitude);
+
+    /** @brief Specify gradient magnitude max value threshold
+     *
+     * Zero limit value is used to disable gradient magnitude thresholding (default behavior, as described in original article).
+     * Otherwise pixels with `gradient magnitude >= threshold` have zero cost.
+     *
+     * @note Thresholding should be used for images with irregular regions (to avoid getting stuck on parameters from high-contrast areas, like embedded logos).
+     *
+     * @param gradient_magnitude_threshold_max Specify gradient magnitude max value threshold (default: 0, disabled)
+     */
+    CV_WRAP
+    IntelligentScissorsMB& setGradientMagnitudeMaxLimit(float gradient_magnitude_threshold_max = 0.0f);
+
+    /** @brief Switch to "Laplacian Zero-Crossing" edge feature extractor and specify its parameters
+     *
+     * This feature extractor is used by default according to the article.
+     *
+     * Implementation has additional filtering for regions with low-amplitude noise.
+     * This filtering is enabled through the parameter of minimal gradient amplitude (use a small value such as 4, 8 or 16).
+     *
+     * @note Current implementation of this feature extractor is based on processing of grayscale images (color image is converted to grayscale image first).
+     *
+     * @note Canny edge detector is a bit slower, but provides better results (especially on color images): use setEdgeFeatureCannyParameters().
+     *
+     * @param gradient_magnitude_min_value Minimal gradient magnitude value for edge pixels (default: 0, check is disabled)
+     */
+    CV_WRAP
+    IntelligentScissorsMB& setEdgeFeatureZeroCrossingParameters(float gradient_magnitude_min_value = 0.0f);
+
+    /** @brief Switch edge feature extractor to use Canny edge detector
+     *
+     * @note "Laplacian Zero-Crossing" feature extractor is used by default (following the original article)
+     *
+     * @sa Canny
+     */
+    CV_WRAP
+    IntelligentScissorsMB& setEdgeFeatureCannyParameters(
+            double threshold1, double threshold2,
+            int apertureSize = 3, bool L2gradient = false
+    );
+
+    /** @brief Specify input image and extract image features
+     *
+     * @param image input image. Type is #CV_8UC1 / #CV_8UC3
+     */
+    CV_WRAP
+    IntelligentScissorsMB& applyImage(InputArray image);
+
+    /** @brief Specify custom features of input image
+     *
+     * Customized advanced variant of applyImage() call.
+     *
+     * @param non_edge Specify cost of non-edge pixels. Type is CV_8UC1. Expected values are `{0, 1}`.
+     * @param gradient_direction Specify gradient direction feature. Type is CV_32FC2. Values are expected to be normalized: `x^2 + y^2 == 1`
+     * @param gradient_magnitude Specify cost of gradient magnitude function: Type is CV_32FC1. Values should be in range `[0, 1]`.
+     * @param image **Optional parameter**. Must be specified if a subset of features is specified (non-specified features are calculated internally)
+     */
+    CV_WRAP
+    IntelligentScissorsMB& applyImageFeatures(
+            InputArray non_edge, InputArray gradient_direction, InputArray gradient_magnitude,
+            InputArray image = noArray()
+    );
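+
+    /*
+     Putting the pieces together, end-to-end use of this class follows the
+     pattern below (a sketch; `image`, `source` and `target` are illustrative,
+     and buildMap()/getContour() are declared next):
+
+     @code{.cpp}
+     cv::segmentation::IntelligentScissorsMB tool;
+     tool.setEdgeFeatureCannyParameters(32, 100)    // e.g. Canny edge feature
+         .setGradientMagnitudeMaxLimit(200.0f);
+     tool.applyImage(image);                        // CV_8UC1 / CV_8UC3 input
+     tool.buildMap(source);                         // seed point
+     std::vector<cv::Point> contour;
+     tool.getContour(target, contour);              // optimal path to target
+     @endcode
+    */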
+
+    /** @brief Prepares a map of optimal paths for the given source point on the image
+     *
+     * @note applyImage() / applyImageFeatures() must be called before this call
+     *
+     * @param sourcePt The source point used to find the paths
+     */
+    CV_WRAP void buildMap(const Point& sourcePt);
+
+    /** @brief Extracts optimal contour for the given target point on the image
+     *
+     * @note buildMap() must be called before this call
+     *
+     * @param targetPt The target point
+     * @param[out] contour The list of pixels which contains optimal path between the source and the target points of the image. Type is CV_32SC2 (compatible with `std::vector<Point>`)
+     * @param backward Flag to indicate reverse order of retrieved pixels (use "true" value to fetch points from the target to the source point)
+     */
+    CV_WRAP void getContour(const Point& targetPt, OutputArray contour, bool backward = false) const;
+
+#ifndef CV_DOXYGEN
+    struct Impl;
+    inline Impl* getImpl() const { return impl.get(); }
+protected:
+    std::shared_ptr<Impl> impl;
+#endif
+};
+
+//! @}
+
+}  // namespace segmentation
+}  // namespace cv
+
+#endif // OPENCV_IMGPROC_SEGMENTATION_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/types_c.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/types_c.h
new file mode 100644
index 0000000..d3e55f5
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/imgproc/types_c.h
@@ -0,0 +1,659 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_IMGPROC_TYPES_C_H +#define OPENCV_IMGPROC_TYPES_C_H + +#include "opencv2/core/core_c.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup imgproc_c + @{ +*/ + +/** Connected component structure */ +typedef struct CvConnectedComp +{ + double area; /** DBL_EPSILON ? 1./std::sqrt(am00) : 0; + } + operator cv::Moments() const + { + return cv::Moments(m00, m10, m01, m20, m11, m02, m30, m21, m12, m03); + } +#endif +} +CvMoments; + +#ifdef __cplusplus +} // extern "C" + +CV_INLINE CvMoments cvMoments() +{ +#if !defined(CV__ENABLE_C_API_CTORS) + CvMoments self = CV_STRUCT_INITIALIZER; return self; +#else + return CvMoments(); +#endif +} + +CV_INLINE CvMoments cvMoments(const cv::Moments& m) +{ +#if !defined(CV__ENABLE_C_API_CTORS) + double am00 = std::abs(m.m00); + CvMoments self = { + m.m00, m.m10, m.m01, m.m20, m.m11, m.m02, m.m30, m.m21, m.m12, m.m03, + m.mu20, m.mu11, m.mu02, m.mu30, m.mu21, m.mu12, m.mu03, + am00 > DBL_EPSILON ? 1./std::sqrt(am00) : 0 + }; + return self; +#else + return CvMoments(m); +#endif +} + +extern "C" { +#endif // __cplusplus + +/** Hu invariants */ +typedef struct CvHuMoments +{ + double hu1, hu2, hu3, hu4, hu5, hu6, hu7; /**< Hu invariants */ +} +CvHuMoments; + +/** Template matching methods */ +enum +{ + CV_TM_SQDIFF =0, + CV_TM_SQDIFF_NORMED =1, + CV_TM_CCORR =2, + CV_TM_CCORR_NORMED =3, + CV_TM_CCOEFF =4, + CV_TM_CCOEFF_NORMED =5 +}; + +typedef float (CV_CDECL * CvDistanceFunction)( const float* a, const float* b, void* user_param ); + +/** Contour retrieval modes */ +enum +{ + CV_RETR_EXTERNAL=0, + CV_RETR_LIST=1, + CV_RETR_CCOMP=2, + CV_RETR_TREE=3, + CV_RETR_FLOODFILL=4 +}; + +/** Contour approximation methods */ +enum +{ + CV_CHAIN_CODE=0, + CV_CHAIN_APPROX_NONE=1, + CV_CHAIN_APPROX_SIMPLE=2, + CV_CHAIN_APPROX_TC89_L1=3, + CV_CHAIN_APPROX_TC89_KCOS=4, + CV_LINK_RUNS=5 +}; + +/* +Internal structure that is used for sequential retrieving contours from the image. +It supports both hierarchical and plane variants of Suzuki algorithm. 
+*/
+typedef struct _CvContourScanner* CvContourScanner;
+
+/** Freeman chain reader state */
+typedef struct CvChainPtReader
+{
+    CV_SEQ_READER_FIELDS()
+    char      code;
+    CvPoint   pt;
+    schar     deltas[8][2];
+}
+CvChainPtReader;
+
+/** initializes 8-element array for fast access to 3x3 neighborhood of a pixel */
+#define  CV_INIT_3X3_DELTAS( deltas, step, nch )            \
+    ((deltas)[0] =  (nch),  (deltas)[1] = -(step) + (nch),  \
+     (deltas)[2] = -(step), (deltas)[3] = -(step) - (nch),  \
+     (deltas)[4] = -(nch),  (deltas)[5] =  (step) - (nch),  \
+     (deltas)[6] =  (step), (deltas)[7] =  (step) + (nch))
+
+
+/** Contour approximation algorithms */
+enum
+{
+    CV_POLY_APPROX_DP = 0
+};
+
+/** Shape matching methods */
+enum
+{
+    CV_CONTOURS_MATCH_I1  =1,  //!< \f[I_1(A,B) =  \sum _{i=1...7} \left |  \frac{1}{m^A_i} -  \frac{1}{m^B_i} \right |\f]
+    CV_CONTOURS_MATCH_I2  =2,  //!< \f[I_2(A,B) =  \sum _{i=1...7} \left | m^A_i - m^B_i \right |\f]
+    CV_CONTOURS_MATCH_I3  =3   //!< \f[I_3(A,B) =  \max _{i=1...7} \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| }\f]
+};
+
+/** Shape orientation */
+enum
+{
+    CV_CLOCKWISE         =1,
+    CV_COUNTER_CLOCKWISE =2
+};
+
+
+/** Convexity defect */
+typedef struct CvConvexityDefect
+{
+    CvPoint* start; /**< point of the contour where the defect begins */
+    CvPoint* end; /**< point of the contour where the defect ends */
+    CvPoint* depth_point; /**< the farthest from the convex hull point within the defect */
+    float depth; /**< distance between the farthest point and the convex hull */
+} CvConvexityDefect;
+
+
+/** Histogram comparison methods */
+enum
+{
+    CV_COMP_CORREL        =0,
+    CV_COMP_CHISQR        =1,
+    CV_COMP_INTERSECT     =2,
+    CV_COMP_BHATTACHARYYA =3,
+    CV_COMP_HELLINGER     =CV_COMP_BHATTACHARYYA,
+    CV_COMP_CHISQR_ALT    =4,
+    CV_COMP_KL_DIV        =5
+};
+
+/** Mask size for distance transform */
+enum
+{
+    CV_DIST_MASK_3   =3,
+    CV_DIST_MASK_5   =5,
+    CV_DIST_MASK_PRECISE =0
+};
+
+/** Content of output label array: connected components or pixels */
+enum
+{
+    CV_DIST_LABEL_CCOMP = 0,
+    CV_DIST_LABEL_PIXEL = 1
+};
+
+/** Distance types for Distance Transform and M-estimators */
+enum
+{
+    CV_DIST_USER    =-1,  /**< User defined distance */
+    CV_DIST_L1      =1,   /**< distance = |x1-x2| + |y1-y2| */
+    CV_DIST_L2      =2,   /**< the simple euclidean distance */
+    CV_DIST_C       =3,   /**< distance = max(|x1-x2|,|y1-y2|) */
+    CV_DIST_L12     =4,   /**< L1-L2 metric: distance = 2(sqrt(1+x*x/2) - 1)) */
+    CV_DIST_FAIR    =5,   /**< distance = c^2(|x|/c-log(1+|x|/c)), c = 1.3998 */
+    CV_DIST_WELSCH  =6,   /**< distance = c^2/2(1-exp(-(x/c)^2)), c = 2.9846 */
+    CV_DIST_HUBER   =7    /**< distance = |x|<c ? x^2/2 : c(|x|-c/2), c=1.345 */
+};
+
+
+/** Threshold types */
+enum
+{
+    CV_THRESH_BINARY      =0,  /**< value = value > threshold ? max_value : 0       */
+    CV_THRESH_BINARY_INV  =1,  /**< value = value > threshold ? 0 : max_value       */
+    CV_THRESH_TRUNC       =2,  /**< value = value > threshold ? threshold : value   */
+    CV_THRESH_TOZERO      =3,  /**< value = value > threshold ? value : 0           */
+    CV_THRESH_TOZERO_INV  =4,  /**< value = value > threshold ?
0 : value */ + CV_THRESH_MASK =7, + CV_THRESH_OTSU =8, /**< use Otsu algorithm to choose the optimal threshold value; + combine the flag with one of the above CV_THRESH_* values */ + CV_THRESH_TRIANGLE =16 /**< use Triangle algorithm to choose the optimal threshold value; + combine the flag with one of the above CV_THRESH_* values, but not + with CV_THRESH_OTSU */ +}; + +/** Adaptive threshold methods */ +enum +{ + CV_ADAPTIVE_THRESH_MEAN_C =0, + CV_ADAPTIVE_THRESH_GAUSSIAN_C =1 +}; + +/** FloodFill flags */ +enum +{ + CV_FLOODFILL_FIXED_RANGE =(1 << 16), + CV_FLOODFILL_MASK_ONLY =(1 << 17) +}; + + +/** Canny edge detector flags */ +enum +{ + CV_CANNY_L2_GRADIENT =(1 << 31) +}; + +/** Variants of a Hough transform */ +enum +{ + CV_HOUGH_STANDARD =0, + CV_HOUGH_PROBABILISTIC =1, + CV_HOUGH_MULTI_SCALE =2, + CV_HOUGH_GRADIENT =3 +}; + + +/* Fast search data structures */ +struct CvFeatureTree; +struct CvLSH; +struct CvLSHOperations; + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/opencv.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/opencv.hpp new file mode 100644 index 0000000..d17b94a --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/opencv.hpp @@ -0,0 +1,95 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#ifndef OPENCV_ALL_HPP
+#define OPENCV_ALL_HPP
+
+// File that defines what modules were included during the build of OpenCV
+// These are purely the defines of the correct HAVE_OPENCV_modulename values
+#include "opencv2/opencv_modules.hpp"
+
+// Then the list of defines is checked to include the correct headers
+// The core library is always included --> without it, no OpenCV functionality is available
+#include "opencv2/core.hpp"
+
+// Then the optional modules are checked
+#ifdef HAVE_OPENCV_CALIB3D
+#include "opencv2/calib3d.hpp"
+#endif
+#ifdef HAVE_OPENCV_FEATURES2D
+#include "opencv2/features2d.hpp"
+#endif
+#ifdef HAVE_OPENCV_DNN
+#include "opencv2/dnn.hpp"
+#endif
+#ifdef HAVE_OPENCV_FLANN
+#include "opencv2/flann.hpp"
+#endif
+#ifdef HAVE_OPENCV_HIGHGUI
+#include "opencv2/highgui.hpp"
+#endif
+#ifdef HAVE_OPENCV_IMGCODECS
+#include "opencv2/imgcodecs.hpp"
+#endif
+#ifdef HAVE_OPENCV_IMGPROC
+#include "opencv2/imgproc.hpp"
+#endif
+#ifdef HAVE_OPENCV_ML
+#include "opencv2/ml.hpp"
+#endif
+#ifdef HAVE_OPENCV_OBJDETECT
+#include "opencv2/objdetect.hpp"
+#endif
+#ifdef HAVE_OPENCV_PHOTO
+#include "opencv2/photo.hpp"
+#endif
+#ifdef HAVE_OPENCV_STITCHING
+#include "opencv2/stitching.hpp"
+#endif
+#ifdef HAVE_OPENCV_VIDEO
+#include "opencv2/video.hpp"
+#endif
+#ifdef HAVE_OPENCV_VIDEOIO
+#include "opencv2/videoio.hpp"
+#endif
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/opencv_modules.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/opencv_modules.hpp
new file mode 100644
index 0000000..5fafd9c
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/opencv_modules.hpp
@@ -0,0 +1,20 @@
+/*
+ *  ** File generated automatically, do not modify **
+ *
+ *  This file defines the list of modules available in the current build configuration
+ *
+ *
+*/
+
+// This definition means that OpenCV is built with enabled non-free code.
+// For example, patented algorithms for non-profit/non-commercial use only.
+/* #undef OPENCV_ENABLE_NONFREE */
+
+#define HAVE_OPENCV_CORE
+#define HAVE_OPENCV_FEATURES2D
+#define HAVE_OPENCV_HIGHGUI
+#define HAVE_OPENCV_IMGPROC
+#define HAVE_OPENCV_PHOTO
+#define HAVE_OPENCV_VIDEO
+
+
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo.hpp
new file mode 100644
index 0000000..c2e89a3
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo.hpp
@@ -0,0 +1,858 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_PHOTO_HPP +#define OPENCV_PHOTO_HPP + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +/** +@defgroup photo Computational Photography + +This module includes photo processing algorithms +@{ + @defgroup photo_inpaint Inpainting + @defgroup photo_denoise Denoising + @defgroup photo_hdr HDR imaging + +This section describes high dynamic range imaging algorithms namely tonemapping, exposure alignment, +camera calibration with multiple exposures and exposure fusion. + + @defgroup photo_decolor Contrast Preserving Decolorization + +Useful links: + +http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html + + @defgroup photo_clone Seamless Cloning + +Useful links: + +https://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp + + @defgroup photo_render Non-Photorealistic Rendering + +Useful links: + +http://www.inf.ufrgs.br/~eslgastal/DomainTransform + +https://www.learnopencv.com/non-photorealistic-rendering-using-opencv-python-c/ + + @defgroup photo_c C API +@} + */ + +namespace cv +{ + +//! @addtogroup photo +//! @{ + +//! @addtogroup photo_inpaint +//! @{ +//! the inpainting algorithm +enum +{ + INPAINT_NS = 0, //!< Use Navier-Stokes based method + INPAINT_TELEA = 1 //!< Use the algorithm proposed by Alexandru Telea @cite Telea04 +}; + +/** @brief Restores the selected region in an image using the region neighborhood. + +@param src Input 8-bit, 16-bit unsigned or 32-bit float 1-channel or 8-bit 3-channel image. +@param inpaintMask Inpainting mask, 8-bit 1-channel image. Non-zero pixels indicate the area that +needs to be inpainted. +@param dst Output image with the same size and type as src . +@param inpaintRadius Radius of a circular neighborhood of each point inpainted that is considered +by the algorithm. +@param flags Inpainting method that could be cv::INPAINT_NS or cv::INPAINT_TELEA + +The function reconstructs the selected image area from the pixel near the area boundary. 
The
+function may be used to remove dust and scratches from a scanned photo, or to remove undesirable
+objects from still images or video. See <http://en.wikipedia.org/wiki/Inpainting> for more details.
+
+@note
+   -   An example using the inpainting technique can be found at
+       opencv_source_code/samples/cpp/inpaint.cpp
+   -   (Python) An example using the inpainting technique can be found at
+       opencv_source_code/samples/python/inpaint.py
+ */
+CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
+                           OutputArray dst, double inpaintRadius, int flags );
+
+//! @} photo_inpaint
+
+//! @addtogroup photo_denoise
+//! @{
+
+/** @brief Perform image denoising using the Non-local Means Denoising algorithm
+<http://www.ipol.im/pub/art/2011/bcm_nlm/> with several computational
+optimizations. The noise is expected to be Gaussian white noise.
+
+@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
+@param dst Output image with the same size and type as src .
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater searchWindowSize - greater
+denoising time. Recommended value 21 pixels
+@param h Parameter regulating filter strength. A big h value perfectly removes noise but also
+removes image details; a smaller h value preserves details but also preserves some noise.
+
+This function is expected to be applied to grayscale images. For colored images look at
+fastNlMeansDenoisingColored. An advanced use of this function is manual denoising of a colored
+image in a different colorspace. Such an approach is used in fastNlMeansDenoisingColored: the
+image is converted to CIELAB colorspace and the L and AB components are then denoised separately
+with different h parameters.
+ */
+CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
+                                        int templateWindowSize = 7, int searchWindowSize = 21);
+
+/** @brief Perform image denoising using the Non-local Means Denoising algorithm
+<http://www.ipol.im/pub/art/2011/bcm_nlm/> with several computational
+optimizations. The noise is expected to be Gaussian white noise.
+
+@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
+2-channel, 3-channel or 4-channel image.
+@param dst Output image with the same size and type as src .
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater searchWindowSize - greater
+denoising time. Recommended value 21 pixels
+@param h Array of parameters regulating filter strength, either one
+parameter applied to all channels or one per channel in dst. A big h value
+perfectly removes noise but also removes image details; a smaller h
+value preserves details but also preserves some noise.
+@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
+
+This function is expected to be applied to grayscale images. For colored images look at
+fastNlMeansDenoisingColored. An advanced use of this function is manual denoising of a colored
+image in a different colorspace. Such an approach is used in fastNlMeansDenoisingColored: the
+image is converted to CIELAB colorspace and the L and AB components are then denoised separately
+with different h parameters.
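+
+A hedged usage sketch of this overload (illustrative only; `noisy` stands for a real input frame
+that the caller provides):
+@code
+    cv::Mat noisy;                       // hypothetical CV_8UC1 input image
+    cv::Mat denoised;
+    std::vector<float> h = { 3.0f };     // one strength value applied to all channels
+    cv::fastNlMeansDenoising(noisy, denoised, h, 7, 21, cv::NORM_L1);
+@endcode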
+ */
+CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
+                                        const std::vector<float>& h,
+                                        int templateWindowSize = 7, int searchWindowSize = 21,
+                                        int normType = NORM_L2);
+
+/** @brief Modification of the fastNlMeansDenoising function for colored images
+
+@param src Input 8-bit 3-channel image.
+@param dst Output image with the same size and type as src .
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater searchWindowSize - greater
+denoising time. Recommended value 21 pixels
+@param h Parameter regulating filter strength for the luminance component. A bigger h value perfectly
+removes noise but also removes image details; a smaller h value preserves details but also preserves
+some noise.
+@param hColor The same as h but for color components. For most images a value of 10 is
+enough to remove colored noise without distorting colors.
+
+The function converts the image to CIELAB colorspace and then separately denoises the L and AB
+components with the given h parameters using the fastNlMeansDenoising function.
+ */
+CV_EXPORTS_W void fastNlMeansDenoisingColored( InputArray src, OutputArray dst,
+                                               float h = 3, float hColor = 3,
+                                               int templateWindowSize = 7, int searchWindowSize = 21);
+
+/** @brief Modification of the fastNlMeansDenoising function for image sequences where consecutive images have been
+captured in a small period of time, for example a video. This version of the function is for grayscale
+images or for manual manipulation with colorspaces. For more details see
+<http://www.ipol.im/pub/art/2011/bcm_nlm/>
+
+@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
+4-channel images sequence. All images should have the same type and
+size.
+@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
+@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
+be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
+srcImgs[imgToDenoiseIndex] image.
+@param dst Output image with the same size and type as srcImgs images.
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater searchWindowSize - greater
+denoising time. Recommended value 21 pixels
+@param h Parameter regulating filter strength. A bigger h value
+perfectly removes noise but also removes image details; a smaller h
+value preserves details but also preserves some noise.
+ */
+CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
+                                             int imgToDenoiseIndex, int temporalWindowSize,
+                                             float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
+
+/** @brief Modification of the fastNlMeansDenoising function for image sequences where consecutive images have been
+captured in a small period of time, for example a video. This version of the function is for grayscale
+images or for manual manipulation with colorspaces. For more details see
+<http://www.ipol.im/pub/art/2011/bcm_nlm/>
+
+@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
+2-channel, 3-channel or 4-channel images sequence. All images should
+have the same type and size.
+@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
+@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
+be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
+srcImgs[imgToDenoiseIndex] image.
+@param dst Output image with the same size and type as srcImgs images.
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater searchWindowSize - greater
+denoising time. Recommended value 21 pixels
+@param h Array of parameters regulating filter strength, either one
+parameter applied to all channels or one per channel in dst. A big h value
+perfectly removes noise but also removes image details; a smaller h
+value preserves details but also preserves some noise.
+@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
+ */
+CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
+                                             int imgToDenoiseIndex, int temporalWindowSize,
+                                             const std::vector<float>& h,
+                                             int templateWindowSize = 7, int searchWindowSize = 21,
+                                             int normType = NORM_L2);
+
+/** @brief Modification of the fastNlMeansDenoisingMulti function for colored image sequences
+
+@param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and
+size.
+@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
+@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
+be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
+srcImgs[imgToDenoiseIndex] image.
+@param dst Output image with the same size and type as srcImgs images.
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater searchWindowSize - greater
+denoising time. Recommended value 21 pixels
+@param h Parameter regulating filter strength for the luminance component. A bigger h value perfectly
+removes noise but also removes image details; a smaller h value preserves details but also preserves
+some noise.
+@param hColor The same as h but for color components.
+
+The function converts the images to CIELAB colorspace and then separately denoises the L and AB
+components with the given h parameters using the fastNlMeansDenoisingMulti function.
+ */
+CV_EXPORTS_W void fastNlMeansDenoisingColoredMulti( InputArrayOfArrays srcImgs, OutputArray dst,
+                                                    int imgToDenoiseIndex, int temporalWindowSize,
+                                                    float h = 3, float hColor = 3,
+                                                    int templateWindowSize = 7, int searchWindowSize = 21);
+
+/** @brief The primal-dual algorithm is an algorithm for solving special types of variational problems (that is,
+finding a function to minimize some functional). As image denoising, in particular, may be seen
+as a variational problem, the primal-dual algorithm can then be used to perform denoising, and this is
+exactly what is implemented.
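+
+A hedged usage sketch (the observation images are hypothetical and must be supplied by the caller;
+denoise_TVL1 is declared below):
+@code
+    std::vector<cv::Mat> observations;   // one or more noisy shots of the same scene, e.g. CV_8UC1
+    cv::Mat restored;
+    cv::denoise_TVL1(observations, restored, 1.0, 30);   // lambda = 1.0, 30 iterations
+@endcode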
+
+It should be noted that this implementation was taken from the July 2013 blog entry
+@cite MA13 , which also contained (slightly more general) ready-to-use source code in Python.
+Subsequently, that code was rewritten in C++ using OpenCV by Vadim Pisarevsky at the end
+of July 2013 and finally it was slightly adapted by later authors.
+
+Although the thorough discussion and justification of the algorithm involved may be found in
+@cite ChambolleEtAl, it might make sense to skim over it here, following @cite MA13 . To begin
+with, we consider the 1-byte gray-level images as the functions from the rectangular domain of
+pixels (it may be seen as set
+\f$\left\{(x,y)\in\mathbb{N}\times\mathbb{N}\mid 1\leq x\leq n,\;1\leq y\leq m\right\}\f$ for some
+\f$m,\;n\in\mathbb{N}\f$) into \f$\{0,1,\dots,255\}\f$. We shall denote the noised images as \f$f_i\f$ and with
+this view, given some image \f$x\f$ of the same size, we may measure how bad it is by the formula
+
+\f[\left\|\left\|\nabla x\right\|\right\| + \lambda\sum_i\left\|\left\|x-f_i\right\|\right\|\f]
+
+\f$\|\|\cdot\|\|\f$ here denotes the \f$L_2\f$-norm and as you see, the first addend states that we want our
+image to be smooth (ideally, having zero gradient, thus being constant) and the second states that
+we want our result to be close to the observations we've got. If we treat \f$x\f$ as a function, this is
+exactly the functional that we seek to minimize and here the primal-dual algorithm comes into play.
+
+@param observations This array should contain one or more noised versions of the image that is to
+be restored.
+@param result Here the denoised image will be stored. There is no need to do pre-allocation of
+storage space, as it will be automatically allocated, if necessary.
+@param lambda Corresponds to \f$\lambda\f$ in the formulas above. As it is enlarged, the smooth
+(blurred) images are treated more favorably than detailed (but maybe more noised) ones. Roughly
+speaking, as it becomes smaller, the result will be blurrier, but more severe outliers will be
+removed.
+@param niters Number of iterations that the algorithm will run. Of course, the more iterations the
+better, but it is hard to quantitatively refine this statement, so just use the default and
+increase it if the results are poor.
+ */
+CV_EXPORTS_W void denoise_TVL1(const std::vector<Mat>& observations, Mat& result, double lambda=1.0, int niters=30);
+
+//! @} photo_denoise
+
+//! @addtogroup photo_hdr
+//! @{
+
+enum { LDR_SIZE = 256 };
+
+/** @brief Base class for tonemapping algorithms - tools that are used to map HDR image to 8-bit range.
+ */
+class CV_EXPORTS_W Tonemap : public Algorithm
+{
+public:
+    /** @brief Tonemaps image
+
+    @param src source image - CV_32FC3 Mat (float 32 bits 3 channels)
+    @param dst destination image - CV_32FC3 Mat with values in [0, 1] range
+    */
+    CV_WRAP virtual void process(InputArray src, OutputArray dst) = 0;
+
+    CV_WRAP virtual float getGamma() const = 0;
+    CV_WRAP virtual void setGamma(float gamma) = 0;
+};
+
+/** @brief Creates simple linear mapper with gamma correction
+
+@param gamma positive value for gamma correction. Gamma value of 1.0 implies no correction, gamma
+equal to 2.2f is suitable for most displays.
+Generally gamma \> 1 brightens the image and gamma \< 1 darkens it.
+ */
+CV_EXPORTS_W Ptr<Tonemap> createTonemap(float gamma = 1.0f);
+
+/** @brief Adaptive logarithmic mapping is a fast global tonemapping algorithm that scales the image in
+logarithmic domain.
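+
+A hedged usage sketch (`hdr` is a hypothetical CV_32FC3 radiance map; the factory
+createTonemapDrago is declared below this class):
+@code
+    cv::Ptr<cv::TonemapDrago> mapper = cv::createTonemapDrago(1.0f, 1.0f, 0.85f);
+    cv::Mat hdr, ldr;                     // hdr must be filled by the caller
+    mapper->process(hdr, ldr);            // ldr comes back as CV_32FC3 in [0, 1]
+    ldr.convertTo(ldr, CV_8UC3, 255.0);   // scale to a displayable 8-bit image
+@endcode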
+
+Since it's a global operator, the same function is applied to all the pixels; it is controlled by the
+bias parameter.
+
+Optional saturation enhancement is possible as described in @cite FL02 .
+
+For more information see @cite DM03 .
+ */
+class CV_EXPORTS_W TonemapDrago : public Tonemap
+{
+public:
+
+    CV_WRAP virtual float getSaturation() const = 0;
+    CV_WRAP virtual void setSaturation(float saturation) = 0;
+
+    CV_WRAP virtual float getBias() const = 0;
+    CV_WRAP virtual void setBias(float bias) = 0;
+};
+
+/** @brief Creates TonemapDrago object
+
+@param gamma gamma value for gamma correction. See createTonemap
+@param saturation positive saturation enhancement value. 1.0 preserves saturation, values greater
+than 1 increase saturation and values less than 1 decrease it.
+@param bias value for bias function in [0, 1] range. Values from 0.7 to 0.9 usually give best
+results, default value is 0.85.
+ */
+CV_EXPORTS_W Ptr<TonemapDrago> createTonemapDrago(float gamma = 1.0f, float saturation = 1.0f, float bias = 0.85f);
+
+
+/** @brief This is a global tonemapping operator that models the human visual system.
+
+The mapping function is controlled by the adaptation parameter, which is computed using light adaptation
+and color adaptation.
+
+For more information see @cite RD05 .
+ */
+class CV_EXPORTS_W TonemapReinhard : public Tonemap
+{
+public:
+    CV_WRAP virtual float getIntensity() const = 0;
+    CV_WRAP virtual void setIntensity(float intensity) = 0;
+
+    CV_WRAP virtual float getLightAdaptation() const = 0;
+    CV_WRAP virtual void setLightAdaptation(float light_adapt) = 0;
+
+    CV_WRAP virtual float getColorAdaptation() const = 0;
+    CV_WRAP virtual void setColorAdaptation(float color_adapt) = 0;
+};
+
+/** @brief Creates TonemapReinhard object
+
+@param gamma gamma value for gamma correction. See createTonemap
+@param intensity result intensity in [-8, 8] range. Greater intensity produces brighter results.
+@param light_adapt light adaptation in [0, 1] range. If 1, adaptation is based only on the pixel
+value; if 0, it's global; otherwise it's a weighted mean of these two cases.
+@param color_adapt chromatic adaptation in [0, 1] range. If 1, channels are treated independently;
+if 0, the adaptation level is the same for each channel.
+ */
+CV_EXPORTS_W Ptr<TonemapReinhard>
+createTonemapReinhard(float gamma = 1.0f, float intensity = 0.0f, float light_adapt = 1.0f, float color_adapt = 0.0f);
+
+/** @brief This algorithm transforms the image to contrast using gradients on all levels of a Gaussian pyramid,
+transforms the contrast values to HVS response and scales the response. After this the image is
+reconstructed from the new contrast values.
+
+For more information see @cite MM06 .
+ */
+class CV_EXPORTS_W TonemapMantiuk : public Tonemap
+{
+public:
+    CV_WRAP virtual float getScale() const = 0;
+    CV_WRAP virtual void setScale(float scale) = 0;
+
+    CV_WRAP virtual float getSaturation() const = 0;
+    CV_WRAP virtual void setSaturation(float saturation) = 0;
+};
+
+/** @brief Creates TonemapMantiuk object
+
+@param gamma gamma value for gamma correction. See createTonemap
+@param scale contrast scale factor. HVS response is multiplied by this parameter, thus compressing
+dynamic range. Values from 0.6 to 0.9 produce best results.
+@param saturation saturation enhancement value.
See createTonemapDrago
+ */
+CV_EXPORTS_W Ptr<TonemapMantiuk>
+createTonemapMantiuk(float gamma = 1.0f, float scale = 0.7f, float saturation = 1.0f);
+
+/** @brief The base class for algorithms that align images of the same scene with different exposures
+ */
+class CV_EXPORTS_W AlignExposures : public Algorithm
+{
+public:
+    /** @brief Aligns images
+
+    @param src vector of input images
+    @param dst vector of aligned images
+    @param times vector of exposure time values for each image
+    @param response 256x1 matrix with inverse camera response function for each pixel value, it should
+    have the same number of channels as images.
+    */
+    CV_WRAP virtual void process(InputArrayOfArrays src, std::vector<Mat>& dst,
+                                 InputArray times, InputArray response) = 0;
+};
+
+/** @brief This algorithm converts images to median threshold bitmaps (1 for pixels brighter than median
+luminance and 0 otherwise) and then aligns the resulting bitmaps using bit operations.
+
+It is invariant to exposure, so exposure values and camera response are not necessary.
+
+In this implementation new image regions are filled with zeros.
+
+For more information see @cite GW03 .
+ */
+class CV_EXPORTS_W AlignMTB : public AlignExposures
+{
+public:
+    CV_WRAP virtual void process(InputArrayOfArrays src, std::vector<Mat>& dst,
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
+
+    /** @brief Short version of process, that doesn't take extra arguments.
+
+    @param src vector of input images
+    @param dst vector of aligned images
+    */
+    CV_WRAP virtual void process(InputArrayOfArrays src, std::vector<Mat>& dst) = 0;
+
+    /** @brief Calculates the shift between two images, i.e. how to shift the second image to make it
+    correspond with the first.
+
+    @param img0 first image
+    @param img1 second image
+    */
+    CV_WRAP virtual Point calculateShift(InputArray img0, InputArray img1) = 0;
+    /** @brief Helper function that shifts a Mat, filling new regions with zeros.
+
+    @param src input image
+    @param dst result image
+    @param shift shift value
+    */
+    CV_WRAP virtual void shiftMat(InputArray src, OutputArray dst, const Point shift) = 0;
+    /** @brief Computes the median threshold and exclude bitmaps of the given image.
+
+    @param img input image
+    @param tb median threshold bitmap
+    @param eb exclude bitmap
+    */
+    CV_WRAP virtual void computeBitmaps(InputArray img, OutputArray tb, OutputArray eb) = 0;
+
+    CV_WRAP virtual int getMaxBits() const = 0;
+    CV_WRAP virtual void setMaxBits(int max_bits) = 0;
+
+    CV_WRAP virtual int getExcludeRange() const = 0;
+    CV_WRAP virtual void setExcludeRange(int exclude_range) = 0;
+
+    CV_WRAP virtual bool getCut() const = 0;
+    CV_WRAP virtual void setCut(bool value) = 0;
+};
+
+/** @brief Creates AlignMTB object
+
+@param max_bits logarithm to the base 2 of maximal shift in each dimension. Values of 5 and 6 are
+usually good enough (31 and 63 pixels shift respectively).
+@param exclude_range range for exclusion bitmap that is constructed to suppress noise around the
+median value.
+@param cut if true cuts images, otherwise fills the new regions with zeros.
+ */
+CV_EXPORTS_W Ptr<AlignMTB> createAlignMTB(int max_bits = 6, int exclude_range = 4, bool cut = true);
+
+/** @brief The base class for camera response calibration algorithms.
+ */
+class CV_EXPORTS_W CalibrateCRF : public Algorithm
+{
+public:
+    /** @brief Recovers inverse camera response.
+
+    @param src vector of input images
+    @param dst 256x1 matrix with inverse camera response function
+    @param times vector of exposure time values for each image
+    */
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0;
+};
+
+/** @brief The inverse camera response function is extracted for each brightness value by minimizing an objective
+function as a linear system. The objective function is constructed using pixel values on the same position
+in all images; an extra term is added to make the result smoother.
+
+For more information see @cite DM97 .
+ */
+class CV_EXPORTS_W CalibrateDebevec : public CalibrateCRF
+{
+public:
+    CV_WRAP virtual float getLambda() const = 0;
+    CV_WRAP virtual void setLambda(float lambda) = 0;
+
+    CV_WRAP virtual int getSamples() const = 0;
+    CV_WRAP virtual void setSamples(int samples) = 0;
+
+    CV_WRAP virtual bool getRandom() const = 0;
+    CV_WRAP virtual void setRandom(bool random) = 0;
+};
+
+/** @brief Creates CalibrateDebevec object
+
+@param samples number of pixel locations to use
+@param lambda smoothness term weight. Greater values produce smoother results, but can alter the
+response.
+@param random if true sample pixel locations are chosen at random, otherwise they form a
+rectangular grid.
+ */
+CV_EXPORTS_W Ptr<CalibrateDebevec> createCalibrateDebevec(int samples = 70, float lambda = 10.0f, bool random = false);
+
+/** @brief The inverse camera response function is extracted for each brightness value by minimizing an objective
+function as a linear system. This algorithm uses all image pixels.
+
+For more information see @cite RB99 .
+ */
+class CV_EXPORTS_W CalibrateRobertson : public CalibrateCRF
+{
+public:
+    CV_WRAP virtual int getMaxIter() const = 0;
+    CV_WRAP virtual void setMaxIter(int max_iter) = 0;
+
+    CV_WRAP virtual float getThreshold() const = 0;
+    CV_WRAP virtual void setThreshold(float threshold) = 0;
+
+    CV_WRAP virtual Mat getRadiance() const = 0;
+};
+
+/** @brief Creates CalibrateRobertson object
+
+@param max_iter maximal number of Gauss-Seidel solver iterations.
+@param threshold target difference between results of two successive steps of the minimization.
+ */
+CV_EXPORTS_W Ptr<CalibrateRobertson> createCalibrateRobertson(int max_iter = 30, float threshold = 0.01f);
+
+/** @brief The base class for algorithms that can merge an exposure sequence into a single image.
+ */
+class CV_EXPORTS_W MergeExposures : public Algorithm
+{
+public:
+    /** @brief Merges images.
+
+    @param src vector of input images
+    @param dst result image
+    @param times vector of exposure time values for each image
+    @param response 256x1 matrix with inverse camera response function for each pixel value, it should
+    have the same number of channels as images.
+    */
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
+                                 InputArray times, InputArray response) = 0;
+};
+
+/** @brief The resulting HDR image is calculated as weighted average of the exposures considering exposure
+values and camera response.
+
+For more information see @cite DM97 .
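+
+A hedged end-to-end sketch (the exposure stack and times are hypothetical inputs; the factories
+createCalibrateDebevec and createMergeDebevec are declared in this header):
+@code
+    std::vector<cv::Mat> exposures;                       // bracketed 8-bit shots of one scene
+    std::vector<float> times = { 1/30.0f, 1/8.0f, 0.5f }; // exposure times in seconds
+    cv::Mat response, hdr;
+    cv::Ptr<cv::CalibrateDebevec> calibrate = cv::createCalibrateDebevec();
+    calibrate->process(exposures, response, times);       // recover the camera response curve
+    cv::Ptr<cv::MergeDebevec> merge = cv::createMergeDebevec();
+    merge->process(exposures, hdr, times, response);      // hdr is a CV_32FC3 radiance map
+@endcode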
+ */
+class CV_EXPORTS_W MergeDebevec : public MergeExposures
+{
+public:
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0;
+};
+
+/** @brief Creates MergeDebevec object
+ */
+CV_EXPORTS_W Ptr<MergeDebevec> createMergeDebevec();
+
+/** @brief Pixels are weighted using contrast, saturation and well-exposedness measures, then the images are
+combined using Laplacian pyramids.
+
+The resulting image weight is constructed as a weighted average of the contrast, saturation and
+well-exposedness measures.
+
+The resulting image doesn't require tonemapping and can be converted to an 8-bit image by multiplying
+by 255, but it's recommended to apply gamma correction and/or linear tonemapping.
+
+For more information see @cite MK07 .
+ */
+class CV_EXPORTS_W MergeMertens : public MergeExposures
+{
+public:
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
+    /** @brief Short version of process, that doesn't take extra arguments.
+
+    @param src vector of input images
+    @param dst result image
+    */
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst) = 0;
+
+    CV_WRAP virtual float getContrastWeight() const = 0;
+    CV_WRAP virtual void setContrastWeight(float contrast_weight) = 0;
+
+    CV_WRAP virtual float getSaturationWeight() const = 0;
+    CV_WRAP virtual void setSaturationWeight(float saturation_weight) = 0;
+
+    CV_WRAP virtual float getExposureWeight() const = 0;
+    CV_WRAP virtual void setExposureWeight(float exposure_weight) = 0;
+};
+
+/** @brief Creates MergeMertens object
+
+@param contrast_weight contrast measure weight. See MergeMertens.
+@param saturation_weight saturation measure weight
+@param exposure_weight well-exposedness measure weight
+ */
+CV_EXPORTS_W Ptr<MergeMertens>
+createMergeMertens(float contrast_weight = 1.0f, float saturation_weight = 1.0f, float exposure_weight = 0.0f);
+
+/** @brief The resulting HDR image is calculated as weighted average of the exposures considering exposure
+values and camera response.
+
+For more information see @cite RB99 .
+ */
+class CV_EXPORTS_W MergeRobertson : public MergeExposures
+{
+public:
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
+    CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0;
+};
+
+/** @brief Creates MergeRobertson object
+ */
+CV_EXPORTS_W Ptr<MergeRobertson> createMergeRobertson();
+
+//! @} photo_hdr
+
+//! @addtogroup photo_decolor
+//! @{
+
+/** @brief Transforms a color image to a grayscale image. It is a basic tool in digital printing, stylized
+black-and-white photograph rendering, and in many single channel image processing applications
+@cite CL12 .
+
+@param src Input 8-bit 3-channel image.
+@param grayscale Output 8-bit 1-channel image.
+@param color_boost Output 8-bit 3-channel image.
+
+This function is to be applied on color images.
+ */
+CV_EXPORTS_W void decolor( InputArray src, OutputArray grayscale, OutputArray color_boost);
+
+//! @} photo_decolor
+
+//! @addtogroup photo_clone
+//! @{
+
+//! seamlessClone algorithm flags
+enum
+{
+    /** The power of the method is fully expressed when inserting objects with complex outlines into a new background*/
+    NORMAL_CLONE = 1,
+    /** The classic method, color-based selection and alpha masking might be time consuming and often leaves an undesirable
+    halo. Seamless cloning, even averaged with the original image, is not effective. Mixed seamless cloning based on a loose selection proves effective.*/
+    MIXED_CLONE = 2,
+    /** Monochrome transfer allows the user to easily replace certain features of one object by alternative features.*/
+    MONOCHROME_TRANSFER = 3};
+
+
+/** @example samples/cpp/tutorial_code/photo/seamless_cloning/cloning_demo.cpp
+An example using the seamlessClone function
+*/
+/** @brief Image editing tasks concern either global changes (color/intensity corrections, filters,
+deformations) or local changes confined to a selection. Here we are interested in achieving local
+changes, ones that are restricted to a region manually selected (ROI), in a seamless and effortless
+manner. The extent of the changes ranges from slight distortions to complete replacement by novel
+content @cite PM03 .
+
+@param src Input 8-bit 3-channel image.
+@param dst Input 8-bit 3-channel image.
+@param mask Input 8-bit 1 or 3-channel image.
+@param p Point in dst image where object is placed.
+@param blend Output image with the same size and type as dst.
+@param flags Cloning method that could be cv::NORMAL_CLONE, cv::MIXED_CLONE or cv::MONOCHROME_TRANSFER
+ */
+CV_EXPORTS_W void seamlessClone( InputArray src, InputArray dst, InputArray mask, Point p,
+                                 OutputArray blend, int flags);
+
+/** @brief Given an original color image, two differently colored versions of this image can be mixed
+seamlessly.
+
+@param src Input 8-bit 3-channel image.
+@param mask Input 8-bit 1 or 3-channel image.
+@param dst Output image with the same size and type as src .
+@param red_mul R-channel multiply factor.
+@param green_mul G-channel multiply factor.
+@param blue_mul B-channel multiply factor.
+
+Multiplication factors are between 0.5 and 2.5.
+ */
+CV_EXPORTS_W void colorChange(InputArray src, InputArray mask, OutputArray dst, float red_mul = 1.0f,
+                              float green_mul = 1.0f, float blue_mul = 1.0f);
+
+/** @brief Applying an appropriate non-linear transformation to the gradient field inside the selection and
+then integrating back with a Poisson solver modifies locally the apparent illumination of an image.
+
+@param src Input 8-bit 3-channel image.
+@param mask Input 8-bit 1 or 3-channel image.
+@param dst Output image with the same size and type as src.
+@param alpha Value ranges between 0-2.
+@param beta Value ranges between 0-2.
+
+This is useful to highlight under-exposed foreground objects or to reduce specular reflections.
+ */
+CV_EXPORTS_W void illuminationChange(InputArray src, InputArray mask, OutputArray dst,
+                                     float alpha = 0.2f, float beta = 0.4f);
+
+/** @brief By retaining only the gradients at edge locations, before integrating with the Poisson solver, one
+washes out the texture of the selected region, giving its contents a flat aspect. Here the Canny Edge %Detector is used.
+
+@param src Input 8-bit 3-channel image.
+@param mask Input 8-bit 1 or 3-channel image.
+@param dst Output image with the same size and type as src.
+@param low_threshold %Range from 0 to 100.
+@param high_threshold Value \> 100.
+@param kernel_size The size of the Sobel kernel to be used.
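+
+A hedged usage sketch (src and mask are hypothetical inputs the caller provides):
+@code
+    cv::Mat src;        // 8-bit 3-channel source image
+    cv::Mat mask;       // 8-bit mask covering the region to flatten
+    cv::Mat flattened;
+    cv::textureFlattening(src, mask, flattened, 30, 45, 3);
+@endcode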
+ +@note +The algorithm assumes that the color of the source image is close to that of the destination. This +assumption means that when the colors don't match, the source image color gets tinted toward the +color of the destination image. + */ +CV_EXPORTS_W void textureFlattening(InputArray src, InputArray mask, OutputArray dst, + float low_threshold = 30, float high_threshold = 45, + int kernel_size = 3); + +//! @} photo_clone + +//! @addtogroup photo_render +//! @{ + +//! Edge preserving filters +enum +{ + RECURS_FILTER = 1, //!< Recursive Filtering + NORMCONV_FILTER = 2 //!< Normalized Convolution Filtering +}; + +/** @brief Filtering is the fundamental operation in image and video processing. Edge-preserving smoothing +filters are used in many different applications @cite EM11 . + +@param src Input 8-bit 3-channel image. +@param dst Output 8-bit 3-channel image. +@param flags Edge preserving filters: cv::RECURS_FILTER or cv::NORMCONV_FILTER +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. + */ +CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flags = 1, + float sigma_s = 60, float sigma_r = 0.4f); + +/** @brief This filter enhances the details of a particular image. + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. + */ +CV_EXPORTS_W void detailEnhance(InputArray src, OutputArray dst, float sigma_s = 10, + float sigma_r = 0.15f); + +/** @example samples/cpp/tutorial_code/photo/non_photorealistic_rendering/npr_demo.cpp +An example using non-photorealistic line drawing functions +*/ +/** @brief Pencil-like non-photorealistic line drawing + +@param src Input 8-bit 3-channel image. +@param dst1 Output 8-bit 1-channel image. +@param dst2 Output image with the same size and type as src. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. +@param shade_factor %Range between 0 to 0.1. + */ +CV_EXPORTS_W void pencilSketch(InputArray src, OutputArray dst1, OutputArray dst2, + float sigma_s = 60, float sigma_r = 0.07f, float shade_factor = 0.02f); + +/** @brief Stylization aims to produce digital imagery with a wide variety of effects not focused on +photorealism. Edge-aware filters are ideal for stylization, as they can abstract regions of low +contrast while preserving, or enhancing, high-contrast features. + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src. +@param sigma_s %Range between 0 to 200. +@param sigma_r %Range between 0 to 1. + */ +CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 60, + float sigma_r = 0.45f); + +//! @} photo_render + +//! @} photo + +} // cv + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/cuda.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/cuda.hpp new file mode 100644 index 0000000..a2f3816 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/cuda.hpp @@ -0,0 +1,132 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_PHOTO_CUDA_HPP
+#define OPENCV_PHOTO_CUDA_HPP
+
+#include "opencv2/core/cuda.hpp"
+
+namespace cv { namespace cuda {
+
+//! @addtogroup photo_denoise
+//! @{
+
+/** @brief Performs pure non local means denoising without any simplification, and thus it is not fast.
+
+@param src Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3.
+@param dst Destination image.
+@param h Filter sigma regulating filter strength for color.
+@param search_window Size of search window.
+@param block_size Size of block used for computing weights.
+@param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 ,
+BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
+@param stream Stream for the asynchronous version.
+
+@sa
+   fastNlMeansDenoising
+ */
+CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst,
+                              float h,
+                              int search_window = 21,
+                              int block_size = 7,
+                              int borderMode = BORDER_DEFAULT,
+                              Stream& stream = Stream::Null());
+
+/** @brief Perform image denoising using the Non-local Means Denoising algorithm
+<http://www.ipol.im/pub/art/2011/bcm_nlm/> with several computational
+optimizations. The noise is expected to be Gaussian white noise.
+
+@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
+@param dst Output image with the same size and type as src .
+@param h Parameter regulating filter strength. A big h value perfectly removes noise but also
+removes image details; a smaller h value preserves details but also preserves some noise.
+@param search_window Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd.
Affects performance linearly: greater search_window - greater
+denoising time. Recommended value 21 pixels
+@param block_size Size in pixels of the template patch that is used to compute weights. Should be
+odd. Recommended value 7 pixels
+@param stream Stream for the asynchronous invocations.
+
+This function is expected to be applied to grayscale images. For colored images look at
+FastNonLocalMeansDenoising::labMethod.
+
+@sa
+   fastNlMeansDenoising
+ */
+CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst,
+                                     float h,
+                                     int search_window = 21,
+                                     int block_size = 7,
+                                     Stream& stream = Stream::Null());
+
+/** @brief Modification of the fastNlMeansDenoising function for colored images
+
+@param src Input 8-bit 3-channel image.
+@param dst Output image with the same size and type as src .
+@param h_luminance Parameter regulating filter strength. A big h value perfectly removes noise but
+also removes image details; a smaller h value preserves details but also preserves some noise.
+@param photo_render float The same as h but for color components. For most images a value of 10 is
+enough to remove colored noise without distorting colors.
+@param search_window Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affects performance linearly: greater search_window - greater
+denoising time. Recommended value 21 pixels
+@param block_size Size in pixels of the template patch that is used to compute weights. Should be
+odd. Recommended value 7 pixels
+@param stream Stream for the asynchronous invocations.
+
+The function converts the image to CIELAB colorspace and then separately denoises the L and AB
+components with the given h parameters using the FastNonLocalMeansDenoising::simpleMethod function.
+
+@sa
+   fastNlMeansDenoisingColored
+ */
+CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst,
+                                            float h_luminance, float photo_render,
+                                            int search_window = 21,
+                                            int block_size = 7,
+                                            Stream& stream = Stream::Null());
+
+//! @} photo
+
+}} // namespace cv { namespace cuda {
+
+#endif /* OPENCV_PHOTO_CUDA_HPP */
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/legacy/constants_c.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/legacy/constants_c.h
new file mode 100644
index 0000000..ec1d440
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/legacy/constants_c.h
@@ -0,0 +1,14 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_PHOTO_LEGACY_CONSTANTS_H
+#define OPENCV_PHOTO_LEGACY_CONSTANTS_H
+
+enum InpaintingModes
+{
+    CV_INPAINT_NS    =0,
+    CV_INPAINT_TELEA =1
+};
+
+#endif // OPENCV_PHOTO_LEGACY_CONSTANTS_H
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/photo.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/photo.hpp
new file mode 100644
index 0000000..8af5e9f
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/photo/photo.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef __OPENCV_BUILD +#error this is a compatibility header which should not be used inside the OpenCV library +#endif + +#include "opencv2/photo.hpp" diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video.hpp new file mode 100644 index 0000000..cc6ed7a --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video.hpp @@ -0,0 +1,59 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. 
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_VIDEO_HPP
+#define OPENCV_VIDEO_HPP
+
+/**
+  @defgroup video Video Analysis
+  @{
+    @defgroup video_motion Motion Analysis
+    @defgroup video_track Object Tracking
+    @defgroup video_c C API
+  @}
+*/
+
+#include "opencv2/video/tracking.hpp"
+#include "opencv2/video/background_segm.hpp"
+
+#endif //OPENCV_VIDEO_HPP
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/background_segm.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/background_segm.hpp
new file mode 100644
index 0000000..e1dfa15
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/background_segm.hpp
@@ -0,0 +1,317 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_BACKGROUND_SEGM_HPP +#define OPENCV_BACKGROUND_SEGM_HPP + +#include "opencv2/core.hpp" + +namespace cv +{ + +//! @addtogroup video_motion +//! @{ + +/** @brief Base class for background/foreground segmentation. : + +The class is only used to define the common interface for the whole family of background/foreground +segmentation algorithms. + */ +class CV_EXPORTS_W BackgroundSubtractor : public Algorithm +{ +public: + /** @brief Computes a foreground mask. + + @param image Next video frame. + @param fgmask The output foreground mask as an 8-bit binary image. + @param learningRate The value between 0 and 1 that indicates how fast the background model is + learnt. Negative parameter value makes the algorithm to use some automatically chosen learning + rate. 0 means that the background model is not updated at all, 1 means that the background model + is completely reinitialized from the last frame. + */ + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) = 0; + + /** @brief Computes a background image. + + @param backgroundImage The output background image. + + @note Sometimes the background image can be very blurry, as it contain the average background + statistics. + */ + CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const = 0; +}; + + +/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm. + +The class implements the Gaussian mixture model background subtraction described in @cite Zivkovic2004 +and @cite Zivkovic2006 . + */ +class CV_EXPORTS_W BackgroundSubtractorMOG2 : public BackgroundSubtractor +{ +public: + /** @brief Returns the number of last frames that affect the background model + */ + CV_WRAP virtual int getHistory() const = 0; + /** @brief Sets the number of last frames that affect the background model + */ + CV_WRAP virtual void setHistory(int history) = 0; + + /** @brief Returns the number of gaussian components in the background model + */ + CV_WRAP virtual int getNMixtures() const = 0; + /** @brief Sets the number of gaussian components in the background model. + + The model needs to be reinitalized to reserve memory. + */ + CV_WRAP virtual void setNMixtures(int nmixtures) = 0;//needs reinitialization! + + /** @brief Returns the "background ratio" parameter of the algorithm + + If a foreground pixel keeps semi-constant value for about backgroundRatio\*history frames, it's + considered background and added to the model as a center of a new component. It corresponds to TB + parameter in the paper. 
+ */ + CV_WRAP virtual double getBackgroundRatio() const = 0; + /** @brief Sets the "background ratio" parameter of the algorithm + */ + CV_WRAP virtual void setBackgroundRatio(double ratio) = 0; + + /** @brief Returns the variance threshold for the pixel-model match + + The main threshold on the squared Mahalanobis distance to decide if the sample is well described by + the background model or not. Related to Cthr from the paper. + */ + CV_WRAP virtual double getVarThreshold() const = 0; + /** @brief Sets the variance threshold for the pixel-model match + */ + CV_WRAP virtual void setVarThreshold(double varThreshold) = 0; + + /** @brief Returns the variance threshold for the pixel-model match used for new mixture component generation + + Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the + existing components (corresponds to Tg in the paper). If a pixel is not close to any component, it + is considered foreground or added as a new component. 3 sigma =\> Tg=3\*3=9 is default. A smaller Tg + value generates more components. A higher Tg value may result in a small number of components but + they can grow too large. + */ + CV_WRAP virtual double getVarThresholdGen() const = 0; + /** @brief Sets the variance threshold for the pixel-model match used for new mixture component generation + */ + CV_WRAP virtual void setVarThresholdGen(double varThresholdGen) = 0; + + /** @brief Returns the initial variance of each gaussian component + */ + CV_WRAP virtual double getVarInit() const = 0; + /** @brief Sets the initial variance of each gaussian component + */ + CV_WRAP virtual void setVarInit(double varInit) = 0; + + CV_WRAP virtual double getVarMin() const = 0; + CV_WRAP virtual void setVarMin(double varMin) = 0; + + CV_WRAP virtual double getVarMax() const = 0; + CV_WRAP virtual void setVarMax(double varMax) = 0; + + /** @brief Returns the complexity reduction threshold + + This parameter defines the number of samples needed to accept to prove the component exists. CT=0.05 + is a default value for all the samples. By setting CT=0 you get an algorithm very similar to the + standard Stauffer&Grimson algorithm. + */ + CV_WRAP virtual double getComplexityReductionThreshold() const = 0; + /** @brief Sets the complexity reduction threshold + */ + CV_WRAP virtual void setComplexityReductionThreshold(double ct) = 0; + + /** @brief Returns the shadow detection flag + + If true, the algorithm detects shadows and marks them. See createBackgroundSubtractorMOG2 for + details. + */ + CV_WRAP virtual bool getDetectShadows() const = 0; + /** @brief Enables or disables shadow detection + */ + CV_WRAP virtual void setDetectShadows(bool detectShadows) = 0; + + /** @brief Returns the shadow value + + Shadow value is the value used to mark shadows in the foreground mask. Default value is 127. Value 0 + in the mask always means background, 255 means foreground. + */ + CV_WRAP virtual int getShadowValue() const = 0; + /** @brief Sets the shadow value + */ + CV_WRAP virtual void setShadowValue(int value) = 0; + + /** @brief Returns the shadow threshold + + A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in + the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel + is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiara, + *Detecting Moving Shadows...*, IEEE PAMI,2003. 
+ */ + CV_WRAP virtual double getShadowThreshold() const = 0; + /** @brief Sets the shadow threshold + */ + CV_WRAP virtual void setShadowThreshold(double threshold) = 0; + + /** @brief Computes a foreground mask. + + @param image Next video frame. Floating point frame will be used without scaling and should be in range \f$[0,255]\f$. + @param fgmask The output foreground mask as an 8-bit binary image. + @param learningRate The value between 0 and 1 that indicates how fast the background model is + learnt. A negative parameter value makes the algorithm use an automatically chosen learning + rate. 0 means that the background model is not updated at all, 1 means that the background model + is completely reinitialized from the last frame. + */ + CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0; +}; + +/** @brief Creates MOG2 Background Subtractor + +@param history Length of the history. +@param varThreshold Threshold on the squared Mahalanobis distance between the pixel and the model +to decide whether a pixel is well described by the background model. This parameter does not +affect the background update. +@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ +CV_EXPORTS_W Ptr<BackgroundSubtractorMOG2> + createBackgroundSubtractorMOG2(int history=500, double varThreshold=16, + bool detectShadows=true); + +/** @brief K-nearest neighbours - based Background/Foreground Segmentation Algorithm. + +The class implements the K-nearest neighbours background subtraction described in @cite Zivkovic2006 . +Very efficient if the number of foreground pixels is low. + */ +class CV_EXPORTS_W BackgroundSubtractorKNN : public BackgroundSubtractor +{ +public: + /** @brief Returns the number of last frames that affect the background model + */ + CV_WRAP virtual int getHistory() const = 0; + /** @brief Sets the number of last frames that affect the background model + */ + CV_WRAP virtual void setHistory(int history) = 0; + + /** @brief Returns the number of data samples in the background model + */ + CV_WRAP virtual int getNSamples() const = 0; + /** @brief Sets the number of data samples in the background model. + + The model needs to be reinitialized to reserve memory. + */ + CV_WRAP virtual void setNSamples(int _nN) = 0;//needs reinitialization! + + /** @brief Returns the threshold on the squared distance between the pixel and the sample + + The threshold on the squared distance between the pixel and the sample to decide whether a pixel is + close to a data sample. + */ + CV_WRAP virtual double getDist2Threshold() const = 0; + /** @brief Sets the threshold on the squared distance + */ + CV_WRAP virtual void setDist2Threshold(double _dist2Threshold) = 0; + + /** @brief Returns the number of neighbours, the k in the kNN. + + K is the number of samples that need to be within dist2Threshold in order to decide that the + pixel matches the kNN background model. + */ + CV_WRAP virtual int getkNNSamples() const = 0; + /** @brief Sets the k in the kNN. How many nearest neighbours need to match. + */ + CV_WRAP virtual void setkNNSamples(int _nkNN) = 0; + + /** @brief Returns the shadow detection flag + + If true, the algorithm detects shadows and marks them. See createBackgroundSubtractorKNN for + details.
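Putting the pieces above together, a minimal usage sketch of the MOG2 interface (the input path "demo.mp4" is a hypothetical placeholder, and the 200 threshold is an illustrative choice for discarding the 127-valued shadow pixels):

@code
#include <opencv2/video.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::VideoCapture cap("demo.mp4");            // hypothetical input source
    // Documented defaults: history=500, varThreshold=16, detectShadows=true.
    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 =
        cv::createBackgroundSubtractorMOG2(500, 16, true);

    cv::Mat frame, fgmask;
    while (cap.read(frame))
    {
        mog2->apply(frame, fgmask, -1);          // -1: auto-chosen learning rate
        // Shadows are marked with the shadow value (127 by default), so a
        // high threshold keeps only confident foreground pixels.
        cv::threshold(fgmask, fgmask, 200, 255, cv::THRESH_BINARY);
    }
    return 0;
}
@endcode

createBackgroundSubtractorKNN, declared just below, is a drop-in alternative behind the same BackgroundSubtractor interface.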
+ */ + CV_WRAP virtual bool getDetectShadows() const = 0; + /** @brief Enables or disables shadow detection + */ + CV_WRAP virtual void setDetectShadows(bool detectShadows) = 0; + + /** @brief Returns the shadow value + + Shadow value is the value used to mark shadows in the foreground mask. Default value is 127. Value 0 + in the mask always means background, 255 means foreground. + */ + CV_WRAP virtual int getShadowValue() const = 0; + /** @brief Sets the shadow value + */ + CV_WRAP virtual void setShadowValue(int value) = 0; + + /** @brief Returns the shadow threshold + + A shadow is detected if a pixel is a darker version of the background. The shadow threshold (Tau in + the paper) is a threshold defining how much darker the shadow can be. Tau = 0.5 means that if a pixel + is more than twice darker than the background, it is not considered a shadow. See Prati, Mikic, Trivedi and Cucchiara, + *Detecting Moving Shadows...*, IEEE PAMI,2003. + */ + CV_WRAP virtual double getShadowThreshold() const = 0; + /** @brief Sets the shadow threshold + */ + CV_WRAP virtual void setShadowThreshold(double threshold) = 0; +}; + +/** @brief Creates KNN Background Subtractor + +@param history Length of the history. +@param dist2Threshold Threshold on the squared distance between the pixel and the sample to decide +whether a pixel is close to that sample. This parameter does not affect the background update. +@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ +CV_EXPORTS_W Ptr<BackgroundSubtractorKNN> + createBackgroundSubtractorKNN(int history=500, double dist2Threshold=400.0, + bool detectShadows=true); + +//! @} video_motion + +} // cv + +#endif diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/detail/tracking.detail.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/detail/tracking.detail.hpp new file mode 100644 index 0000000..07e369b --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/detail/tracking.detail.hpp @@ -0,0 +1,406 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_VIDEO_DETAIL_TRACKING_HPP +#define OPENCV_VIDEO_DETAIL_TRACKING_HPP + +/* + * Partially based on: + * ==================================================================================================================== + * - [AAM] S. Salti, A. Cavallaro, L. Di Stefano, Adaptive Appearance Modeling for Video Tracking: Survey and Evaluation + * - [AMVOT] X. Li, W. Hu, C. Shen, Z. Zhang, A. Dick, A. van den Hengel, A Survey of Appearance Models in Visual Object Tracking + * + * This Tracking API has been designed with PlantUML. If you modify this API please change UML files under modules/tracking/doc/uml + * + */ + +#include "opencv2/core.hpp" + +namespace cv { +namespace detail { +inline namespace tracking { + +/** @addtogroup tracking_detail +@{ +*/ + +/************************************ TrackerFeature Base Classes ************************************/ + +/** @brief Abstract base class for TrackerFeature that represents the feature.
+*/ +class CV_EXPORTS TrackerFeature +{ +public: + virtual ~TrackerFeature(); + + /** @brief Compute the features in the images collection + @param images The images + @param response The output response + */ + void compute(const std::vector<Mat>& images, Mat& response); + +protected: + virtual bool computeImpl(const std::vector<Mat>& images, Mat& response) = 0; +}; + +/** @brief Class that manages the extraction and selection of features + +@cite AAM Feature Extraction and Feature Set Refinement (Feature Processing and Feature Selection). +See table I and section III C @cite AMVOT Appearance modelling -\> Visual representation (Table II, +section 3.1 - 3.2) + +TrackerFeatureSet is an aggregation of TrackerFeature + +@sa + TrackerFeature + +*/ +class CV_EXPORTS TrackerFeatureSet +{ +public: + TrackerFeatureSet(); + + ~TrackerFeatureSet(); + + /** @brief Extract features from the images collection + @param images The input images + */ + void extraction(const std::vector<Mat>& images); + + /** @brief Add TrackerFeature in the collection. Return true if TrackerFeature is added, false otherwise + @param feature The TrackerFeature class + */ + bool addTrackerFeature(const Ptr<TrackerFeature>& feature); + + /** @brief Get the TrackerFeature collection (TrackerFeature name, TrackerFeature pointer) + */ + const std::vector<Ptr<TrackerFeature>>& getTrackerFeatures() const; + + /** @brief Get the responses + @note Be sure to call extraction before getResponses. + */ + const std::vector<Mat>& getResponses() const; + +private: + void clearResponses(); + bool blockAddTrackerFeature; + + std::vector<Ptr<TrackerFeature>> features; // list of features + std::vector<Mat> responses; // list of response after compute +}; + +/************************************ TrackerSampler Base Classes ************************************/ + +/** @brief Abstract base class for TrackerSamplerAlgorithm that represents the algorithm for the specific +sampler. +*/ +class CV_EXPORTS TrackerSamplerAlgorithm +{ +public: + virtual ~TrackerSamplerAlgorithm(); + + /** @brief Computes the regions starting from a position in an image. + + Return true if samples are computed, false otherwise + + @param image The current frame + @param boundingBox The bounding box from which regions can be calculated + + @param sample The computed samples @cite AAM Fig. 1 variable Sk + */ + virtual bool sampling(const Mat& image, const Rect& boundingBox, std::vector<Mat>& sample) = 0; +}; + +/** + * \brief Class that manages the sampler in order to select regions to update the model of the tracker + * [AAM] Sampling and Labeling. See table I and section III B + */ + +/** @brief Class that manages the sampler in order to select regions to update the model of the tracker + +@cite AAM Sampling and Labeling. See table I and section III B + +TrackerSampler is an aggregation of TrackerSamplerAlgorithm +@sa + TrackerSamplerAlgorithm + */ +class CV_EXPORTS TrackerSampler +{ +public: + TrackerSampler(); + + ~TrackerSampler(); + + /** @brief Computes the regions starting from a position in an image + @param image The current frame + @param boundingBox The bounding box from which regions can be calculated + */ + void sampling(const Mat& image, Rect boundingBox); + + /** @brief Return the collection of the TrackerSamplerAlgorithm + */ + const std::vector<Ptr<TrackerSamplerAlgorithm>>& getSamplers() const; + + /** @brief Return the samples from all TrackerSamplerAlgorithm, @cite AAM Fig. 1 variable Sk + */ + const std::vector<Mat>& getSamples() const; + + /** @brief Add TrackerSamplerAlgorithm in the collection.
Return true if sampler is added, false otherwise + @param sampler The TrackerSamplerAlgorithm + */ + bool addTrackerSamplerAlgorithm(const Ptr<TrackerSamplerAlgorithm>& sampler); + +private: + std::vector<Ptr<TrackerSamplerAlgorithm>> samplers; + std::vector<Mat> samples; + bool blockAddTrackerSampler; + + void clearSamples(); +}; + +/************************************ TrackerModel Base Classes ************************************/ + +/** @brief Abstract base class for TrackerTargetState that represents a possible state of the target. + +See @cite AAM \f$\hat{x}^{i}_{k}\f$ all the state candidates. + +Inherit this class for your target state; in your own implementation you can add scale variation, +width, height, orientation, etc. +*/ +class CV_EXPORTS TrackerTargetState +{ +public: + virtual ~TrackerTargetState() {}; + /** @brief Get the position + * @return The position + */ + Point2f getTargetPosition() const; + + /** @brief Set the position + * @param position The position + */ + void setTargetPosition(const Point2f& position); + /** @brief Get the width of the target + * @return The width of the target + */ + int getTargetWidth() const; + + /** @brief Set the width of the target + * @param width The width of the target + */ + void setTargetWidth(int width); + /** @brief Get the height of the target + * @return The height of the target + */ + int getTargetHeight() const; + + /** @brief Set the height of the target + * @param height The height of the target + */ + void setTargetHeight(int height); + +protected: + Point2f targetPosition; + int targetWidth; + int targetHeight; +}; + +/** @brief Represents the model of the target at frame \f$k\f$ (all states and scores) + +See @cite AAM The set of the pair \f$\langle \hat{x}^{i}_{k}, C^{i}_{k} \rangle\f$ +@sa TrackerTargetState +*/ +typedef std::vector<std::pair<Ptr<TrackerTargetState>, float>> ConfidenceMap; + +/** @brief Represents the estimate states for all frames + +@cite AAM \f$x_{k}\f$ is the trajectory of the target up to time \f$k\f$ + +@sa TrackerTargetState +*/ +typedef std::vector<Ptr<TrackerTargetState>> Trajectory; + +/** @brief Abstract base class for TrackerStateEstimator that estimates the most likely target state. + +See @cite AAM State estimator + +See @cite AMVOT Statistical modeling (Fig. 3), Table III (generative) - IV (discriminative) - V (hybrid) +*/ +class CV_EXPORTS TrackerStateEstimator +{ +public: + virtual ~TrackerStateEstimator(); + + /** @brief Estimate the most likely target state, return the estimated state + @param confidenceMaps The overall appearance model as a list of ConfidenceMap + */ + Ptr<TrackerTargetState> estimate(const std::vector<ConfidenceMap>& confidenceMaps); + + /** @brief Update the ConfidenceMap with the scores + @param confidenceMaps The overall appearance model as a list of ConfidenceMap + */ + void update(std::vector<ConfidenceMap>& confidenceMaps); + + /** @brief Create TrackerStateEstimator by tracker state estimator type + @param trackeStateEstimatorType The TrackerStateEstimator name + + The modes available now: + + - "BOOSTING" -- Boosting-based discriminative appearance models. See @cite AMVOT section 4.4 + + The modes available soon: + + - "SVM" -- SVM-based discriminative appearance models. See @cite AMVOT section 4.5 + */ + static Ptr<TrackerStateEstimator> create(const String& trackeStateEstimatorType); + + /** @brief Get the name of the specific TrackerStateEstimator + */ + String getClassName() const; + +protected: + virtual Ptr<TrackerTargetState> estimateImpl(const std::vector<ConfidenceMap>& confidenceMaps) = 0; + virtual void updateImpl(std::vector<ConfidenceMap>& confidenceMaps) = 0; + String className; +}; + +/** @brief Abstract class that represents the model of the target.
+ +It must be instantiated by a specialized tracker + +See @cite AAM Ak + +Inherit this with your TrackerModel +*/ +class CV_EXPORTS TrackerModel +{ +public: + TrackerModel(); + + virtual ~TrackerModel(); + + /** @brief Set TrackerEstimator, return true if the tracker state estimator is added, false otherwise + @param trackerStateEstimator The TrackerStateEstimator + @note You can add only one TrackerStateEstimator + */ + bool setTrackerStateEstimator(Ptr<TrackerStateEstimator> trackerStateEstimator); + + /** @brief Estimate the most likely target location + + @cite AAM ME, Model Estimation table I + @param responses Features extracted from TrackerFeatureSet + */ + void modelEstimation(const std::vector<Mat>& responses); + + /** @brief Update the model + + @cite AAM MU, Model Update table I + */ + void modelUpdate(); + + /** @brief Run the TrackerStateEstimator, return true if it is possible to estimate a new state, false otherwise + */ + bool runStateEstimator(); + + /** @brief Set the current TrackerTargetState in the Trajectory + @param lastTargetState The current TrackerTargetState + */ + void setLastTargetState(const Ptr<TrackerTargetState>& lastTargetState); + + /** @brief Get the last TrackerTargetState from Trajectory + */ + Ptr<TrackerTargetState> getLastTargetState() const; + + /** @brief Get the list of the ConfidenceMap + */ + const std::vector<ConfidenceMap>& getConfidenceMaps() const; + + /** @brief Get the last ConfidenceMap for the current frame + */ + const ConfidenceMap& getLastConfidenceMap() const; + + /** @brief Get the TrackerStateEstimator + */ + Ptr<TrackerStateEstimator> getTrackerStateEstimator() const; + +private: + void clearCurrentConfidenceMap(); + +protected: + std::vector<ConfidenceMap> confidenceMaps; + Ptr<TrackerStateEstimator> stateEstimator; + ConfidenceMap currentConfidenceMap; + Trajectory trajectory; + int maxCMLength; + + virtual void modelEstimationImpl(const std::vector<Mat>& responses) = 0; + virtual void modelUpdateImpl() = 0; +}; + +/************************************ Specific TrackerStateEstimator Classes ************************************/ + +// None + +/************************************ Specific TrackerSamplerAlgorithm Classes ************************************/ + +/** @brief TrackerSampler based on CSC (current state centered), used by MIL algorithm TrackerMIL + */ +class CV_EXPORTS TrackerSamplerCSC : public TrackerSamplerAlgorithm +{ +public: + ~TrackerSamplerCSC(); + + enum MODE + { + MODE_INIT_POS = 1, //!< mode for init positive samples + MODE_INIT_NEG = 2, //!< mode for init negative samples + MODE_TRACK_POS = 3, //!< mode for update positive samples + MODE_TRACK_NEG = 4, //!< mode for update negative samples + MODE_DETECT = 5 //!< mode for detect samples + }; + + struct CV_EXPORTS Params + { + Params(); + float initInRad; //!< radius for gathering positive instances during init + float trackInPosRad; //!< radius for gathering positive instances during tracking + float searchWinSize; //!< size of search window + int initMaxNegNum; //!< # negative samples to use during init + int trackMaxPosNum; //!< # positive samples to use during training + int trackMaxNegNum; //!< # negative samples to use during training + }; + + /** @brief Constructor + @param parameters TrackerSamplerCSC parameters TrackerSamplerCSC::Params + */ + TrackerSamplerCSC(const TrackerSamplerCSC::Params& parameters = TrackerSamplerCSC::Params()); + + /** @brief Set the sampling mode of TrackerSamplerCSC + @param samplingMode The sampling mode + + The modes are: + + - "MODE_INIT_POS = 1" -- for the positive sampling in initialization step + - "MODE_INIT_NEG = 2" -- for the negative sampling in initialization
step + - "MODE_TRACK_POS = 3" -- for the positive sampling in update step + - "MODE_TRACK_NEG = 4" -- for the negative sampling in update step + - "MODE_DETECT = 5" -- for the sampling in detection step + */ + void setMode(int samplingMode); + + bool sampling(const Mat& image, const Rect& boundingBox, std::vector& sample) CV_OVERRIDE; + +private: + Params params; + int mode; + RNG rng; + + std::vector sampleImage(const Mat& img, int x, int y, int w, int h, float inrad, float outrad = 0, int maxnum = 1000000); +}; + +//! @} + +}}} // namespace cv::detail::tracking + +#endif // OPENCV_VIDEO_DETAIL_TRACKING_HPP diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/legacy/constants_c.h b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/legacy/constants_c.h new file mode 100644 index 0000000..1a98f52 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/legacy/constants_c.h @@ -0,0 +1,16 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_VIDEO_LEGACY_CONSTANTS_H +#define OPENCV_VIDEO_LEGACY_CONSTANTS_H + +enum +{ + CV_LKFLOW_PYR_A_READY = 1, + CV_LKFLOW_PYR_B_READY = 2, + CV_LKFLOW_INITIAL_GUESSES = 4, + CV_LKFLOW_GET_MIN_EIGENVALS = 8 +}; + +#endif // OPENCV_VIDEO_LEGACY_CONSTANTS_H diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/tracking.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/tracking.hpp new file mode 100644 index 0000000..5eea2c1 --- /dev/null +++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/tracking.hpp @@ -0,0 +1,857 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_TRACKING_HPP +#define OPENCV_TRACKING_HPP + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +namespace cv +{ + +//! @addtogroup video_track +//! @{ + +enum { OPTFLOW_USE_INITIAL_FLOW = 4, + OPTFLOW_LK_GET_MIN_EIGENVALS = 8, + OPTFLOW_FARNEBACK_GAUSSIAN = 256 + }; + +/** @brief Finds an object center, size, and orientation. + +@param probImage Back projection of the object histogram. See calcBackProject. +@param window Initial search window. +@param criteria Stop criteria for the underlying meanShift. +returns +(in old interfaces) Number of iterations CAMSHIFT took to converge +The function implements the CAMSHIFT object tracking algorithm @cite Bradski98 . First, it finds an +object center using meanShift and then adjusts the window size and finds the optimal rotation. The +function returns the rotated rectangle structure that includes the object position, size, and +orientation. The next position of the search window can be obtained with RotatedRect::boundingRect() + +See the OpenCV sample camshiftdemo.c that tracks colored objects. + +@note +- (Python) A sample explaining the camshift tracking algorithm can be found at + opencv_source_code/samples/python/camshift.py + */ +CV_EXPORTS_W RotatedRect CamShift( InputArray probImage, CV_IN_OUT Rect& window, + TermCriteria criteria ); +/** @example samples/cpp/camshiftdemo.cpp +An example using the mean-shift tracking algorithm +*/ + +/** @brief Finds an object on a back projection image. + +@param probImage Back projection of the object histogram. See calcBackProject for details. +@param window Initial search window. +@param criteria Stop criteria for the iterative search algorithm. +returns +: Number of iterations CAMSHIFT took to converge. +The function implements the iterative object search algorithm. It takes the input back projection of +an object and the initial position. The mass center in window of the back projection image is +computed and the search window center shifts to the mass center. The procedure is repeated until the +specified number of iterations criteria.maxCount is done or until the window center shifts by less +than criteria.epsilon. The algorithm is used inside CamShift and, unlike CamShift , the search +window size or orientation do not change during the search. You can simply pass the output of +calcBackProject to this function. But better results can be obtained if you pre-filter the back +projection and remove the noise. For example, you can do this by retrieving connected components +with findContours , throwing away contours with small area ( contourArea ), and rendering the +remaining contours with drawContours. 
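For illustration, a minimal back-projection tracking loop built from the functions described above (the capture source and initial ROI are assumed inputs, and the helper name is hypothetical; swapping cv::CamShift for cv::meanShift keeps the window size fixed instead of adapting it):

@code
#include <opencv2/video/tracking.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>

// Hypothetical helper: track the object initially inside `roi` by hue.
void trackByHue(cv::VideoCapture& cap, cv::Rect roi)
{
    int channels[] = {0};
    int histSize[] = {30};
    float hueRange[] = {0, 180};
    const float* ranges[] = {hueRange};

    cv::Mat frame, hsv, hist, backproj;
    cap.read(frame);
    cv::cvtColor(frame, hsv, cv::COLOR_BGR2HSV);
    cv::Mat hsvRoi = hsv(roi);
    cv::calcHist(&hsvRoi, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
    cv::normalize(hist, hist, 0, 255, cv::NORM_MINMAX);

    cv::Rect window = roi;   // next search window, updated in place
    while (cap.read(frame))
    {
        cv::cvtColor(frame, hsv, cv::COLOR_BGR2HSV);
        cv::calcBackProject(&hsv, 1, channels, hist, backproj, ranges);
        cv::RotatedRect box = cv::CamShift(backproj, window,
            cv::TermCriteria(cv::TermCriteria::EPS | cv::TermCriteria::COUNT, 10, 1));
        (void)box;           // position, size and orientation of the object
    }
}
@endcode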
+ + */ +CV_EXPORTS_W int meanShift( InputArray probImage, CV_IN_OUT Rect& window, TermCriteria criteria ); + +/** @brief Constructs the image pyramid which can be passed to calcOpticalFlowPyrLK. + +@param img 8-bit input image. +@param pyramid output pyramid. +@param winSize window size of optical flow algorithm. Must be not less than winSize argument of +calcOpticalFlowPyrLK. It is needed to calculate required padding for pyramid levels. +@param maxLevel 0-based maximal pyramid level number. +@param withDerivatives set to precompute gradients for the every pyramid level. If pyramid is +constructed without the gradients then calcOpticalFlowPyrLK will calculate them internally. +@param pyrBorder the border mode for pyramid layers. +@param derivBorder the border mode for gradients. +@param tryReuseInputImage put ROI of input image into the pyramid if possible. You can pass false +to force data copying. +@return number of levels in constructed pyramid. Can be less than maxLevel. + */ +CV_EXPORTS_W int buildOpticalFlowPyramid( InputArray img, OutputArrayOfArrays pyramid, + Size winSize, int maxLevel, bool withDerivatives = true, + int pyrBorder = BORDER_REFLECT_101, + int derivBorder = BORDER_CONSTANT, + bool tryReuseInputImage = true ); + +/** @example samples/cpp/lkdemo.cpp +An example using the Lucas-Kanade optical flow algorithm +*/ + +/** @brief Calculates an optical flow for a sparse feature set using the iterative Lucas-Kanade method with +pyramids. + +@param prevImg first 8-bit input image or pyramid constructed by buildOpticalFlowPyramid. +@param nextImg second input image or pyramid of the same size and the same type as prevImg. +@param prevPts vector of 2D points for which the flow needs to be found; point coordinates must be +single-precision floating-point numbers. +@param nextPts output vector of 2D points (with single-precision floating-point coordinates) +containing the calculated new positions of input features in the second image; when +OPTFLOW_USE_INITIAL_FLOW flag is passed, the vector must have the same size as in the input. +@param status output status vector (of unsigned chars); each element of the vector is set to 1 if +the flow for the corresponding features has been found, otherwise, it is set to 0. +@param err output vector of errors; each element of the vector is set to an error for the +corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't +found then the error is not defined (use the status parameter to find such cases). +@param winSize size of the search window at each pyramid level. +@param maxLevel 0-based maximal pyramid level number; if set to 0, pyramids are not used (single +level), if set to 1, two levels are used, and so on; if pyramids are passed to input then +algorithm will use as many levels as pyramids have but no more than maxLevel. +@param criteria parameter, specifying the termination criteria of the iterative search algorithm +(after the specified maximum number of iterations criteria.maxCount or when the search window +moves by less than criteria.epsilon. +@param flags operation flags: + - **OPTFLOW_USE_INITIAL_FLOW** uses initial estimations, stored in nextPts; if the flag is + not set, then prevPts is copied to nextPts and is considered the initial estimate. 
+ - **OPTFLOW_LK_GET_MIN_EIGENVALS** use minimum eigen values as an error measure (see + minEigThreshold description); if the flag is not set, then L1 distance between patches + around the original and a moved point, divided by number of pixels in a window, is used as a + error measure. +@param minEigThreshold the algorithm calculates the minimum eigen value of a 2x2 normal matrix of +optical flow equations (this matrix is called a spatial gradient matrix in @cite Bouguet00), divided +by number of pixels in a window; if this value is less than minEigThreshold, then a corresponding +feature is filtered out and its flow is not processed, so it allows to remove bad points and get a +performance boost. + +The function implements a sparse iterative version of the Lucas-Kanade optical flow in pyramids. See +@cite Bouguet00 . The function is parallelized with the TBB library. + +@note + +- An example using the Lucas-Kanade optical flow algorithm can be found at + opencv_source_code/samples/cpp/lkdemo.cpp +- (Python) An example using the Lucas-Kanade optical flow algorithm can be found at + opencv_source_code/samples/python/lk_track.py +- (Python) An example using the Lucas-Kanade tracker for homography matching can be found at + opencv_source_code/samples/python/lk_homography.py + */ +CV_EXPORTS_W void calcOpticalFlowPyrLK( InputArray prevImg, InputArray nextImg, + InputArray prevPts, InputOutputArray nextPts, + OutputArray status, OutputArray err, + Size winSize = Size(21,21), int maxLevel = 3, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), + int flags = 0, double minEigThreshold = 1e-4 ); + +/** @brief Computes a dense optical flow using the Gunnar Farneback's algorithm. + +@param prev first 8-bit single-channel input image. +@param next second input image of the same size and the same type as prev. +@param flow computed flow image that has the same size as prev and type CV_32FC2. +@param pyr_scale parameter, specifying the image scale (\<1) to build pyramids for each image; +pyr_scale=0.5 means a classical pyramid, where each next layer is twice smaller than the previous +one. +@param levels number of pyramid layers including the initial image; levels=1 means that no extra +layers are created and only the original images are used. +@param winsize averaging window size; larger values increase the algorithm robustness to image +noise and give more chances for fast motion detection, but yield more blurred motion field. +@param iterations number of iterations the algorithm does at each pyramid level. +@param poly_n size of the pixel neighborhood used to find polynomial expansion in each pixel; +larger values mean that the image will be approximated with smoother surfaces, yielding more +robust algorithm and more blurred motion field, typically poly_n =5 or 7. +@param poly_sigma standard deviation of the Gaussian that is used to smooth derivatives used as a +basis for the polynomial expansion; for poly_n=5, you can set poly_sigma=1.1, for poly_n=7, a +good value would be poly_sigma=1.5. +@param flags operation flags that can be a combination of the following: + - **OPTFLOW_USE_INITIAL_FLOW** uses the input flow as an initial flow approximation. 
+ - **OPTFLOW_FARNEBACK_GAUSSIAN** uses the Gaussian \f$\texttt{winsize}\times\texttt{winsize}\f$ + filter instead of a box filter of the same size for optical flow estimation; usually, this + option gives z more accurate flow than with a box filter, at the cost of lower speed; + normally, winsize for a Gaussian window should be set to a larger value to achieve the same + level of robustness. + +The function finds an optical flow for each prev pixel using the @cite Farneback2003 algorithm so that + +\f[\texttt{prev} (y,x) \sim \texttt{next} ( y + \texttt{flow} (y,x)[1], x + \texttt{flow} (y,x)[0])\f] + +@note + +- An example using the optical flow algorithm described by Gunnar Farneback can be found at + opencv_source_code/samples/cpp/fback.cpp +- (Python) An example using the optical flow algorithm described by Gunnar Farneback can be + found at opencv_source_code/samples/python/opt_flow.py + */ +CV_EXPORTS_W void calcOpticalFlowFarneback( InputArray prev, InputArray next, InputOutputArray flow, + double pyr_scale, int levels, int winsize, + int iterations, int poly_n, double poly_sigma, + int flags ); + +/** @brief Computes an optimal affine transformation between two 2D point sets. + +@param src First input 2D point set stored in std::vector or Mat, or an image stored in Mat. +@param dst Second input 2D point set of the same size and the same type as A, or another image. +@param fullAffine If true, the function finds an optimal affine transformation with no additional +restrictions (6 degrees of freedom). Otherwise, the class of transformations to choose from is +limited to combinations of translation, rotation, and uniform scaling (4 degrees of freedom). + +The function finds an optimal affine transform *[A|b]* (a 2 x 3 floating-point matrix) that +approximates best the affine transformation between: + +* Two point sets +* Two raster images. In this case, the function first finds some features in the src image and + finds the corresponding features in dst image. After that, the problem is reduced to the first + case. +In case of point sets, the problem is formulated as follows: you need to find a 2x2 matrix *A* and +2x1 vector *b* so that: + +\f[[A^*|b^*] = arg \min _{[A|b]} \sum _i \| \texttt{dst}[i] - A { \texttt{src}[i]}^T - b \| ^2\f] +where src[i] and dst[i] are the i-th points in src and dst, respectively +\f$[A|b]\f$ can be either arbitrary (when fullAffine=true ) or have a form of +\f[\begin{bmatrix} a_{11} & a_{12} & b_1 \\ -a_{12} & a_{11} & b_2 \end{bmatrix}\f] +when fullAffine=false. + +@deprecated Use cv::estimateAffine2D, cv::estimateAffinePartial2D instead. If you are using this function +with images, extract points using cv::calcOpticalFlowPyrLK and then use the estimation functions. + +@sa +estimateAffine2D, estimateAffinePartial2D, getAffineTransform, getPerspectiveTransform, findHomography + */ +CV_DEPRECATED CV_EXPORTS Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine ); + +enum +{ + MOTION_TRANSLATION = 0, + MOTION_EUCLIDEAN = 1, + MOTION_AFFINE = 2, + MOTION_HOMOGRAPHY = 3 +}; + +/** @brief Computes the Enhanced Correlation Coefficient value between two images @cite EP08 . + +@param templateImage single-channel template image; CV_8U or CV_32F array. +@param inputImage single-channel input image to be warped to provide an image similar to + templateImage, same type as templateImage. +@param inputMask An optional mask to indicate valid values of inputImage. 
+ +@sa +findTransformECC + */ + +CV_EXPORTS_W double computeECC(InputArray templateImage, InputArray inputImage, InputArray inputMask = noArray()); + +/** @example samples/cpp/image_alignment.cpp +An example using the image alignment ECC algorithm +*/ + +/** @brief Finds the geometric transform (warp) between two images in terms of the ECC criterion @cite EP08 . + +@param templateImage single-channel template image; CV_8U or CV_32F array. +@param inputImage single-channel input image which should be warped with the final warpMatrix in +order to provide an image similar to templateImage, same type as templateImage. +@param warpMatrix floating-point \f$2\times 3\f$ or \f$3\times 3\f$ mapping matrix (warp). +@param motionType parameter, specifying the type of motion: + - **MOTION_TRANSLATION** sets a translational motion model; warpMatrix is \f$2\times 3\f$ with + the first \f$2\times 2\f$ part being the unity matrix and the rest two parameters being + estimated. + - **MOTION_EUCLIDEAN** sets a Euclidean (rigid) transformation as motion model; three + parameters are estimated; warpMatrix is \f$2\times 3\f$. + - **MOTION_AFFINE** sets an affine motion model (DEFAULT); six parameters are estimated; + warpMatrix is \f$2\times 3\f$. + - **MOTION_HOMOGRAPHY** sets a homography as a motion model; eight parameters are + estimated;\`warpMatrix\` is \f$3\times 3\f$. +@param criteria parameter, specifying the termination criteria of the ECC algorithm; +criteria.epsilon defines the threshold of the increment in the correlation coefficient between two +iterations (a negative criteria.epsilon makes criteria.maxcount the only termination criterion). +Default values are shown in the declaration above. +@param inputMask An optional mask to indicate valid values of inputImage. +@param gaussFiltSize An optional value indicating size of gaussian blur filter; (DEFAULT: 5) + +The function estimates the optimum transformation (warpMatrix) with respect to ECC criterion +(@cite EP08), that is + +\f[\texttt{warpMatrix} = \arg\max_{W} \texttt{ECC}(\texttt{templateImage}(x,y),\texttt{inputImage}(x',y'))\f] + +where + +\f[\begin{bmatrix} x' \\ y' \end{bmatrix} = W \cdot \begin{bmatrix} x \\ y \\ 1 \end{bmatrix}\f] + +(the equation holds with homogeneous coordinates for homography). It returns the final enhanced +correlation coefficient, that is the correlation coefficient between the template image and the +final warped input image. When a \f$3\times 3\f$ matrix is given with motionType =0, 1 or 2, the third +row is ignored. + +Unlike findHomography and estimateRigidTransform, the function findTransformECC implements an +area-based alignment that builds on intensity similarities. In essence, the function updates the +initial transformation that roughly aligns the images. If this information is missing, the identity +warp (unity matrix) is used as an initialization. Note that if images undergo strong +displacements/rotations, an initial transformation that roughly aligns the images is necessary +(e.g., a simple euclidean/similarity transform that allows for the images showing the same image +content approximately). Use inverse warping in the second image to take an image close to the first +one, i.e. use the flag WARP_INVERSE_MAP with warpAffine or warpPerspective. See also the OpenCV +sample image_alignment.cpp that demonstrates the use of the function. Note that the function throws +an exception if algorithm does not converges. 
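To make the workflow concrete, a minimal ECC alignment sketch under an assumed MOTION_EUCLIDEAN model (the helper name and the criteria/filter values are illustrative choices, not prescribed by the API):

@code
#include <opencv2/video/tracking.hpp>
#include <opencv2/imgproc.hpp>

// Hypothetical helper: warp `input` onto `templ` (both CV_8UC1).
cv::Mat alignEuclidean(const cv::Mat& templ, const cv::Mat& input)
{
    cv::Mat warp = cv::Mat::eye(2, 3, CV_32F);   // identity initialization
    cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS,
                              50, 1e-4);
    // Throws if the algorithm does not converge (see the note above);
    // returns the final correlation coefficient on success.
    double cc = cv::findTransformECC(templ, input, warp, cv::MOTION_EUCLIDEAN,
                                     criteria, cv::noArray(), 5);
    (void)cc;
    cv::Mat aligned;
    // Inverse warping maps `input` into the template frame, as recommended.
    cv::warpAffine(input, aligned, warp, templ.size(),
                   cv::INTER_LINEAR + cv::WARP_INVERSE_MAP);
    return aligned;
}
@endcode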
+ +@sa +computeECC, estimateAffine2D, estimateAffinePartial2D, findHomography + */ +CV_EXPORTS_W double findTransformECC( InputArray templateImage, InputArray inputImage, + InputOutputArray warpMatrix, int motionType, + TermCriteria criteria, + InputArray inputMask, int gaussFiltSize); + +/** @overload */ +CV_EXPORTS_W +double findTransformECC(InputArray templateImage, InputArray inputImage, + InputOutputArray warpMatrix, int motionType = MOTION_AFFINE, + TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 50, 0.001), + InputArray inputMask = noArray()); + +/** @example samples/cpp/kalman.cpp +An example using the standard Kalman filter +*/ + +/** @brief Kalman filter class. + +The class implements a standard Kalman filter , +@cite Welch95 . However, you can modify transitionMatrix, controlMatrix, and measurementMatrix to get +an extended Kalman filter functionality. +@note In C API when CvKalman\* kalmanFilter structure is not needed anymore, it should be released +with cvReleaseKalman(&kalmanFilter) + */ +class CV_EXPORTS_W KalmanFilter +{ +public: + CV_WRAP KalmanFilter(); + /** @overload + @param dynamParams Dimensionality of the state. + @param measureParams Dimensionality of the measurement. + @param controlParams Dimensionality of the control vector. + @param type Type of the created matrices that should be CV_32F or CV_64F. + */ + CV_WRAP KalmanFilter( int dynamParams, int measureParams, int controlParams = 0, int type = CV_32F ); + + /** @brief Re-initializes Kalman filter. The previous content is destroyed. + + @param dynamParams Dimensionality of the state. + @param measureParams Dimensionality of the measurement. + @param controlParams Dimensionality of the control vector. + @param type Type of the created matrices that should be CV_32F or CV_64F. + */ + void init( int dynamParams, int measureParams, int controlParams = 0, int type = CV_32F ); + + /** @brief Computes a predicted state. + + @param control The optional input control + */ + CV_WRAP const Mat& predict( const Mat& control = Mat() ); + + /** @brief Updates the predicted state from the measurement. + + @param measurement The measured system parameters + */ + CV_WRAP const Mat& correct( const Mat& measurement ); + + CV_PROP_RW Mat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + CV_PROP_RW Mat statePost; //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + CV_PROP_RW Mat transitionMatrix; //!< state transition matrix (A) + CV_PROP_RW Mat controlMatrix; //!< control matrix (B) (not used if there is no control) + CV_PROP_RW Mat measurementMatrix; //!< measurement matrix (H) + CV_PROP_RW Mat processNoiseCov; //!< process noise covariance matrix (Q) + CV_PROP_RW Mat measurementNoiseCov;//!< measurement noise covariance matrix (R) + CV_PROP_RW Mat errorCovPre; //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q)*/ + CV_PROP_RW Mat gain; //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R) + CV_PROP_RW Mat errorCovPost; //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k) + + // temporary matrices + Mat temp1; + Mat temp2; + Mat temp3; + Mat temp4; + Mat temp5; +}; + + +/** @brief Read a .flo file + + @param path Path to the file to be loaded + + The function readOpticalFlow loads a flow field from a file and returns it as a single matrix. + Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the + flow in the horizontal direction (u), second - vertical (v). 
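Returning to the KalmanFilter class declared above, a minimal constant-velocity sketch (state [x, y, vx, vy], measurement [x, y]; the noise magnitudes and the 2x1 measurement are illustrative assumptions):

@code
#include <opencv2/video/tracking.hpp>

int main()
{
    cv::KalmanFilter kf(4, 2, 0, CV_32F);
    kf.transitionMatrix = (cv::Mat_<float>(4, 4) <<
        1, 0, 1, 0,
        0, 1, 0, 1,
        0, 0, 1, 0,
        0, 0, 0, 1);                                             // A: constant velocity
    cv::setIdentity(kf.measurementMatrix);                       // H
    cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-4));  // Q
    cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(1e-1)); // R
    cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1));        // P(0)

    // Per frame: predict, then correct with an observed position.
    cv::Mat prediction = kf.predict();
    cv::Mat measurement = (cv::Mat_<float>(2, 1) << 320.f, 240.f); // assumed observation
    cv::Mat estimated = kf.correct(measurement);
    return 0;
}
@endcode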
+ */ +CV_EXPORTS_W Mat readOpticalFlow( const String& path ); +/** @brief Write a .flo to disk + + @param path Path to the file to be written + @param flow Flow field to be stored + + The function stores a flow field in a file, returns true on success, false otherwise. + The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds + to the flow in the horizontal direction (u), second - vertical (v). + */ +CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); + +/** + Base class for dense optical flow algorithms +*/ +class CV_EXPORTS_W DenseOpticalFlow : public Algorithm +{ +public: + /** @brief Calculates an optical flow. + + @param I0 first 8-bit single-channel input image. + @param I1 second input image of the same size and the same type as prev. + @param flow computed flow image that has the same size as prev and type CV_32FC2. + */ + CV_WRAP virtual void calc( InputArray I0, InputArray I1, InputOutputArray flow ) = 0; + /** @brief Releases all inner buffers. + */ + CV_WRAP virtual void collectGarbage() = 0; +}; + +/** @brief Base interface for sparse optical flow algorithms. + */ +class CV_EXPORTS_W SparseOpticalFlow : public Algorithm +{ +public: + /** @brief Calculates a sparse optical flow. + + @param prevImg First input image. + @param nextImg Second input image of the same size and the same type as prevImg. + @param prevPts Vector of 2D points for which the flow needs to be found. + @param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image. + @param status Output status vector. Each element of the vector is set to 1 if the + flow for the corresponding features has been found. Otherwise, it is set to 0. + @param err Optional output vector that contains error response for each point (inverse confidence). + */ + CV_WRAP virtual void calc(InputArray prevImg, InputArray nextImg, + InputArray prevPts, InputOutputArray nextPts, + OutputArray status, + OutputArray err = cv::noArray()) = 0; +}; + + +/** @brief Class computing a dense optical flow using the Gunnar Farneback's algorithm. + */ +class CV_EXPORTS_W FarnebackOpticalFlow : public DenseOpticalFlow +{ +public: + CV_WRAP virtual int getNumLevels() const = 0; + CV_WRAP virtual void setNumLevels(int numLevels) = 0; + + CV_WRAP virtual double getPyrScale() const = 0; + CV_WRAP virtual void setPyrScale(double pyrScale) = 0; + + CV_WRAP virtual bool getFastPyramids() const = 0; + CV_WRAP virtual void setFastPyramids(bool fastPyramids) = 0; + + CV_WRAP virtual int getWinSize() const = 0; + CV_WRAP virtual void setWinSize(int winSize) = 0; + + CV_WRAP virtual int getNumIters() const = 0; + CV_WRAP virtual void setNumIters(int numIters) = 0; + + CV_WRAP virtual int getPolyN() const = 0; + CV_WRAP virtual void setPolyN(int polyN) = 0; + + CV_WRAP virtual double getPolySigma() const = 0; + CV_WRAP virtual void setPolySigma(double polySigma) = 0; + + CV_WRAP virtual int getFlags() const = 0; + CV_WRAP virtual void setFlags(int flags) = 0; + + CV_WRAP static Ptr create( + int numLevels = 5, + double pyrScale = 0.5, + bool fastPyramids = false, + int winSize = 13, + int numIters = 10, + int polyN = 5, + double polySigma = 1.1, + int flags = 0); +}; + +/** @brief Variational optical flow refinement + +This class implements variational refinement of the input flow field, i.e. 
+it uses input flow to initialize the minimization of the following functional: +\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$, +where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms +respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the +influence of outliers. A complete formulation and a description of the minimization +procedure can be found in @cite Brox2004 +*/ +class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow +{ +public: + /** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components + (to avoid extra splits/merges) */ + CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0; + + /** @brief Number of outer (fixed-point) iterations in the minimization procedure. + @see setFixedPointIterations */ + CV_WRAP virtual int getFixedPointIterations() const = 0; + /** @copybrief getFixedPointIterations @see getFixedPointIterations */ + CV_WRAP virtual void setFixedPointIterations(int val) = 0; + + /** @brief Number of inner successive over-relaxation (SOR) iterations + in the minimization procedure to solve the respective linear system. + @see setSorIterations */ + CV_WRAP virtual int getSorIterations() const = 0; + /** @copybrief getSorIterations @see getSorIterations */ + CV_WRAP virtual void setSorIterations(int val) = 0; + + /** @brief Relaxation factor in SOR + @see setOmega */ + CV_WRAP virtual float getOmega() const = 0; + /** @copybrief getOmega @see getOmega */ + CV_WRAP virtual void setOmega(float val) = 0; + + /** @brief Weight of the smoothness term + @see setAlpha */ + CV_WRAP virtual float getAlpha() const = 0; + /** @copybrief getAlpha @see getAlpha */ + CV_WRAP virtual void setAlpha(float val) = 0; + + /** @brief Weight of the color constancy term + @see setDelta */ + CV_WRAP virtual float getDelta() const = 0; + /** @copybrief getDelta @see getDelta */ + CV_WRAP virtual void setDelta(float val) = 0; + + /** @brief Weight of the gradient constancy term + @see setGamma */ + CV_WRAP virtual float getGamma() const = 0; + /** @copybrief getGamma @see getGamma */ + CV_WRAP virtual void setGamma(float val) = 0; + + /** @brief Creates an instance of VariationalRefinement + */ + CV_WRAP static Ptr create(); +}; + +/** @brief DIS optical flow algorithm. + +This class implements the Dense Inverse Search (DIS) optical flow algorithm. More +details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected +parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is +still relatively fast, use DeepFlow if you need better quality and don't care about speed. + +This implementation includes several additional features compared to the algorithm described in the paper, +including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to +utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation, +if the previous frame's flow field is passed). +*/ +class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow +{ +public: + enum + { + PRESET_ULTRAFAST = 0, + PRESET_FAST = 1, + PRESET_MEDIUM = 2 + }; + + /** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level + corresponds to the original image resolution). The final flow is obtained by bilinear upscaling. 
+ @see setFinestScale */ + CV_WRAP virtual int getFinestScale() const = 0; + /** @copybrief getFinestScale @see getFinestScale */ + CV_WRAP virtual void setFinestScale(int val) = 0; + + /** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well + enough in most cases. + @see setPatchSize */ + CV_WRAP virtual int getPatchSize() const = 0; + /** @copybrief getPatchSize @see getPatchSize */ + CV_WRAP virtual void setPatchSize(int val) = 0; + + /** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond + to higher flow quality. + @see setPatchStride */ + CV_WRAP virtual int getPatchStride() const = 0; + /** @copybrief getPatchStride @see getPatchStride */ + CV_WRAP virtual void setPatchStride(int val) = 0; + + /** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values + may improve quality in some cases. + @see setGradientDescentIterations */ + CV_WRAP virtual int getGradientDescentIterations() const = 0; + /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ + CV_WRAP virtual void setGradientDescentIterations(int val) = 0; + + /** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to + disable variational refinement completely. Higher values will typically result in more smooth and + high-quality flow. + @see setGradientDescentIterations */ + CV_WRAP virtual int getVariationalRefinementIterations() const = 0; + /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ + CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0; + + /** @brief Weight of the smoothness term + @see setVariationalRefinementAlpha */ + CV_WRAP virtual float getVariationalRefinementAlpha() const = 0; + /** @copybrief getVariationalRefinementAlpha @see getVariationalRefinementAlpha */ + CV_WRAP virtual void setVariationalRefinementAlpha(float val) = 0; + + /** @brief Weight of the color constancy term + @see setVariationalRefinementDelta */ + CV_WRAP virtual float getVariationalRefinementDelta() const = 0; + /** @copybrief getVariationalRefinementDelta @see getVariationalRefinementDelta */ + CV_WRAP virtual void setVariationalRefinementDelta(float val) = 0; + + /** @brief Weight of the gradient constancy term + @see setVariationalRefinementGamma */ + CV_WRAP virtual float getVariationalRefinementGamma() const = 0; + /** @copybrief getVariationalRefinementGamma @see getVariationalRefinementGamma */ + CV_WRAP virtual void setVariationalRefinementGamma(float val) = 0; + + + /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on + by default as it typically provides a noticeable quality boost because of increased robustness to + illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes + in illumination. + @see setUseMeanNormalization */ + CV_WRAP virtual bool getUseMeanNormalization() const = 0; + /** @copybrief getUseMeanNormalization @see getUseMeanNormalization */ + CV_WRAP virtual void setUseMeanNormalization(bool val) = 0; + + /** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by + default, as it tends to work better on average and can sometimes help recover from major errors + introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this + option off can make the output flow field a bit smoother, however. 
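To see these knobs in context, a minimal sketch of the DenseOpticalFlow interface using DISOpticalFlow (input frames are assumed 8-bit grayscale of equal size; the helper name is hypothetical):

@code
#include <opencv2/video/tracking.hpp>

// Hypothetical helper: dense flow between two CV_8UC1 frames.
cv::Mat denseFlow(const cv::Mat& prev, const cv::Mat& next)
{
    cv::Ptr<cv::DISOpticalFlow> dis =
        cv::DISOpticalFlow::create(cv::DISOpticalFlow::PRESET_MEDIUM);
    dis->setUseSpatialPropagation(true);  // the documented default, shown explicitly
    cv::Mat flow;                         // CV_32FC2: (u, v) per pixel
    dis->calc(prev, next, flow);
    return flow;
}
@endcode

FarnebackOpticalFlow::create() yields a drop-in alternative behind the same DenseOpticalFlow interface.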
+    @see setUseSpatialPropagation */
+    CV_WRAP virtual bool getUseSpatialPropagation() const = 0;
+    /** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */
+    CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0;
+
+    /** @brief Creates an instance of DISOpticalFlow
+
+    @param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM
+    */
+    CV_WRAP static Ptr<DISOpticalFlow> create(int preset = DISOpticalFlow::PRESET_FAST);
+};
+
+/** @brief Class used for calculating a sparse optical flow.
+
+The class can calculate an optical flow for a sparse feature set using the
+iterative Lucas-Kanade method with pyramids.
+
+@sa calcOpticalFlowPyrLK
+
+*/
+class CV_EXPORTS_W SparsePyrLKOpticalFlow : public SparseOpticalFlow
+{
+public:
+    CV_WRAP virtual Size getWinSize() const = 0;
+    CV_WRAP virtual void setWinSize(Size winSize) = 0;
+
+    CV_WRAP virtual int getMaxLevel() const = 0;
+    CV_WRAP virtual void setMaxLevel(int maxLevel) = 0;
+
+    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
+    CV_WRAP virtual void setTermCriteria(TermCriteria& crit) = 0;
+
+    CV_WRAP virtual int getFlags() const = 0;
+    CV_WRAP virtual void setFlags(int flags) = 0;
+
+    CV_WRAP virtual double getMinEigThreshold() const = 0;
+    CV_WRAP virtual void setMinEigThreshold(double minEigThreshold) = 0;
+
+    CV_WRAP static Ptr<SparsePyrLKOpticalFlow> create(
+        Size winSize = Size(21, 21),
+        int maxLevel = 3, TermCriteria crit =
+            TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, 0.01),
+        int flags = 0,
+        double minEigThreshold = 1e-4);
+};
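+
+/* Example (editor's illustrative sketch, not part of the original header): a
+   minimal dense-flow call through the DISOpticalFlow factory declared above.
+   The frame names and preset choice are assumptions; inputs are expected to be
+   8-bit grayscale images and the result is a CV_32FC2 flow field.
+
+   @code{.cpp}
+   cv::Ptr<cv::DISOpticalFlow> dis =
+       cv::DISOpticalFlow::create(cv::DISOpticalFlow::PRESET_MEDIUM);
+   dis->setUseSpatialPropagation(true);         // on by default; shown for clarity
+   dis->setVariationalRefinementIterations(5);  // 0 disables the refinement pass
+   cv::Mat flow;                                // one (u, v) pair per pixel
+   dis->calc(prev_gray, next_gray, flow);
+   @endcode
+*/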
+
+
+
+/** @brief Base abstract class for the long-term tracker
+ */
+class CV_EXPORTS_W Tracker
+{
+protected:
+    Tracker();
+public:
+    virtual ~Tracker();
+
+    /** @brief Initialize the tracker with a known bounding box that surrounds the target
+    @param image The initial frame
+    @param boundingBox The initial bounding box
+    */
+    CV_WRAP virtual
+    void init(InputArray image, const Rect& boundingBox) = 0;
+
+    /** @brief Update the tracker, find the new most likely bounding box for the target
+    @param image The current frame
+    @param boundingBox The bounding box that represents the new target location if true was returned; not
+    modified otherwise
+
+    @return True means that the target was located; false means that the tracker cannot locate the target in
+    the current frame. Note that the latter *does not* imply that the tracker has failed; the target may
+    simply be missing from the frame (say, out of sight).
+    */
+    CV_WRAP virtual
+    bool update(InputArray image, CV_OUT Rect& boundingBox) = 0;
+};
+
+
+/** @brief The MIL algorithm trains a classifier in an online manner to separate the object from the
+background.
+
+Multiple Instance Learning avoids the drift problem for robust tracking. The implementation is
+based on @cite MIL .
+
+Original code can be found here
+ */
+class CV_EXPORTS_W TrackerMIL : public Tracker
+{
+protected:
+    TrackerMIL();  // use ::create()
+public:
+    virtual ~TrackerMIL() CV_OVERRIDE;
+
+    struct CV_EXPORTS_W_SIMPLE Params
+    {
+        CV_WRAP Params();
+        // parameters for the sampler
+        CV_PROP_RW float samplerInitInRadius;   //!< radius for gathering positive instances during init
+        CV_PROP_RW int samplerInitMaxNegNum;    //!< # negative samples to use during init
+        CV_PROP_RW float samplerSearchWinSize;  //!< size of search window
+        CV_PROP_RW float samplerTrackInRadius;  //!< radius for gathering positive instances during tracking
+        CV_PROP_RW int samplerTrackMaxPosNum;   //!< # positive samples to use during tracking
+        CV_PROP_RW int samplerTrackMaxNegNum;   //!< # negative samples to use during tracking
+        CV_PROP_RW int featureSetNumFeatures;   //!< # features
+    };
+
+    /** @brief Create MIL tracker instance
+     * @param parameters MIL parameters TrackerMIL::Params
+     */
+    static CV_WRAP
+    Ptr<TrackerMIL> create(const TrackerMIL::Params &parameters = TrackerMIL::Params());
+
+    //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
+    //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
+};
+
+
+/** @brief The GOTURN (Generic Object Tracking Using Regression Networks) tracker
+ *
+ * GOTURN (@cite GOTURN) is a kind of tracker based on Convolutional Neural Networks (CNN). While keeping all the advantages of CNN trackers,
+ * GOTURN is much faster because it is trained offline, with no online fine-tuning.
+ * GOTURN addresses the problem of single-target tracking: given a bounding box label of an object in the first frame of the video,
+ * we track that object through the rest of the video. NOTE: The current GOTURN method does not handle occlusions; however, it is fairly
+ * robust to viewpoint changes, lighting changes, and deformations.
+ * Inputs of GOTURN are two RGB patches representing the Target and Search patches, resized to 227x227.
+ * Outputs of GOTURN are predicted bounding box coordinates, relative to the Search patch coordinate system, in the format X1,Y1,X2,Y2.
+ * Original paper is here:
+ * As well as the original authors' implementation:
+ * The implementation of the training algorithm is placed separately here due to third-party dependencies:
+ *
+ * The GOTURN architecture goturn.prototxt and trained model goturn.caffemodel are accessible on the opencv_extra GitHub repository.
+ */
+class CV_EXPORTS_W TrackerGOTURN : public Tracker
+{
+protected:
+    TrackerGOTURN();  // use ::create()
+public:
+    virtual ~TrackerGOTURN() CV_OVERRIDE;
+
+    struct CV_EXPORTS_W_SIMPLE Params
+    {
+        CV_WRAP Params();
+        CV_PROP_RW std::string modelTxt;
+        CV_PROP_RW std::string modelBin;
+    };
+
+    /** @brief Constructor
+    @param parameters GOTURN parameters TrackerGOTURN::Params
+    */
+    static CV_WRAP
+    Ptr<TrackerGOTURN> create(const TrackerGOTURN::Params& parameters = TrackerGOTURN::Params());
+
+    //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
+    //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
+};
+
+class CV_EXPORTS_W TrackerDaSiamRPN : public Tracker
+{
+protected:
+    TrackerDaSiamRPN();  // use ::create()
+public:
+    virtual ~TrackerDaSiamRPN() CV_OVERRIDE;
+
+    struct CV_EXPORTS_W_SIMPLE Params
+    {
+        CV_WRAP Params();
+        CV_PROP_RW std::string model;
+        CV_PROP_RW std::string kernel_cls1;
+        CV_PROP_RW std::string kernel_r1;
+        CV_PROP_RW int backend;
+        CV_PROP_RW int target;
+    };
+
+    /** @brief Constructor
+    @param parameters DaSiamRPN parameters TrackerDaSiamRPN::Params
+    */
+    static CV_WRAP
+    Ptr<TrackerDaSiamRPN> create(const TrackerDaSiamRPN::Params& parameters = TrackerDaSiamRPN::Params());
+
+    /** @brief Return tracking score
+    */
+    CV_WRAP virtual float getTrackingScore() = 0;
+
+    //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
+    //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
+};
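+
+/* Example (editor's illustrative sketch, not part of the original header): the
+   init/update loop shared by every Tracker subclass above. TrackerMIL is used
+   here only because it needs no external model files; the video path and the
+   initial ROI are assumptions.
+
+   @code{.cpp}
+   cv::VideoCapture cap("video.mp4");
+   cv::Mat frame;
+   cap >> frame;
+   cv::Rect roi(100, 100, 80, 120);  // initial target bounding box
+   cv::Ptr<cv::Tracker> tracker = cv::TrackerMIL::create();
+   tracker->init(frame, roi);
+   while (cap.read(frame)) {
+       if (tracker->update(frame, roi))  // roi is updated in place on success
+           cv::rectangle(frame, roi, cv::Scalar(0, 255, 0), 2);
+   }
+   @endcode
+*/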
+
+//! @} video_track
+
+} // cv
+
+#endif
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/video.hpp b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/video.hpp
new file mode 100644
index 0000000..8267b85
--- /dev/null
+++ b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/jni/include/opencv2/video/video.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/video.hpp"
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_core.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_core.a
new file mode 100644
index 0000000..7fcf30d
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_core.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_features2d.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_features2d.a
new file mode 100644
index 0000000..33ee719
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_features2d.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_highgui.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_highgui.a
new file mode 100644
index 0000000..1b15c0b
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_highgui.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_imgproc.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_imgproc.a
new file mode 100644
index 0000000..4b1ee83
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_imgproc.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_photo.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_photo.a
new file mode 100644
index 0000000..217877c
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_photo.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_video.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_video.a
new file mode 100644
index 0000000..94646aa
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/arm64-v8a/libopencv_video.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_core.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_core.a
new file mode 100644
index 0000000..ebbf964
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_core.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_features2d.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_features2d.a
new file mode 100644
index 0000000..830c93a
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_features2d.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_highgui.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_highgui.a
new file mode 100644
index 0000000..a58aa64
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_highgui.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_imgproc.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_imgproc.a
new file mode 100644
index 0000000..842b2d6
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_imgproc.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_photo.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_photo.a
new file mode 100644
index 0000000..ccf03a8
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_photo.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_video.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_video.a
new file mode 100644
index 0000000..e9222d1
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/armeabi-v7a/libopencv_video.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_core.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_core.a
new file mode 100644
index 0000000..9f1677b
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_core.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_features2d.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_features2d.a
new file mode 100644
index 0000000..f3c3d04
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_features2d.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_highgui.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_highgui.a
new file mode 100644
index 0000000..fc2e984
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_highgui.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_imgproc.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_imgproc.a
new file mode 100644
index 0000000..f54bd35
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_imgproc.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_photo.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_photo.a
new file mode 100644
index 0000000..36b6be1
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_photo.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_video.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_video.a
new file mode 100644
index 0000000..8156ca1
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86/libopencv_video.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_core.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_core.a
new file mode 100644
index 0000000..8bc6939
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_core.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_features2d.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_features2d.a
new file mode 100644
index 0000000..5c396ef
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_features2d.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_highgui.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_highgui.a
new file mode 100644
index 0000000..7424963
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_highgui.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_imgproc.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_imgproc.a
new file mode 100644
index 0000000..65233bd
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_imgproc.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_photo.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_photo.a
new file mode 100644
index 0000000..f4b2dc2
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_photo.a differ
diff --git a/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_video.a b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_video.a
new file mode 100644
index 0000000..22368be
Binary files /dev/null and b/android-demo/app/src/main/jni/opencv-mobile-4.6.0-android/sdk/native/staticlibs/x86_64/libopencv_video.a differ
diff --git a/android-demo/app/src/main/jni/yolo.cpp b/android-demo/app/src/main/jni/yolo.cpp
new file mode 100644
index 0000000..45f3f3c
--- /dev/null
+++ b/android-demo/app/src/main/jni/yolo.cpp
@@ -0,0 +1,417 @@
+#include "yolo.h"
+// note: the angle-bracket include targets were stripped from this diff; the
+// headers below are a plausible reconstruction based on what the file uses
+#include <float.h>
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include "OCsort.h"
+#include "cpu.h"
+//#include <cstdio> /* for the memory-safe snprintf function */
+#include <android/log.h>
+#include <layer.h>
+#include <vector>
+/*************************************************************/
+/*********************** Utility functions *******************/
+/*************************************************************/
+static float fast_exp(float x) {
+    // fast approximate exp() that exploits the IEEE-754 bit layout
+    union {
+        uint32_t i;
+        float f;
+    } v{};
+    v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
+    return v.f;
+}
+
+static float sigmoid(float x) {
+    return 1.0f / (1.0f + fast_exp(-x));
+}
+
+static float intersection_area(const Object &a, const Object &b) {
+    cv::Rect_<float> inter = a.rect & b.rect;
+    return inter.area();
+}
+
+static void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right) {
+    int i = left;
+    int j = right;
+    float p = faceobjects[(left + right) / 2].prob;
+
+    while (i <= j) {
+        while (faceobjects[i].prob > p)
+            i++;
+
+        while (faceobjects[j].prob < p)
+            j--;
+
+        if (i <= j) {
+            // swap
+            std::swap(faceobjects[i], faceobjects[j]);
+
+            i++;
+            j--;
+        }
+    }
+    // #pragma omp parallel sections
+    {
+        // #pragma omp section
+        {
+            if (left < j) qsort_descent_inplace(faceobjects, left, j);
+        }
+        // #pragma omp section
+        {
+            if (i < right) qsort_descent_inplace(faceobjects, i, right);
+        }
+    }
+}
+
+static void qsort_descent_inplace(std::vector<Object> &faceobjects) {
+    if (faceobjects.empty())
+        return;
+    qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
+}
+
+static void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked,
+                              float nms_threshold) {
+    picked.clear();
+
+    const int n = faceobjects.size();
+
+    std::vector<float> areas(n);
+    for (int i = 0; i < n; i++) {
+        areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
+    }
+
+    for (int i = 0; i < n; i++) {
+        const Object &a = faceobjects[i];
+
+        int keep = 1;
+        for (int j = 0; j < (int) picked.size(); j++) {
+            const Object &b = faceobjects[picked[j]];
+
+            // intersection over union
+            float inter_area = intersection_area(a, b);
+            float union_area = areas[i] + areas[picked[j]] - inter_area;
+            // float IoU = inter_area / union_area
+            if (inter_area / union_area > nms_threshold)
+                keep = 0;
+        }
+        if (keep)
+            picked.push_back(i);
+    }
+}
+
+static void
+generate_grids_and_stride(const int target_w, const int target_h, std::vector<int> &strides,
+                          std::vector<GridAndStride> &grid_strides) {
+    for (int i = 0; i < (int) strides.size(); i++) {
+        int stride = strides[i];
+        int num_grid_w = target_w / stride;
+        int num_grid_h = target_h / stride;
+        for (int g1 = 0; g1 < num_grid_h; g1++) {
+            for (int g0 = 0; g0 < num_grid_w; g0++) {
+                GridAndStride gs;
+                gs.grid0 = g0;
+                gs.grid1 = g1;
+                gs.stride = stride;
+                grid_strides.push_back(gs);
+            }
+        }
+    }
+}
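+
+// Editor's illustrative sketch (not part of the original source): generate_proposals()
+// below decodes YOLOv8's DFL box head, which predicts each box side as a discrete
+// distribution over reg_max_1 = 16 bins; the decoded distance is the expectation of
+// the softmax over those bins. This standalone helper shows the same math without
+// the ncnn Softmax layer (it reuses fast_exp() from above).
+static float dfl_expected_distance(const float *logits, int reg_max_1) {
+    float denom = 0.f, dis = 0.f;
+    std::vector<float> p(reg_max_1);
+    for (int l = 0; l < reg_max_1; l++) {
+        p[l] = fast_exp(logits[l]); // unnormalized softmax weight
+        denom += p[l];
+    }
+    for (int l = 0; l < reg_max_1; l++)
+        dis += l * (p[l] / denom);  // expectation over bin indices
+    return dis;                     // multiply by the cell's stride to get pixels
+}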
+
+static void generate_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat &pred,
+                               float prob_threshold, std::vector<Object> &objects) {
+    const int num_points = grid_strides.size();
+    const int num_class = 80;
+    const int reg_max_1 = 16;
+
+    for (int i = 0; i < num_points; i++) {
+        const float *scores = pred.row(i) + 4 * reg_max_1;
+
+        // find label with max score
+        int label = -1;
+        float score = -FLT_MAX;
+        for (int k = 0; k < num_class; k++) {
+            float confidence = scores[k];
+            if (confidence > score) {
+                label = k;
+                score = confidence;
+            }
+        }
+        float box_prob = sigmoid(score);
+        if (box_prob >= prob_threshold) {
+            ncnn::Mat bbox_pred(reg_max_1, 4, (void *) pred.row(i));
+            {
+                ncnn::Layer *softmax = ncnn::create_layer("Softmax");
+
+                ncnn::ParamDict pd;
+                pd.set(0, 1); // axis
+                pd.set(1, 1);
+                softmax->load_param(pd);
+
+                ncnn::Option opt;
+                opt.num_threads = 1;
+                opt.use_packing_layout = false;
+
+                softmax->create_pipeline(opt);
+
+                softmax->forward_inplace(bbox_pred, opt);
+
+                softmax->destroy_pipeline(opt);
+
+                delete softmax;
+            }
+
+            float pred_ltrb[4];
+            for (int k = 0; k < 4; k++) {
+                float dis = 0.f;
+                const float *dis_after_sm = bbox_pred.row(k);
+                for (int l = 0; l < reg_max_1; l++) {
+                    dis += l * dis_after_sm[l];
+                }
+
+                pred_ltrb[k] = dis * grid_strides[i].stride;
+            }
+
+            float pb_cx = (grid_strides[i].grid0 + 0.5f) * grid_strides[i].stride;
+            float pb_cy = (grid_strides[i].grid1 + 0.5f) * grid_strides[i].stride;
+
+            float x0 = pb_cx - pred_ltrb[0];
+            float y0 = pb_cy - pred_ltrb[1];
+            float x1 = pb_cx + pred_ltrb[2];
+            float y1 = pb_cy + pred_ltrb[3];
+
+            Object obj;
+            obj.rect.x = x0;
+            obj.rect.y = y0;
+            obj.rect.width = x1 - x0;
+            obj.rect.height = y1 - y0;
+            obj.label = label;
+            obj.prob = box_prob;
+
+            objects.push_back(obj);
+        }
+    }
+}
+
+/*************************************************************/
+/****************** Yolo method implementations **************/
+/*************************************************************/
+
+Yolo::Yolo() {
+    blob_pool_allocator.set_size_compare_ratio(0.f);
+    workspace_pool_allocator.set_size_compare_ratio(0.f);
+}
+
+/**
+ * Loads a YOLOv8 model from the app's assets.
+ * @param mgr          Android asset manager
+ * @param modeltype    model suffix ("n", "s", ...)
+ * @param _target_size input size the image is resized to before detection
+ * @param _mean_vals   per-channel mean for input normalization
+ * @param _norm_vals   per-channel scale for input normalization
+ * @param use_gpu      run inference on the GPU via Vulkan if available
+ * @return 0 on success
+ */
+int Yolo::load(AAssetManager *mgr, const char *modeltype, int _target_size, const float *_mean_vals,
+               const float *_norm_vals, bool use_gpu) {
+    yolo.clear();
+    blob_pool_allocator.clear();
+    workspace_pool_allocator.clear();
+    ncnn::set_cpu_powersave(2);
+    ncnn::set_omp_num_threads(ncnn::get_big_cpu_count());
+    yolo.opt = ncnn::Option();
+#if NCNN_VULKAN
+    yolo.opt.use_vulkan_compute = use_gpu;
+#endif
+    yolo.opt.num_threads = ncnn::get_big_cpu_count();
+    yolo.opt.blob_allocator = &blob_pool_allocator;
+    yolo.opt.workspace_allocator = &workspace_pool_allocator;
+
+    char parampath[256];
+    char modelpath[256];
+    sprintf(parampath, "yolov8%s.param", modeltype);
+    sprintf(modelpath, "yolov8%s.bin", modeltype);
+
+    yolo.load_param(mgr, parampath);
+    yolo.load_model(mgr, modelpath);
+
+    target_size = _target_size;
+    mean_vals[0] = _mean_vals[0];
+    mean_vals[1] = _mean_vals[1];
+    mean_vals[2] = _mean_vals[2];
+    norm_vals[0] = _norm_vals[0];
+    norm_vals[1] = _norm_vals[1];
+    norm_vals[2] = _norm_vals[2];
+
+    return 0;
+}
+
+/**
+ * Runs detection on an input image.
+ * @param rgb            input image in RGB order
+ * @param objects        output detections
+ * @param prob_threshold minimum confidence to keep a box
+ * @param nms_threshold  IoU threshold used by NMS
+ * @return 0 on success
+ */
+int Yolo::detect(const cv::Mat &rgb, std::vector<Object> &objects, float prob_threshold,
+                 float nms_threshold) {
+    int width = rgb.cols;
+    int height = rgb.rows;
+    // letterbox: scale the longer side to target_size, then pad to a multiple of 32
+    int w = width;
+    int h = height;
+    float scale = 1.f;
+    if (w > h) {
+        scale = (float) target_size / w;
+        w = target_size;
+        h = h * scale;
+    } else {
+        scale = (float) target_size / h;
+        h = target_size;
+        w = w * scale;
+    }
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB2BGR, width, height,
+                                                 w, h);
+    // pad to target_size rectangle
+    int wpad = (w + 31) / 32 * 32 - w;
+    int hpad = (h + 31) / 32 * 32 - h;
+    ncnn::Mat in_pad;
+    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2,
+                           ncnn::BORDER_CONSTANT, 0.f);
+    in_pad.substract_mean_normalize(0, norm_vals);
+    ncnn::Extractor ex = yolo.create_extractor();
+    ex.input("images", in_pad);
+    std::vector<Object> proposals;
+    ncnn::Mat out;
+    ex.extract("output", out);
+    std::vector<int> strides = {8, 16, 32}; // might have stride=64
+    std::vector<GridAndStride> grid_strides;
+    generate_grids_and_stride(in_pad.w, in_pad.h, strides, grid_strides);
+    generate_proposals(grid_strides, out, prob_threshold, proposals);
+    // sort all proposals by score from highest to lowest
+    qsort_descent_inplace(proposals);
+    // apply nms with nms_threshold
+    std::vector<int> picked;
+    nms_sorted_bboxes(proposals, picked, nms_threshold);
+    int count = picked.size();
+    objects.resize(count);
+    for (int i = 0; i < count; i++) {
+        objects[i] = proposals[picked[i]];
+        // adjust offset to original unpadded
+        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
+        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
+        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
+        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
+        // clip
+        x0 = std::max(std::min(x0, (float) (width - 1)), 0.f);
+        y0 = std::max(std::min(y0, (float) (height - 1)), 0.f);
+        x1 = std::max(std::min(x1, (float) (width - 1)), 0.f);
+        y1 = std::max(std::min(y1, (float) (height - 1)), 0.f);
+        objects[i].rect.x = x0;
+        objects[i].rect.y = y0;
+        objects[i].rect.width = x1 - x0;
+        objects[i].rect.height = y1 - y0;
+    }
+    // sort objects by area
+    struct {
+        bool operator()(const Object &a, const Object &b) const {
+            return a.rect.area() > b.rect.area();
+        }
+    } objects_area_greater;
+    std::sort(objects.begin(), objects.end(), objects_area_greater);
+    return 0;
+}
+
+// note: create a single tracker (global variable); the arguments appear to follow
+// OC-SORT's constructor order (det_thresh, max_age, min_hits, iou_threshold,
+// delta_t, asso_func, inertia, use_byte)
+ocsort::OCSort tracker(0, 50, 1, 0.22136877277096445, 1, "giou", 0.3941737016672115, true);
+
+Eigen::MatrixXf convertToEigen(const std::vector<Object> &objects) {
+    // Create a matrix of size (n, 6) where n is the number of objects
+    Eigen::MatrixXf objects_eigen(objects.size(), 6);
+    for (size_t i = 0; i < objects.size(); ++i) {
+        const Object &obj = objects[i];
+        // Convert cv::Rect_<float> (x, y, width, height) to (x1, y1, x2, y2)
+        float x1 = obj.rect.x;
+        float y1 = obj.rect.y;
+        float x2 = obj.rect.x + obj.rect.width;
+        float y2 = obj.rect.y + obj.rect.height;
+        // Set the values in the Eigen matrix (x1, y1, x2, y2, prob, label)
+        objects_eigen(i, 0) = x1;
+        objects_eigen(i, 1) = y1;
+        objects_eigen(i, 2) = x2;
+        objects_eigen(i, 3) = y2;
+        objects_eigen(i, 4) = obj.prob;
+        objects_eigen(i, 5) = obj.label;
+    }
+    return objects_eigen;
+}
+
+std::vector<float> convertToTLWH(const Eigen::RowVectorXf &input) {
+    // extract x1, y1, x2, y2 from the input row
+    float x1 = input(0);
+    float y1 = input(1);
+    float x2 = input(2);
+    float y2 = input(3);
+
+    // convert to tlwh format [x, y, width, height]
+    float width = x2 - x1;
+    float height = y2 - y1;
+
+    // return the result as a std::vector
+    return {x1, y1, width, height};
+}
+
+int Yolo::draw(cv::Mat &rgb, const std::vector<Object> &objects) {
+    // run the tracker; the results are collected in output_stracks
+    // note: convert objects to an Eigen::MatrixXf first
+    Eigen::MatrixXf objects_eigen = convertToEigen(objects);
+    std::vector<Eigen::RowVectorXf> output_stracks;
+    if (!objects.empty())
+        output_stracks = tracker.update(objects_eigen);
+    else
+        output_stracks = tracker.update(Eigen::MatrixXf(objects.size(), 6));
+
+    int color_index = 0;
+
+    // [Debug]
+    if (output_stracks.size() != objects.size())
+        __android_log_print(ANDROID_LOG_WARN, "??", "%s", "tracking objects number not match");
+
+    for (size_t i = 0; i < output_stracks.size(); i++) {
+        const Object &obj = objects[i];
+        char TEXT[256];
+        if (obj.label != 0) continue; // only draw tracked persons (label 0)
+        // label the tracked target; each output row is x1,y1,x2,y2,UID,..., so index 4 is the UID
+        sprintf(TEXT, "UID:%d %s", int(output_stracks.at(i)[4]), class_names[obj.label]);
+
+        // convert to tlwh (top-left, width-height) so OpenCV can draw the rectangle directly
+        std::vector<float> tlwh = convertToTLWH(output_stracks.at(i));
+        // assign a color to the target box
+        const unsigned char *color = colors[color_index % 19];
+        color_index++;
+        cv::Scalar cc(color[0], color[1], color[2]);
+        // draw the rectangle of the tracked target
+        cv::rectangle(rgb, cv::Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), cc, 2);
+
+        int baseLine = 0;
+        cv::Size label_size = cv::getTextSize(TEXT, cv::FONT_HERSHEY_SIMPLEX, 0.5,
+                                              1, &baseLine);
+        int x = tlwh[0];
+        int y = tlwh[1] - label_size.height - baseLine;
+        if (y < 0)
+            y = 0;
+        if (x + label_size.width > rgb.cols)
+            x = rgb.cols - label_size.width;
+        // this rectangle is the background for the label
+        cv::rectangle(rgb, cv::Rect(cv::Point(x, y),
+                                    cv::Size(label_size.width, label_size.height + baseLine)), cc,
+                      -1);
+        // label text color: black on bright backgrounds, white on dark ones
+        cv::Scalar textcc = (color[0] + color[1] + color[2] >= 381) ? cv::Scalar(0, 0, 0)
+                                                                    : cv::Scalar(255, 255, 255);
+        // draw the label text
+        cv::putText(rgb, TEXT, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5,
+                    textcc, 1);
+    }
+    return 0;
+}
diff --git a/android-demo/app/src/main/jni/yolo.h b/android-demo/app/src/main/jni/yolo.h
new file mode 100644
index 0000000..ff0cd04
--- /dev/null
+++ b/android-demo/app/src/main/jni/yolo.h
@@ -0,0 +1,78 @@
+#pragma once
+
+// note: the angle-bracket include targets were stripped from this diff; these
+// two are a plausible reconstruction (cv::Mat and ncnn::Net are used below)
+#include <opencv2/core/core.hpp>
+#include "Object.h"
+#include <net.h>
+
+static const char *class_names[] = {
+        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
+        "traffic light",
+        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
+        "sheep", "cow",
+        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
+        "suitcase", "frisbee",
+        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
+        "skateboard", "surfboard",
+        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
+        "banana", "apple",
+        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
+        "chair", "couch",
+        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
+        "keyboard", "cell phone",
+        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
+        "scissors", "teddy bear",
+        "hair drier", "toothbrush"
+};
+
+static const unsigned char colors[19][3] = {
+        {54,  67,  244},
+        {99,  30,  233},
+        {176, 39,  156},
+        {183, 58,  103},
+        {181, 81,  63},
+        {243, 150, 33},
+        {244, 169, 3},
+        {212, 188, 0},
+        {136, 150, 0},
+        {80,  175, 76},
+        {74,  195, 139},
+        {57,  220, 205},
+        {59,  235, 255},
+        {7,   193, 255},
+        {0,   152, 255},
+        {34,  87,  255},
+        {72,  85,  121},
+        {158, 158, 158},
+        {139, 125, 96}
+};
+
+struct GridAndStride {
+    int grid0;
+    int grid1;
+    int stride;
+};
+
+class Yolo {
+public:
+    Yolo();
+
+    int load(const char *modeltype, int target_size, const float *mean_vals, const float *norm_vals,
+             bool use_gpu = false);
+
+    int load(AAssetManager *mgr, const char *modeltype, int target_size, const float *mean_vals,
+             const float *norm_vals, bool use_gpu = false);
+
+    int detect(const cv::Mat &rgb, std::vector<Object> &objects, float prob_threshold = 0.4f,
+               float nms_threshold = 0.5f);
+
+    int draw(cv::Mat &rgb, const std::vector<Object> &objects);
+
+private:
+    ncnn::Net yolo;
+    int target_size;
+    float mean_vals[3];
+    float norm_vals[3];
+    ncnn::UnlockedPoolAllocator blob_pool_allocator;
+    ncnn::PoolAllocator workspace_pool_allocator;
+};
diff --git a/android-demo/app/src/main/jni/yolov8ncnn.cpp b/android-demo/app/src/main/jni/yolov8ncnn.cpp
new file mode 100644
index 0000000..acf2abe
--- /dev/null
+++ b/android-demo/app/src/main/jni/yolov8ncnn.cpp
@@ -0,0 +1,234 @@
+// note: the angle-bracket include targets were stripped from this diff; the list
+// below is a plausible reconstruction matching the standard ncnn Android demo
+#include <android/asset_manager_jni.h>
+#include <android/native_window_jni.h>
+#include <android/native_window.h>
+
+#include <android/log.h>
+
+#include <jni.h>
+
+#include <string>
+#include <vector>
+
+#include <platform.h>
+#include <benchmark.h>
+
+#include "yolo.h"
+#include "Object.h"
+#include "ndkcamera.h"
+#include "net.h"
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#if __ARM_NEON
+#include <arm_neon.h>
+#endif // __ARM_NEON
+
+static int draw_unsupported(cv::Mat &rgb) {
+    const char text[] = "unsupported";
+    int baseLine = 0;
+    cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 1.0, 1, &baseLine);
+    int y = (rgb.rows - label_size.height) / 2;
+    int x = (rgb.cols - label_size.width) / 2;
+    cv::rectangle(rgb, cv::Rect(cv::Point(x, y),
+                                cv::Size(label_size.width, label_size.height + baseLine)),
+                  cv::Scalar(255, 255, 255), -1);
+    cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0, 0, 0));
+    return 0;
+}
+
+static int draw_fps(cv::Mat &rgb) {
+    // compute a moving average over the last 10 frames
+    float avg_fps = 0.f;
+    {
+        static double t0 = 0.f;
+        static float fps_history[10] = {0.f};
+        double t1 = ncnn::get_current_time();
+        if (t0 == 0.f) {
+            t0 = t1;
+            return 0;
+        }
+        float fps = 1000.f / (t1 - t0);
+        t0 = t1;
+        for (int i = 9; i >= 1; i--) {
+            fps_history[i] = fps_history[i - 1];
+        }
+        fps_history[0] = fps;
+        if (fps_history[9] == 0.f) {
+            return 0;
+        }
+        for (int i = 0; i < 10; i++) {
+            avg_fps += fps_history[i];
+        }
+        avg_fps /= 10.f;
+    }
+    char text[32];
+    sprintf(text, "FPS=%.2f", avg_fps);
+    int baseLine = 0;
+    cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+    int y = 0;
+    int x = rgb.cols - label_size.width;
+    cv::rectangle(rgb, cv::Rect(cv::Point(x, y),
+                                cv::Size(label_size.width, label_size.height + baseLine)),
+                  cv::Scalar(255, 255, 255), -1);
+    cv::putText(rgb, text, cv::Point(x, y + label_size.height),
+                cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
+    return 0;
+}
+
+/* the Yolo instance used for object detection */
+static Yolo *g_yolo = nullptr;
+
+static ncnn::Mutex Lock;
+
+// note: we must subclass the NdkCameraWindow provided by ncnn and override on_image_render
+class MyNdkCamera : public NdkCameraWindow {
+public:
+    virtual void on_image_render(cv::Mat &rgb) const;
+};
+
+void MyNdkCamera::on_image_render(cv::Mat &rgb) const {
+    // the whole detection pipeline runs here, once per camera frame
+    {
+        ncnn::MutexLockGuard g(Lock);
+        if (g_yolo) {
+            std::vector<Object> objects;
+            g_yolo->detect(rgb, objects);
+            // track the detected Objects
+            // the OC-SORT update call lives inside this draw() implementation
+            g_yolo->draw(rgb, objects);
+        } else {
+            draw_unsupported(rgb);
+        }
+    }
+    draw_fps(rgb);
+}
+
+/*************************************************************/
+/********** Everything below interfaces with Java ************/
+/** note: JNI naming convention: Java_<package>_<class>_<method> **/
+/*************************************************************/
+
+static MyNdkCamera *g_camera = nullptr;
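+
+/* Editor's illustrative sketch (not part of the original source): the Java peer
+   these exported names resolve to. Following the Java_<package>_<class>_<method>
+   rule, Java_com_gyq_yolov8_Yolov8Ncnn_loadModel binds to loadModel() in class
+   com.gyq.yolov8.Yolov8Ncnn; the library name in loadLibrary is an assumption.
+
+   package com.gyq.yolov8;
+   public class Yolov8Ncnn {
+       static { System.loadLibrary("yolov8ncnn"); }
+       public native boolean loadModel(android.content.res.AssetManager mgr, int modelid, int cpugpu);
+       public native boolean openCamera(int facing);
+       public native boolean closeCamera();
+       public native boolean setOutputWindow(android.view.Surface surface);
+   }
+*/
+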
+extern "C" {
+
+JNIEXPORT jint
+JNI_OnLoad(JavaVM *vm, void *reserved) {
+    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnLoad");
+
+    g_camera = new MyNdkCamera;
+
+    return JNI_VERSION_1_4;
+}
+
+JNIEXPORT void JNI_OnUnload(JavaVM *vm, void *reserved) {
+    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnUnload");
+
+    {
+        ncnn::MutexLockGuard g(Lock);
+
+        delete g_yolo;
+        g_yolo = nullptr;
+    }
+
+    delete g_camera;
+    g_camera = nullptr;
+}
+
+// public native boolean loadModel(AssetManager mgr, int modelid, int cpugpu);
+JNIEXPORT jboolean JNICALL
+Java_com_gyq_yolov8_Yolov8Ncnn_loadModel(JNIEnv *env, jobject thiz, jobject assetManager,
+                                         jint modelid, jint cpugpu) {
+    // only two model variants ("n" and "s") are bundled, so reject anything else;
+    // the original check allowed modelid up to 6, which would index past the arrays below
+    if (modelid < 0 || modelid > 1 || cpugpu < 0 || cpugpu > 1) {
+        return JNI_FALSE;
+    }
+
+    AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
+
+    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p", mgr);
+
+    // model-size labels shown on the main screen when switching models
+    const char *modeltypes[] =
+            {
+                    "n",
+                    "s",
+            };
+    // size the image is resized to before detection runs
+    const int target_sizes[] =
+            {
+                    320,
+                    320,
+            };
+    // mean_vals and norm_vals are the input-image preprocessing parameters
+    const float mean_vals[][3] =
+            {
+                    {103.53f, 116.28f, 123.675f},
+                    {103.53f, 116.28f, 123.675f},
+            };
+
+    const float norm_vals[][3] =
+            {
+                    {1 / 255.f, 1 / 255.f, 1 / 255.f},
+                    {1 / 255.f, 1 / 255.f, 1 / 255.f},
+            };
+
+    const char *modeltype = modeltypes[(int) modelid];
+    int target_size = target_sizes[(int) modelid];
+    bool use_gpu = (int) cpugpu == 1;
+
+    // reload
+    {
+        ncnn::MutexLockGuard g(Lock);
+
+        if (use_gpu && ncnn::get_gpu_count() == 0) {
+            // no gpu
+            delete g_yolo;
+            g_yolo = nullptr;
+        } else {
+            if (!g_yolo)
+                g_yolo = new Yolo;
+            g_yolo->load(mgr, modeltype, target_size, mean_vals[(int) modelid],
+                         norm_vals[(int) modelid], use_gpu);
+        }
+    }
+
+    return JNI_TRUE;
+}
+
+// public native boolean openCamera(int facing);
+JNIEXPORT jboolean JNICALL
+Java_com_gyq_yolov8_Yolov8Ncnn_openCamera(JNIEnv *env, jobject thiz, jint facing) {
+    if (facing < 0 || facing > 1)
+        return JNI_FALSE;
+
+    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "openCamera %d", facing);
+
+    g_camera->open((int) facing);
+
+    return JNI_TRUE;
+}
+
+// public native boolean closeCamera();
+JNIEXPORT jboolean JNICALL
+Java_com_gyq_yolov8_Yolov8Ncnn_closeCamera(JNIEnv *env, jobject thiz) {
+    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "closeCamera");
+
+    g_camera->close();
+
+    return JNI_TRUE;
+}
+
+// public native boolean setOutputWindow(Surface surface);
+JNIEXPORT jboolean JNICALL
+Java_com_gyq_yolov8_Yolov8Ncnn_setOutputWindow(JNIEnv *env, jobject thiz, jobject surface) {
+    ANativeWindow *win = ANativeWindow_fromSurface(env, surface);
+
+    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", win);
+
+    g_camera->set_window(win);
+
+    return JNI_TRUE;
+}
+}
diff --git a/android-demo/app/src/main/res/layout/main.xml b/android-demo/app/src/main/res/layout/main.xml
new file mode 100644
index 0000000..14dc85f
--- /dev/null
+++ b/android-demo/app/src/main/res/layout/main.xml
@@ -0,0 +1,46 @@
+
+
+
+
+